In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver

import string
import datetime
import time

In [2]:
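# global variables: restored with IPython's %store -r, so each must already have been saved with %store in an earlier notebook/session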
%store -r playersToIds
%store -r idsToPlayers
%store -r datesDict
%store -r statsNBAMap

In [3]:
def setupHeadlessChrome(hideImages=True):
    """Build a ChromeOptions object for running Chrome without a visible window.

    Args:
        hideImages: when truthy, a "prefs" experimental option is attached that
            sets the managed image content setting to 2 (presumably "block
            images" -- confirm against Chrome's preference docs) and
            'disk-cache-size' to 4096.

    Returns:
        The configured ChromeOptions instance.
    """
    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_argument('--headless')  # don't open URL window

    if not hideImages:
        return chromeOptions

    chromeOptions.add_experimental_option("prefs", {
        "profile.managed_default_content_settings.images": 2,
        'disk-cache-size': 4096,
    })
    return chromeOptions

In [96]:
def getInjuryHistory(playerName):
    """Scrape a player's injury / injured-list history from prosportstransactions.com.

    The first result page is fetched and parsed, then every extra result page
    linked from the fourth <p> element of that page is fetched as well. Rows
    from all pages are folded into one timeline by addToInjuryTimeline.

    Args:
        playerName: the player's name as free text, e.g. "Devin Booker". Any
            number of words is accepted; the name is URL-encoded before use.

    Returns:
        A list of injury stints. Completed stints are the lists built by
        appendCurrentTimes ([start, end, days, team, season]); a stint that is
        still open when the rows run out is appended as [start, 'present'].
    """
    from urllib.parse import quote_plus  # local import: only this function needs it

    # quote_plus turns "Devin Booker" into "Devin+Booker" (same URL as before)
    # and also copes with single-word names, multi-word surnames and
    # punctuation, which previously raised ValueError or produced a URL
    # containing a raw space.
    normalizedName = " ".join(playerName.split())
    url = "https://www.prosportstransactions.com/basketball/Search/SearchResults.php?Player={}" \
        + "&Team=&BeginDate=&EndDate=&ILChkBx=yes&" \
        + "InjuriesChkBx=yes&Submit=Search"
    url = url.format(quote_plus(normalizedName))
    injuryTimeline = []

    # The context manager closes the HTTP response once it has been parsed.
    with urlopen(url) as html:
        soup = BeautifulSoup(html)
    injuries = soup.findAll('tr')

    currentTimes = []
    injuryTimeline, currentTimes = addToInjuryTimeline(injuries, currentTimes, injuryTimeline)

    # Links to further result pages live in the fourth paragraph; a page with
    # fewer paragraphs is treated as having no further pages.
    paragraphs = soup.findAll('p')
    links = paragraphs[3].findAll('a') if len(paragraphs) > 3 else []
    for link in links:
        nextSearch = link['href']
        # Only the last query argument of the link is appended to the base URL.
        argIndex = nextSearch.rindex('&')
        nextUrl = url + nextSearch[argIndex:]

        with urlopen(nextUrl) as nextHtml:
            nextSoup = BeautifulSoup(nextHtml)
        nextInjuries = nextSoup.findAll('tr')

        injuryTimeline, currentTimes = addToInjuryTimeline(nextInjuries, currentTimes, injuryTimeline)

    # A stint that was opened but never closed is still ongoing.
    if currentTimes != []:
        currentTimes.append('present')
        injuryTimeline.append(currentTimes)

    return injuryTimeline
    
def addToInjuryTimeline(injuries, currentTimes, injuryTimeline):
    """Fold one page of transaction rows into the running injury timeline.

    Args:
        injuries: sequence of table-row elements; the first and the last row
            are skipped. Each remaining row must expose findAll('td') giving
            at least five cells (date, team, acquired, relinquished, notes).
        currentTimes: the stint currently being built, [] when none is open.
            May be mutated in place.
        injuryTimeline: list of finished stints; finished stints are appended
            to it in place.

    Returns:
        (injuryTimeline, currentTimes), so an open stint carries over to the
        next page of rows.
    """
    for row in injuries[1:-1]:
        cells = row.findAll('td')
        date = cells[0].text
        team = cells[1].text.strip()
        acquired = cells[2].text
        description = cells[4].text

        if acquired != ' ':
            # Non-blank "acquired" cell: the row ends the open stint.
            if not currentTimes:
                # mistake in website, acquired from IL twice in a row
                continue
            injuryTimeline.append(appendCurrentTimes(date, currentTimes, team))
            currentTimes = []
            continue

        # Blank "acquired" cell: the row starts a stint.
        if not currentTimes:
            currentTimes.append(date)
        else:
            # A stint was already open: close it using its own start date as
            # the end, then open a new one at this row's date.
            closed = appendCurrentTimes(currentTimes[0], currentTimes, team)
            injuryTimeline.append(closed)
            currentTimes = [date]

        if 'out for season' in description:
            injuryTimeline.append(appendCurrentTimes('OFS', currentTimes, team))
            currentTimes = []

    return injuryTimeline, currentTimes

def appendCurrentTimes(endTime, currentTimes, team):
    """Complete an open injury stint in place and return it.

    Appends, in order: the end time, the day count from calculateInjuryDays,
    the team, and the season label from getInSeasonYear. When the end time
    equals the stint's start date, the end is pushed one day later first.

    Args:
        endTime: a 'YYYY-MM-DD' date string, or a marker such as 'OFS'.
        currentTimes: the open stint; its first element is the start date.
        team: team name stored with the stint.

    Returns:
        The same currentTimes list, now [start, end, days, team, season].
    """
    isoFormat = '%Y-%m-%d'
    if endTime == currentTimes[0]:
        # Same-day start and end: move the end to the following day.
        nextDay = datetime.datetime.strptime(endTime, isoFormat) + datetime.timedelta(days=1)
        endTime = nextDay.strftime(isoFormat)

    currentTimes.append(endTime)
    # The day count reads the end time that was just appended.
    currentTimes.append(calculateInjuryDays(currentTimes))
    currentTimes.append(team)
    currentTimes.append(getInSeasonYear(currentTimes)[0])
    return currentTimes

def calculateInjuryDays(times):
    """Return the length in days of an injury stint.

    Args:
        times: sequence whose first element is the start date and whose second
            is the end date, both 'YYYY-MM-DD', or an end marker of 'present'
            or 'OFS'.

    Returns:
        The whole number of days from start to end, or -1 for an open-ended
        stint ('present' / 'OFS').
    """
    if times[1] in ('present', 'OFS'):
        return -1

    def parse(stamp):
        return datetime.datetime.strptime(stamp, '%Y-%m-%d')

    began = parse(times[0])
    finished = parse(times[1])
    return (finished - began).days

############### Find how teams respond when a player X is out injured - net rating, and win pct of opponents played ####
def findTeamResponse(timeline, minDays=5):
    """Look up how a team performed while a player was out injured.

    For every completed, in-season stint longer than minDays, three
    stats.nba.com pages are queried: the team's advanced stats for the stint's
    date range (net rating), the opponents split for the same range, and the
    league standings up to the stint's end date (opponent win percentages).

    Args:
        timeline: list of stints as produced by getInjuryHistory. Completed
            stints are [start, end, days, team, season]; an ongoing stint is
            just [start, 'present'].
        minDays: stints lasting this many days or fewer are ignored.

    Returns:
        A list with one dict per analysed stint, holding the keys 'startDate',
        'endDate', 'team', 'season', 'netRating' and 'opponents' (the list
        returned by getOppPercentages).
    """
    responses = []
    for injury in timeline:
        # Open-ended stints have no end date to query. This check comes first
        # because 'present' entries only have two elements.
        if injury[1] in ("OFS", "present"):
            continue
        # The season label is the fifth element; "OOS" marks an out-of-season
        # stint, for which there is no regular-season data to request.
        if injury[4] == "OOS":
            continue
        if injury[2] <= minDays:
            continue

        nbaTeamId = statsNBAMap[injury[3]]

        year = injury[4]
        startDate = datetime.datetime.strptime(injury[0], '%Y-%m-%d')
        endDate = datetime.datetime.strptime(injury[1], '%Y-%m-%d')

        #### NEXT STEPS: UPDATE FOR PLAYOFFS, USE STEPHEN CURRY AS EXAMPLE

        url = "https://stats.nba.com/team/{}" \
            + "/advanced/?Season={}&SeasonType=Regular%20" \
            + "Season&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}"

        url = url.format(nbaTeamId, year, startDate.month, startDate.day, startDate.year,
                        endDate.month, endDate.day, endDate.year)

        netrtg = getAbsentNetRtg(url)

        # Same date range, split by opponent.
        oppUrl = url + "&Split=opp"
        opponents = getAbsentOpponents(oppUrl)

        # League standings as of the end of the stint.
        standingsUrl = "https://stats.nba.com/teams/traditional/" \
            + "?sort=W_PCT&dir=-1&Season={}&SeasonType=Regular%20Season" \
            + "&DateTo={}%2F{}%2F{}"
        standingsUrl = standingsUrl.format(year, endDate.month, endDate.day, endDate.year)

        oppPercentages = getOppPercentages(standingsUrl, opponents)

        responses.append({
            'startDate': startDate,
            'endDate': endDate,
            'team': injury[3],
            'season': year,
            'netRating': netrtg,
            'opponents': oppPercentages,
        })

    return responses
            
def findActiveResponse(timeline):
    """Look up how a team performed during the spans a player was available.

    For every span, three stats.nba.com pages are queried: the team's advanced
    stats for the span's date range (net rating), the opponents split for the
    same range, and the league standings up to the span's end date (opponent
    win percentages).

    Args:
        timeline: dict mapping a season label to a list of spans, as produced
            by getPlayingSpans. Each span is [startDate, endDate, days, team]
            with datetime start and end values.

    Returns:
        A list with one dict per span, holding the keys 'startDate',
        'endDate', 'team', 'season', 'netRating' and 'opponents' (the list
        returned by getOppPercentages).
    """
    responses = []
    for year, spans in timeline.items():
        for span in spans:
            nbaTeamId = statsNBAMap[span[3]]

            startDate = span[0]
            endDate = span[1]

            #### NEXT STEPS: UPDATE FOR PLAYOFFS, USE STEPHEN CURRY

            url = "https://stats.nba.com/team/{}" \
                + "/advanced/?Season={}&SeasonType=Regular%20" \
                + "Season&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}"

            url = url.format(nbaTeamId, year, startDate.month, startDate.day, startDate.year,
                            endDate.month, endDate.day, endDate.year)

            netrtg = getAbsentNetRtg(url)

            # Same date range, split by opponent.
            oppUrl = url + "&Split=opp"
            opponents = getAbsentOpponents(oppUrl)

            # League standings as of the end of the span.
            standingsUrl = "https://stats.nba.com/teams/traditional/" \
                + "?sort=W_PCT&dir=-1&Season={}&SeasonType=Regular%20Season" \
                + "&DateTo={}%2F{}%2F{}"
            standingsUrl = standingsUrl.format(year, endDate.month, endDate.day, endDate.year)

            oppPercentages = getOppPercentages(standingsUrl, opponents)

            responses.append({
                'startDate': startDate,
                'endDate': endDate,
                'team': span[3],
                'season': year,
                'netRating': netrtg,
                'opponents': oppPercentages,
            })

    return responses

def getInSeasonYear(injury):
    """Work out which season an injury stint belongs to.

    The two seasons overlapping the start date's calendar year (from
    getPotentialTradeYears) are looked up in datesDict. The stint is assigned
    to the first season if it starts inside it, otherwise to the second season
    if it starts or ends inside that one.

    Args:
        injury: sequence whose first element is the start date ('YYYY-MM-DD')
            and whose second is the end date in the same format, or "OFS".

    Returns:
        A tuple (year, startDate, endDate). year is the season label, or "OOS"
        when no season matches. startDate is a datetime. endDate is a
        datetime; for an "OFS" stint it is the matched season's end date, or
        the string "OFS" unchanged when no season matched.

    Raises:
        KeyError: if either candidate season is missing from datesDict.
    """
    seasonFormat = '%a, %b %d, %Y'
    startDate = datetime.datetime.strptime(injury[0], '%Y-%m-%d')

    year = ""
    potentialYears = getPotentialTradeYears(startDate.year)

    firstStart = datetime.datetime.strptime(datesDict[potentialYears[0]][0], seasonFormat)
    firstEnd = datetime.datetime.strptime(datesDict[potentialYears[0]][1], seasonFormat)

    secondStart = datetime.datetime.strptime(datesDict[potentialYears[1]][0], seasonFormat)
    secondEnd = datetime.datetime.strptime(datesDict[potentialYears[1]][1], seasonFormat)

    endDate = datetime.datetime.strptime(injury[1], '%Y-%m-%d') if injury[1] != "OFS" else "OFS"

    if firstStart <= startDate <= firstEnd:
        year = potentialYears[0]
    elif secondStart <= startDate <= secondEnd:
        year = potentialYears[1]
    elif endDate != "OFS" and secondStart <= endDate <= secondEnd:
        year = potentialYears[1]

    if year == "":
        # Out of season. Return before resolving "OFS": there is no season to
        # take an end date from, and datesDict[""] would raise KeyError.
        return "OOS", startDate, endDate

    if endDate == "OFS":
        # "Out for season" ends when the matched season ends.
        endDate = datetime.datetime.strptime(datesDict[year][1], seasonFormat)
    return year, startDate, endDate
            
def getAbsentNetRtg(url, timeout=60):
    """Load a stats.nba.com team page in Chrome and read the net rating from it.

    The page is rendered by JavaScript, so the function polls until at least
    one <tbody> exists, then reads the innerHTML of the sixth <td> of the
    first table body.

    Args:
        url: page to load.
        timeout: maximum number of seconds to wait for a table to appear.

    Returns:
        The net rating cell's innerHTML, as a string.

    Raises:
        TimeoutError: if no table body appears within `timeout` seconds.
    """
    print(url)

    driver = webdriver.Chrome() # used to view generated JavaScript
    try:
        driver.get(url)

        table = driver.find_elements_by_tag_name('tbody')
        print(len(table))
        # Poll with a short sleep and a deadline instead of spinning forever.
        deadline = time.time() + timeout
        while len(table) == 0:
            if time.time() > deadline:
                raise TimeoutError("no table rendered within {} seconds: {}".format(timeout, url))
            time.sleep(0.5)
            table = driver.find_elements_by_tag_name('tbody')
        cells = table[0].find_elements_by_tag_name('td')
        netrtg = cells[5].get_attribute('innerHTML')
        print("Net Rating: ", netrtg)
        return netrtg
    finally:
        # quit() ends the whole browser session even when an error occurred;
        # close() only closes the current window.
        driver.quit()

def getAbsentOpponents(oppUrl, timeout=60):
    """Load an opponents-split page in Chrome and list the opponents on it.

    The page is rendered by JavaScript, so the function polls until at least
    one <tbody> exists, then collects the text of every <td> in the last
    table body.

    Args:
        oppUrl: page to load.
        timeout: maximum number of seconds to wait for a table to appear.

    Returns:
        A list of strings, one per <td> of the last table body.

    Raises:
        TimeoutError: if no table body appears within `timeout` seconds.
    """
    driver = webdriver.Chrome() # used to view generated JavaScript
    try:
        driver.get(oppUrl)
        table = driver.find_elements_by_tag_name('tbody')

        # Poll with a short sleep and a deadline instead of spinning forever.
        deadline = time.time() + timeout
        while len(table) == 0:
            if time.time() > deadline:
                raise TimeoutError("no table rendered within {} seconds: {}".format(timeout, oppUrl))
            time.sleep(0.5)
            table = driver.find_elements_by_tag_name('tbody')
        oppTeams = table[-1].find_elements_by_tag_name('td')
        return [cell.text for cell in oppTeams]
    finally:
        # quit() ends the whole browser session even when an error occurred;
        # close() only closes the current window.
        driver.quit()
    
def getOppPercentages(standingsUrl, opponents, timeout=60):
    """Load a standings page in Chrome and look up each opponent's win percentage.

    The page is rendered by JavaScript, so the function polls until at least
    one <tbody> exists. For every row of the first table body, the link text
    in the second cell is used as the team name and the text of the sixth
    cell as its win percentage.

    Args:
        standingsUrl: page to load.
        opponents: team names to look up; each must match a name on the page.
        timeout: maximum number of seconds to wait for a table to appear.

    Returns:
        A list of [opponent, winPct] pairs in the order of `opponents`; the
        percentages are the page's text, not numbers.

    Raises:
        TimeoutError: if no table body appears within `timeout` seconds.
        KeyError: if an opponent is not listed on the standings page.
    """
    driver = webdriver.Chrome() # used to view generated JavaScript
    try:
        driver.get(standingsUrl)
        table = driver.find_elements_by_tag_name('tbody')

        # Poll with a short sleep and a deadline instead of spinning forever.
        deadline = time.time() + timeout
        while len(table) == 0:
            if time.time() > deadline:
                raise TimeoutError("no table rendered within {} seconds: {}".format(timeout, standingsUrl))
            time.sleep(0.5)
            table = driver.find_elements_by_tag_name('tbody')
        teams = table[0].find_elements_by_tag_name('tr')
        winPct = {}
        for team in teams:
            stats = team.find_elements_by_tag_name('td')
            name = stats[1].find_element_by_tag_name('a').text
            pct = stats[5].text
            winPct[name] = pct

        oppPercentages = [[opponent, winPct[opponent]] for opponent in opponents]
        print(oppPercentages)
        print('\n')
        return oppPercentages
    finally:
        # quit() ends the whole browser session even when an error occurred;
        # close() only closes the current window.
        driver.quit()
            
def getPotentialTradeYears(year):
    """Return the labels of the two seasons that touch calendar year `year`.

    A label is the season's starting year, a dash, and the characters of the
    following year after its first two, e.g. 2018 -> ['2017-18', '2018-19'].

    Args:
        year: calendar year as an integer.

    Returns:
        A two-item list: the season ending in `year`, then the season
        starting in `year`.
    """
    def seasonLabel(startYear):
        return '{}-{}'.format(startYear, str(startYear + 1)[2:])

    return [seasonLabel(year - 1), seasonLabel(year)]

def getPlayingSpans(injuryTimeline):
    """Derive the spans in which a player was available from an injury timeline.

    Within each season, the first span runs from the season's start date (from
    datesDict) to the first injury's start; each later span runs from the
    previous injury's end to the next injury's start. Spans of zero or
    negative length are dropped. Nothing is produced for the period after a
    season's last completed injury.

    Args:
        injuryTimeline: list of stints as produced by getInjuryHistory.
            Completed stints are [start, end, days, team, season]; an ongoing
            stint is just [start, 'present'].

    Returns:
        A dict mapping each season label to a list of
        [spanStart, spanEnd, days, team] entries, with datetime start and end
        values.
    """
    seasonFormat = '%a, %b %d, %Y'
    activeDict = {}

    for injury in injuryTimeline:
        # Open-ended stints are skipped before anything else is read:
        # 'present' entries have only two elements, so indexing team or
        # season on them would raise IndexError.
        if injury[1] in ("OFS", "present"):
            continue
        team = injury[3]
        year = injury[4]
        if year == "OOS" or year == "present":
            continue
        startDate = datetime.datetime.strptime(injury[0], '%Y-%m-%d')
        endDate = datetime.datetime.strptime(injury[1], '%Y-%m-%d')

        if year not in activeDict:
            # First injury of the season: the playing span starts at season start.
            yearStart = datetime.datetime.strptime(datesDict[year][0], seasonFormat)
            activeDict[year] = [[yearStart, startDate, endDate, team]]
        else:
            # Later injuries: the playing span starts when the previous injury ended.
            times = activeDict[year]
            times.append([times[-1][2], startDate, endDate, team])

    playingDict = {}

    for season, entries in activeDict.items():
        # Each entry is [spanStart, injuryStart, injuryEnd, team]; keep only
        # spans that last at least one day.
        playingDict[season] = [
            [entry[0], entry[1], (entry[1] - entry[0]).days, entry[3]]
            for entry in entries
            if (entry[1] - entry[0]).days > 0
        ]

    return playingDict

In [99]:
# Scrape the injury history for one player, then display it as the cell output.
injuryTimeline = getInjuryHistory("Devin Booker") # works completely for regular-season only player
injuryTimeline

[['2015-11-02', '2015-11-04', 2, 'Suns', '2015-16'],
 ['2016-10-31', '2016-11-02', 2, 'Suns', '2016-17'],
 ['2017-03-19', '2017-03-21', 2, 'Suns', '2016-17'],
 ['2017-03-28', '2017-03-30', 2, 'Suns', '2016-17'],
 ['2017-04-11', '2017-04-12', 1, 'Suns', '2016-17'],
 ['2017-11-26', '2017-11-28', 2, 'Suns', '2017-18'],
 ['2017-12-06', '2017-12-07', 1, 'Suns', '2017-18'],
 ['2017-12-07', '2017-12-26', 19, 'Suns', '2017-18'],
 ['2018-01-29', '2018-01-31', 2, 'Suns', '2017-18'],
 ['2018-02-05', '2018-02-06', 1, 'Suns', '2017-18'],
 ['2018-02-06', '2018-02-14', 8, 'Suns', '2017-18'],
 ['2018-03-10', '2018-03-13', 3, 'Suns', '2017-18'],
 ['2018-03-17', 'OFS', -1, 'Suns', '2017-18'],
 ['2018-03-26', 'OFS', -1, 'Suns', '2017-18'],
 ['2018-10-27', '2018-11-02', 6, 'Suns', '2018-19'],
 ['2018-11-30', '2018-12-02', 2, 'Suns', '2018-19'],
 ['2018-12-03', '2018-12-04', 1, 'Suns', '2018-19'],
 ['2018-12-04', '2018-12-15', 11, 'Suns', '2018-19'],
 ['2019-01-08', '2019-01-15', 7, 'Suns', '2018-19'],
 ['

In [100]:
# Turn the injury timeline into per-season playing spans, then display them as the cell output.
playingSpans = getPlayingSpans(injuryTimeline)
playingSpans
    

{'2015-16': [[datetime.datetime(2015, 10, 27, 0, 0),
   datetime.datetime(2015, 11, 2, 0, 0),
   6,
   'Suns']],
 '2016-17': [[datetime.datetime(2016, 10, 25, 0, 0),
   datetime.datetime(2016, 10, 31, 0, 0),
   6,
   'Suns'],
  [datetime.datetime(2016, 11, 2, 0, 0),
   datetime.datetime(2017, 3, 19, 0, 0),
   137,
   'Suns'],
  [datetime.datetime(2017, 3, 21, 0, 0),
   datetime.datetime(2017, 3, 28, 0, 0),
   7,
   'Suns'],
  [datetime.datetime(2017, 3, 30, 0, 0),
   datetime.datetime(2017, 4, 11, 0, 0),
   12,
   'Suns']],
 '2017-18': [[datetime.datetime(2017, 10, 17, 0, 0),
   datetime.datetime(2017, 11, 26, 0, 0),
   40,
   'Suns'],
  [datetime.datetime(2017, 11, 28, 0, 0),
   datetime.datetime(2017, 12, 6, 0, 0),
   8,
   'Suns'],
  [datetime.datetime(2017, 12, 26, 0, 0),
   datetime.datetime(2018, 1, 29, 0, 0),
   34,
   'Suns'],
  [datetime.datetime(2018, 1, 31, 0, 0),
   datetime.datetime(2018, 2, 5, 0, 0),
   5,
   'Suns'],
  [datetime.datetime(2018, 2, 14, 0, 0),
   datetime.d

In [101]:
# Query team stats for each qualifying injury stint (opens Chrome windows; the helpers print their results).
findTeamResponse(injuryTimeline)

https://stats.nba.com/team/1610612756/advanced/?Season=2017-18&SeasonType=Regular%20Season&DateFrom=12%2F7%2F2017&DateTo=12%2F26%2F2017
0
Net Rating:  -2.6
[['Dallas Mavericks', '.286'], ['LA Clippers', '.424'], ['Memphis Grizzlies', '.294'], ['Minnesota Timberwolves', '.618'], ['Sacramento Kings', '.333'], ['San Antonio Spurs', '.686'], ['Toronto Raptors', '.719'], ['Washington Wizards', '.559']]


https://stats.nba.com/team/1610612756/advanced/?Season=2017-18&SeasonType=Regular%20Season&DateFrom=2%2F6%2F2018&DateTo=2%2F14%2F2018
0
Net Rating:  -26.8
[['Denver Nuggets', '.544'], ['Golden State Warriors', '.759'], ['Los Angeles Lakers', '.411'], ['San Antonio Spurs', '.593'], ['Utah Jazz', '.517']]


https://stats.nba.com/team/1610612756/advanced/?Season=2018-19&SeasonType=Regular%20Season&DateFrom=10%2F27%2F2018&DateTo=11%2F2%2F2018
0
Net Rating:  -16.2
[['Memphis Grizzlies', '.714'], ['Oklahoma City Thunder', '.500'], ['San Antonio Spurs', '.714'], ['Toronto Raptors', '.889']]


http

In [102]:
# Query team stats for each playing span (opens Chrome windows; the helpers print their results).
findActiveResponse(playingSpans)

https://stats.nba.com/team/1610612756/advanced/?Season=2015-16&SeasonType=Regular%20Season&DateFrom=10%2F27%2F2015&DateTo=11%2F2%2F2015
0
Net Rating:  1.5
[['Dallas Mavericks', '.667'], ['LA Clippers', '1.000'], ['Portland Trail Blazers', '.500']]


https://stats.nba.com/team/1610612756/advanced/?Season=2016-17&SeasonType=Regular%20Season&DateFrom=10%2F25%2F2016&DateTo=10%2F31%2F2016
0
Net Rating:  -11.2
[['Golden State Warriors', '.667'], ['LA Clippers', '1.000'], ['Oklahoma City Thunder', '1.000'], ['Sacramento Kings', '.500']]


https://stats.nba.com/team/1610612756/advanced/?Season=2016-17&SeasonType=Regular%20Season&DateFrom=11%2F2%2F2016&DateTo=3%2F19%2F2017
0
Net Rating:  -4.4
[['Atlanta Hawks', '.536'], ['Boston Celtics', '.629'], ['Brooklyn Nets', '.188'], ['Charlotte Hornets', '.435'], ['Chicago Bulls', '.471'], ['Cleveland Cavaliers', '.667'], ['Dallas Mavericks', '.435'], ['Denver Nuggets', '.478'], ['Detroit Pistons', '.486'], ['Golden State Warriors', '.797'], ['Houston R