In [28]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver

import string
import datetime
import time
import unicodedata

In [6]:
# global variables
# `%store -r NAME` is an IPython magic that reloads a variable previously saved
# with `%store NAME`; these cells fail to populate the names if nothing was
# stored beforehand.
#   datesDict    - indexed by season string (e.g. "2018-19"); elements [0] and
#                  [1] are parsed with '%a, %b %d, %Y' and used as the bounds
#                  of that season
#   playoffDates - indexed by season string; elements [0] and [1] are parsed
#                  with '%a, %b %d, %Y' and used as the playoff window
#   statsNBAMap  - indexed by the team text taken from the injury table; the
#                  value is placed in stats.nba.com team URLs
#   playersToIds / idsToPlayers - not referenced in the visible part of this
#                  notebook (presumably player-name <-> id lookups; verify)
%store -r playersToIds
%store -r idsToPlayers
%store -r datesDict
%store -r playoffDates
%store -r statsNBAMap

In [7]:
def setupHeadlessChrome(hideImages=True):
    """Build Chrome options for a headless browser session.

    hideImages -- when truthy, an experimental "prefs" option is added that
                  sets the managed image content setting to 2 and carries a
                  'disk-cache-size' entry of 4096.

    Returns the configured webdriver.ChromeOptions object.
    """
    headlessOpts = webdriver.ChromeOptions()
    headlessOpts.add_argument('--headless')  # don't open URL window
    if not hideImages:
        return headlessOpts

    headlessOpts.add_experimental_option("prefs", {
        "profile.managed_default_content_settings.images": 2,
        'disk-cache-size': 4096,
    })
    return headlessOpts

def chromeOptions():
    """Build the Chrome options used by the stats.nba.com scrapers.

    The browser is started incognito, with the GPU disabled, a 0x0 window
    positioned far off-screen, and popup blocking disabled.

    Returns the configured webdriver.ChromeOptions object.
    """
    browserOpts = webdriver.ChromeOptions()
    launchFlags = (
        "--incognito",
        '--disable-gpu',
        "--window-size=0,0",                # don't open URL window
        "--window-position=100000,100000",
        "--disable-popup-blocking",
    )
    for flag in launchFlags:
        browserOpts.add_argument(flag)
    return browserOpts

In [32]:
def getInjuryHistory(playerName):
    """Scrape a player's injury history from prosportstransactions.com.

    playerName -- the player's full name; diacritics are stripped via
                  nameExceptions() before searching.

    Returns a list of injury entries, each a list laid out as
    [start, end, days, team, season] (see appendCurrentTimes). If the last
    placement on the injured list has no matching return, the final entry is
    [start, 'present', 'present', 'present', 'present'].

    Raises whatever urlopen raises on network/HTTP failure.
    """
    # Local import: only this function needs it.
    from urllib.parse import quote_plus

    playerName = nameExceptions(playerName)
    # quote_plus turns spaces into '+' (same as the old "First+Last" form) and
    # percent-encodes everything else, so multi-word surnames, apostrophes and
    # single-word names all yield a valid query string.
    url = "https://www.prosportstransactions.com/basketball/Search/SearchResults.php?Player={}" \
        + "&Team=&BeginDate=&EndDate=&ILChkBx=yes&" \
        + "InjuriesChkBx=yes&Submit=Search"
    url = url.format(quote_plus(playerName))

    # Close the HTTP response as soon as the page has been parsed.
    with urlopen(url) as html:
        soup = BeautifulSoup(html, features="lxml")

    injuryTimeline, currentTimes = addToInjuryTimeline(soup.findAll('tr'), [], [])

    # The links to further result pages are read from the fourth <p>
    # (assumed page layout -- verify). A page with fewer paragraphs is treated
    # as having no further pages instead of raising IndexError.
    paragraphs = soup.findAll('p')
    links = paragraphs[3].findAll('a') if len(paragraphs) > 3 else []
    for link in links:
        nextSearch = link['href']
        # Only the last query argument of the link is appended to the base URL.
        argIndex = nextSearch.rindex('&')
        nextUrl = url + nextSearch[argIndex:]

        with urlopen(nextUrl) as nextHtml:
            nextSoup = BeautifulSoup(nextHtml, features="lxml")

        injuryTimeline, currentTimes = addToInjuryTimeline(
            nextSoup.findAll('tr'), currentTimes, injuryTimeline)

    if currentTimes != []:
        # Still-open injury: end, days, team and season are all 'present'.
        currentTimes.extend(['present'] * 4)
        injuryTimeline.append(currentTimes)

    return injuryTimeline
    
def addToInjuryTimeline(injuries, currentTimes, injuryTimeline):
    """Fold one page of result rows into the injury timeline.

    injuries       -- the <tr> elements of a results page; the first and last
                      rows are skipped
    currentTimes   -- the injury currently being built ([] when none is open);
                      may be mutated in place
    injuryTimeline -- list of completed injury entries; appended to in place

    Returns (injuryTimeline, currentTimes) so the open injury can be carried
    over to the next page.
    """
    for row in injuries[1:-1]:
        cells = row.findAll('td')
        date, rawTeam, acquired, _relinquished, description = [
            cells[col].text for col in range(5)
        ]
        team = rawTeam.strip()

        if acquired != ' ':
            # Player came back from the injured list.
            if not currentTimes:  # mistake in website, acquired from IL twice in a row
                continue
            injuryTimeline.append(appendCurrentTimes(date, currentTimes, team))
            currentTimes = []
            continue

        # Player was placed on the injured list.
        if not currentTimes:
            currentTimes.append(date)
        else:
            # A new placement while one is still open: close the open one
            # using its own start date as the end, then start a fresh one.
            injuryTimeline.append(
                appendCurrentTimes(currentTimes[0], currentTimes, team))
            currentTimes = [date]

        if 'out for season' in description:
            injuryTimeline.append(appendCurrentTimes('OFS', currentTimes, team))
            currentTimes = []

    return injuryTimeline, currentTimes

def appendCurrentTimes(endTime, currentTimes, team):
    """Complete an open injury entry in place and return it.

    endTime      -- end date string ('%Y-%m-%d') or a sentinel such as 'OFS'
    currentTimes -- list whose first element is the injury start date
    team         -- team text to record on the entry

    After the call the list holds [start, end, days, team, season]. When the
    end equals the start, the end is moved one day later.
    """
    isoFmt = '%Y-%m-%d'
    if endTime == currentTimes[0]:
        dayAfter = datetime.datetime.strptime(endTime, isoFmt) + datetime.timedelta(days=1)
        endTime = dayAfter.strftime(isoFmt)

    currentTimes.append(endTime)
    # Both helpers read the start/end already stored on the list.
    currentTimes.append(calculateInjuryDays(currentTimes))
    currentTimes.append(team)
    currentTimes.append(getInSeasonYear(currentTimes)[0])
    return currentTimes

def calculateInjuryDays(times):
    """Return the number of days between times[0] and times[1].

    Both elements are '%Y-%m-%d' strings. When times[1] is the sentinel
    'present' or 'OFS' the length is unknown and -1 is returned.
    """
    begin, finish = times[0], times[1]
    if finish in ('present', 'OFS'):
        return -1

    isoFmt = '%Y-%m-%d'
    elapsed = datetime.datetime.strptime(finish, isoFmt) - datetime.datetime.strptime(begin, isoFmt)
    return elapsed.days

############### Find how teams respond when a player X is out injured - net rating, and win pct of opponents played ####
def findTeamResponse(timeline):
    """Measure how the team performed while the player was out injured.

    timeline -- injury entries laid out as [start, end, days, team, season]
                (the output of getInjuryHistory).

    Only injuries longer than 5 days with a concrete end date and an
    in-season season label are evaluated.

    Returns 0 when no games were counted; otherwise a tuple
    (weightedAvg, allRatings), where weightedAvg is the net rating of each
    evaluated span weighted by its share of the games counted and allRatings
    is the per-season dict filled in by responseHelper. Callers must handle
    both return shapes.
    """
    allRatings = {}
    count = 0
    for injury in timeline:
        # Open-ended injuries have no end date to query.
        if injury[1] == "OFS" or injury[1] == 'present' or injury[0] == "OOS":
            continue
        if injury[2] > 5: #fix length
            season = injury[4]
            # The season label lives in injury[4] (injury[0] is always a
            # date). Out-of-season injuries have no "YYYY-YY" season to query
            # and used to crash inside responseHelper, so skip them here.
            if season == "OOS":
                continue
            startDate = datetime.datetime.strptime(injury[0], '%Y-%m-%d')
            endDate = datetime.datetime.strptime(injury[1], '%Y-%m-%d')

            count, allRatings = responseHelper(injury, season, startDate, endDate, count, allRatings)

    if count == 0:
        return 0

    # Explicit accumulation (not sum()) keeps floating-point results stable
    # across Python versions.
    weightedAvg = 0
    for seasonRatings in allRatings.values():
        for rating in seasonRatings:
            # rating = [net rating, opponent pcts, start, end, games played]
            weightedAvg += rating[0] * (rating[4]/count)
    return weightedAvg, allRatings
            
def findActiveResponse(timeline):
    """Measure how the team performed while the player was active.

    timeline -- dict mapping a season label to a list of playing spans whose
                first two elements are the span's start and end datetimes
                (the output of getPlayingSpans).

    Returns 0 when no games were counted; otherwise a tuple
    (weightedAvg, allRatings) where each span's net rating is weighted by its
    share of the games counted.
    """
    ratingsBySeason = {}
    gamesCounted = 0
    for season, playingSpans in timeline.items():
        for playingSpan in playingSpans:
            spanStart, spanEnd = playingSpan[0], playingSpan[1]
            gamesCounted, ratingsBySeason = responseHelper(
                playingSpan, season, spanStart, spanEnd, gamesCounted, ratingsBySeason)

    if gamesCounted == 0:
        return 0

    weighted = 0
    for seasonRatings in ratingsBySeason.values():
        for entry in seasonRatings:
            # entry[0] is the net rating, entry[4] the games played in the span
            weighted += entry[0] * (entry[4]/gamesCounted)
    return weighted, ratingsBySeason

def responseHelper(span, year, startDate, endDate, count, allRatings):
    """Fetch team stats for one date span and record them under its season.

    span       -- entry whose element [3] is the team key for statsNBAMap
    year       -- season label such as "2018-19"
    startDate, endDate -- datetimes bounding the span
    count      -- running total of games played
    allRatings -- dict of season -> list of
                  [net rating, opponent pcts, startDate, endDate, games played]

    Seasons starting before 1996 are skipped. Returns the updated
    (count, allRatings); allRatings is also modified in place.
    """
    nbaTeamId = statsNBAMap[span[3]]

    seasonStart = int(year[:year.index('-')])
    if seasonStart < 1996: # stats.nba limitation
        return count, allRatings

    seasonType = "Regular%20Season"
    if year != "2019-20":
        longFmt = '%a, %b %d, %Y'
        playoffWindow = playoffDates[year]
        playoffStart = datetime.datetime.strptime(playoffWindow[0], longFmt)
        playoffEnd = datetime.datetime.strptime(playoffWindow[1], longFmt)
        # A span that begins during the playoffs is queried as playoff data.
        if playoffStart <= startDate <= playoffEnd:
            seasonType = "Playoffs"

    # Dates go into the query string as month/day/year with '/' pre-encoded.
    dateFrom = "{}%2F{}%2F{}".format(startDate.month, startDate.day, startDate.year)
    dateTo = "{}%2F{}%2F{}".format(endDate.month, endDate.day, endDate.year)

    url = "https://stats.nba.com/team/{}/advanced/?Season={}&SeasonType={}&DateFrom={}&DateTo={}".format(
        nbaTeamId, year, seasonType, dateFrom, dateTo)
    # Standings are always taken from the regular season, up to the span's end.
    standingsUrl = ("https://stats.nba.com/teams/traditional/"
                    "?sort=W_PCT&dir=-1&Season={}&SeasonType=Regular%20Season"
                    "&DateTo={}").format(year, dateTo)

    absnet = getAbsentNetRtg(url)

    opponents, gamesPlayed = getAbsentOpponents(url + "&Split=opp")
    count += gamesPlayed

    oppPercentages = getOppPercentages(standingsUrl, opponents)

    allRatings.setdefault(year, []).append(
        [absnet, oppPercentages, startDate, endDate, gamesPlayed])

    return count, allRatings


def getInSeasonYear(injury):
    """Work out which season an injury belongs to.

    injury -- list whose element [0] is the start date ('%Y-%m-%d') and whose
              element [1] is the end date in the same format or "OFS".

    The start date is tested against the two seasons returned by
    getPotentialTradeYears for the start's calendar year; if it falls in
    neither, the end date is tested against the second one.

    Returns (year, startDate, endDate):
      year      -- the season label, or "OOS" when no season matched
      startDate -- the parsed start datetime
      endDate   -- the parsed end datetime; for an "OFS" injury, the end of
                   the matched season, or the string "OFS" when no season
                   matched

    Raises KeyError if a candidate season is missing from datesDict.
    """
    seasonFmt = '%a, %b %d, %Y'
    startDate = datetime.datetime.strptime(injury[0], '%Y-%m-%d')

    year = ""
    potentialYears = getPotentialTradeYears(startDate.year)

    firstStart = datetime.datetime.strptime(datesDict[potentialYears[0]][0], seasonFmt)
    firstEnd = datetime.datetime.strptime(datesDict[potentialYears[0]][1], seasonFmt)

    secondStart = datetime.datetime.strptime(datesDict[potentialYears[1]][0], seasonFmt)
    secondEnd = datetime.datetime.strptime(datesDict[potentialYears[1]][1], seasonFmt)

    endDate = datetime.datetime.strptime(injury[1], '%Y-%m-%d') if injury[1] != "OFS" else "OFS"

    if firstStart <= startDate <= firstEnd:
        year = potentialYears[0]
    elif secondStart <= startDate <= secondEnd:
        year = potentialYears[1]
    elif endDate != "OFS" and secondStart <= endDate <= secondEnd:
        # Injury started in the off-season but ended during the next season.
        year = potentialYears[1]

    if year == "":
        # Out of season. An "OFS" end cannot be resolved without a season, so
        # it stays as the sentinel; looking up datesDict[""] here used to
        # raise KeyError.
        return "OOS", startDate, endDate

    if endDate == "OFS":
        # "Out for season" lasts until the end of the matched season.
        endDate = datetime.datetime.strptime(datesDict[year][1], seasonFmt)
    return year, startDate, endDate
            
def getAbsentNetRtg(url):
    """Load a stats.nba.com team page in Chrome and read a net rating from it.

    url -- the page to load.

    Returns, as a float, the innerHTML of the sixth <td> in the first <tbody>
    (assumed to be the NETRTG column -- confirm against the page layout).

    The browser is always shut down, even when scraping fails. The wait for
    the table has no timeout, so a page that never renders a <tbody> blocks.
    """
    # Local import: the file-level imports only bring in `webdriver`.
    # find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome(options=chromeOptions()) # used to view generated JavaScript
    try:
        driver.get(url)

        # The table is generated by JavaScript; poll until it exists, sleeping
        # briefly between polls instead of spinning.
        table = driver.find_elements(By.TAG_NAME, 'tbody')
        while len(table) == 0:
            time.sleep(0.1)
            table = driver.find_elements(By.TAG_NAME, 'tbody')

        cells = table[0].find_elements(By.TAG_NAME, 'td')
        netrtg = cells[5].get_attribute('innerHTML')
        return float(netrtg)
    finally:
        # quit() ends the whole driver session; close() only closes the window.
        driver.quit()

def getAbsentOpponents(oppUrl):
    """Load a stats.nba.com opponent-split page and read opponents and games.

    oppUrl -- the page to load.

    Returns (opponents, gamesPlayed):
      opponents   -- the text of every <td> in the last <tbody> on the page
      gamesPlayed -- the second <td> of the first <tbody>, converted to int

    The browser is always shut down, even when scraping fails. The wait for
    the table has no timeout, so a page that never renders a <tbody> blocks.
    """
    # Local import: the file-level imports only bring in `webdriver`.
    # find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome(options=chromeOptions()) # used to view generated JavaScript
    try:
        driver.get(oppUrl)

        # The tables are generated by JavaScript; poll until one exists,
        # sleeping briefly between polls instead of spinning.
        table = driver.find_elements(By.TAG_NAME, 'tbody')
        while len(table) == 0:
            time.sleep(0.1)
            table = driver.find_elements(By.TAG_NAME, 'tbody')

        opponents = [cell.text for cell in table[-1].find_elements(By.TAG_NAME, 'td')]
        gamesPlayed = table[0].find_elements(By.TAG_NAME, 'td')[1].text
        return opponents, int(gamesPlayed)
    finally:
        # quit() ends the whole driver session; close() only closes the window.
        driver.quit()
    
def getOppPercentages(standingsUrl, opponents):
    """Look up each opponent's win percentage on a stats.nba.com standings page.

    standingsUrl -- the page to load
    opponents    -- team names as they appear in the page's link text

    Returns a list of [opponent, pct] pairs in the order of `opponents`; pct
    is the text of the sixth <td> of that team's row and is left as a string.

    Raises KeyError when an opponent is not listed on the page. The browser is
    always shut down, even when that happens. The wait for the table has no
    timeout, so a page that never renders a <tbody> blocks.
    """
    # Local import: the file-level imports only bring in `webdriver`.
    # find_element(s)(By.TAG_NAME, ...) replaces find_element(s)_by_tag_name,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome(options=chromeOptions()) # used to view generated JavaScript
    try:
        driver.get(standingsUrl)

        # The table is generated by JavaScript; poll until it exists, sleeping
        # briefly between polls instead of spinning.
        table = driver.find_elements(By.TAG_NAME, 'tbody')
        while len(table) == 0:
            time.sleep(0.1)
            table = driver.find_elements(By.TAG_NAME, 'tbody')

        winPct = {}
        for team in table[0].find_elements(By.TAG_NAME, 'tr'):
            stats = team.find_elements(By.TAG_NAME, 'td')
            name = stats[1].find_element(By.TAG_NAME, 'a').text
            winPct[name] = stats[5].text

        return [[opponent, winPct[opponent]] for opponent in opponents]
    finally:
        # quit() ends the whole driver session; close() only closes the window.
        driver.quit()
            
def getPotentialTradeYears(year):
    """Return the two season labels that overlap a calendar year.

    year -- calendar year as an int, e.g. 2019.

    Returns [season ending in `year`, season starting in `year`], each
    formatted like "2018-19". A second label of "2020-21" is replaced by
    "2019-20".
    """
    endingSeason = "{}-{}".format(year - 1, str(year)[2:])
    startingSeason = "{}-{}".format(year, str(year + 1)[2:])
    if startingSeason == "2020-21":
        startingSeason = "2019-20"
    return [endingSeason, startingSeason]

def getPlayingSpans(injuryTimeline):
    """Derive the stretches in which the player was active before each injury.

    injuryTimeline -- injury entries laid out as
                      [start, end, days, team, season].

    Entries whose season is "OOS" or "present", or whose end is "OFS", are
    ignored. Within a season, an active stretch runs from the season start
    (first injury) or the previous injury's end (later injuries) up to the
    injury's start.

    Returns a dict of season -> list of [activeFrom, activeTo, days, team],
    keeping only stretches longer than zero days.
    """
    isoFmt = '%Y-%m-%d'
    absencesBySeason = {}

    for entry in injuryTimeline:
        team, season = entry[3], entry[4]
        if season in ("OOS", "present") or entry[1] == "OFS":
            continue
        injuredFrom = datetime.datetime.strptime(entry[0], isoFmt)
        injuredTo = datetime.datetime.strptime(entry[1], isoFmt)

        if season in absencesBySeason:
            seasonRows = absencesBySeason[season]
            # Active again from the end of the previous injury in this season.
            seasonRows.append([seasonRows[-1][2], injuredFrom, injuredTo, team])
        else:
            seasonStart = datetime.datetime.strptime(datesDict[season][0], '%a, %b %d, %Y')
            absencesBySeason[season] = [[seasonStart, injuredFrom, injuredTo, team]]

    playingDict = {}
    for season, seasonRows in absencesBySeason.items():
        activeStretches = []
        for activeFrom, injuredFrom, _injuredTo, team in seasonRows:
            daysActive = (injuredFrom - activeFrom).days
            if daysActive > 0:
                activeStretches.append([activeFrom, injuredFrom, daysActive, team])
        playingDict[season] = activeStretches

    return playingDict

def nameExceptions(playerName):
    """Return playerName with diacritics removed.

    The name is decomposed with Unicode NFD normalization and every
    nonspacing mark (category 'Mn') is dropped, so an accented letter is
    reduced to its base letter.
    """
    decomposed = unicodedata.normalize('NFD', playerName)
    baseChars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(baseChars)

In [9]:
# injuryTimeline = getInjuryHistory("Kyle Kuzma")
# injuryTimeline

In [10]:
# playingSpans = getPlayingSpans(injuryTimeline)
# playingSpans

In [11]:
# findTeamResponse(injuryTimeline)

In [12]:
# findActiveResponse(playingSpans)

In [13]:
def collectInjuryData(playerName):
    """Gather the team-response figures for one player.

    playerName -- the player's full name.

    Returns a dict with two keys:
      'InjResp' -- result of findTeamResponse on the player's injury timeline
      'ActResp' -- result of findActiveResponse on the player's playing spans
    """
    timeline = getInjuryHistory(playerName)
    activeSpans = getPlayingSpans(timeline)

    return {
        'InjResp': findTeamResponse(timeline),
        'ActResp': findActiveResponse(activeSpans),
    }

In [30]:
# collectInjuryData("Álex Abrines")