In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver

import string
import datetime

In [115]:
# global variables
%store -r playersToIds
%store -r idsToPlayers
%store -r datesDict

In [294]:
###### WHO HAS A PLAYER PLAYED WITH, AND FOR HOW LONG? #########
def findTotalSharedTimes(playerName):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)
    
    html = urlopen(url)
    soup = BeautifulSoup(html)
    
    sharedTimes = {}
    seenYears = set()
    lineups = soup.find(id='inner_nav').findAll('a')
    for row in lineups:
        if 'lineups' in row['href']:
            if row['href'] in seenYears: # repeats in HTML
                break
            seenYears.add(row['href'])
            yearUrl = "https://www.basketball-reference.com" + row['href']
            print("loading JavaScript: ", yearUrl)
            yearTeammates = findSharedCourtTimes(playerId, yearUrl)
            for teammate in yearTeammates:
                timeStr = yearTeammates[teammate]
                colon = timeStr.index(':')
                timeShared = datetime.timedelta(seconds=60*int(timeStr[:colon]) + int(timeStr[colon+1:]))
                if teammate in sharedTimes:
                    sharedTimes[teammate] += timeShared
                else:
                    sharedTimes[teammate] = timeShared
    return sharedTimes

def findSharedCourtTimes(playerId, url):
    options = setupHeadlessChrome()
    
    driver = webdriver.Chrome(options=options) # used to view generated JavaScript
    driver.get(url)
    sharedTimes = {}
    
    html = driver.page_source
    twoMan = driver.find_element_by_id("div_lineups-2-man")
    table = twoMan.find_elements_by_tag_name('tr')
    
    totalRows = len(table)
    for index in range(2, totalRows-1):
        row = table[index]
        elems = row.find_elements_by_tag_name('td')
        players = elems[0].get_attribute('csk')
        minutes = elems[2].get_attribute('csk')

        colon = players.index(":")
        firstTeammate = players[:colon]
        secondTeammate = players[colon+1:]

        teammate = firstTeammate if playerId == secondTeammate else secondTeammate
        sharedTimes[idsToPlayers[teammate]] = minutes
    return sharedTimes

################# Find all dates that a player has been traded, including in-season trades #######################
def getTrades(playerName):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)
    
    options = setupHeadlessChrome()
    driver = webdriver.Chrome(options=options) # used to view generated JavaScript
    driver.get(url)
    
    html = driver.page_source
    transactions = driver.find_element_by_id("div_transactions")
    
    transList = transactions.find_elements_by_tag_name('span')
    tradeDates = []
    inSeasonTradeDates = []
    for transaction in transList:
        text = transaction.get_attribute('innerText')
        colon = text.index(':')
        dateStr = text[:colon]
        description = text[colon+1:]
        if 'trade' in description or 'Trade' in description: # not yet strong enough
            
            date = datetime.datetime.strptime(dateStr, '%B %d, %Y')
            tradeDates.append(date)
            potentialYears = getPotentialTradeYears(date.year)
        
            outOfSeason = True
            if potentialYears[0] in datesDict:
                firstDates = datesDict[potentialYears[0]]
                firstStart = datetime.datetime.strptime(firstDates[0], '%a, %b %d, %Y')
                firstEnd = datetime.datetime.strptime(firstDates[1], '%a, %b %d, %Y')
                if date >= firstStart and date <= firstEnd:
                    outOfSeason = False
            
            if potentialYears[1] in datesDict:
                lastDates = datesDict[potentialYears[1]]
                lastStart = datetime.datetime.strptime(lastDates[0], '%a, %b %d, %Y')
                lastEnd = datetime.datetime.strptime(lastDates[1], '%a, %b %d, %Y')
                if date >= lastStart and date <= lastEnd:
                    outOfSeason = False
                    
            if not outOfSeason:
                inSeasonTradeDates.append(date)
            
    return tradeDates, inSeasonTradeDates
    
def getPotentialTradeYears(year):
    options = []
    options.append(str(year-1) + '-' + str(year)[2:])
    options.append(str(year) + '-' + str((year+1))[2:])
    return options

######## FIND ALL TEAMS AN NBA PLAYER HAS PLAYED FOR ##########
def getTeams(player):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)
    
    html = urlopen(url)
    soup = BeautifulSoup(html)
    seasons = soup.findAll('tr')
    teams = []
    for i in range(1, len(seasons)):
        season = seasons[i]
        if season.th:
            if season.th.getText() == "Career": # career table starts
                break
        else: # did not play this season
            continue
        teamName = season.findAll('td')[1].getText()
        if teamName != "TOT": # traded in the middle of a season
            teams.append(teamName)
    return teams

######## FIND FRACTION OF POSITIONS PLAYER HAS PLAYED IN CAREER ##########
def findPositionRatios(playerName):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)

    options = setupHeadlessChrome()
    driver = webdriver.Chrome(options=options) # used to view generated JavaScript
    driver.get(url)

    html = driver.page_source
    pbp = driver.find_element_by_id("div_pbp")
    table = pbp.find_elements_by_tag_name('tr')

    for i in range(2, len(table)): # first two rows set up table
        season = table[i]
        stats = season.find_elements_by_tag_name('td')
        if stats[0].text == "": # stats[0] is age, so an empty string means "Career" row
            ratio = {}
            for row in stats:
                stat = row.get_attribute('data-stat')
                if 'pct' in stat:
                    ratio[stat[-1]] = float(row.text.strip('%'))/100 if row.text != "" else 0.0
            return ratio
        
################# Find +/- stats for a player's individual playing years, and career ######################
def getPlusMinus(playerName):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)
    options = setupHeadlessChrome()
    
    driver = webdriver.Chrome(options=options) # used to view generated JavaScript
    driver.get(url)
    
    html = driver.page_source
    pbp = driver.find_element_by_id("div_pbp")
    table = pbp.find_elements_by_tag_name('tr')

    plusMinusDict = {}
    for i in range(2, len(table)): # first two rows set up table
        season = table[i]
        stats = season.find_elements_by_tag_name('td')
        
        year = season.find_element_by_tag_name('a').text if stats[0].text != "" else "Career"
        onCourt = stats[11].get_attribute('innerText')
        onOff = stats[12].get_attribute('innerText')
        plusMinusDict[year] = [onCourt, onOff]
        
        if stats[0].text == "":
            return plusMinusDict

def findStreaks(playerName):
    playerId = playersToIds[playerName]
    url = getMainPlayerURL(playerId)
    
    html = urlopen(url)
    soup = BeautifulSoup(html)
    seasons = soup.findAll('tr')
    
    ptsStreakDict = {}
    for i in range(1, len(seasons)):
        season = seasons[i]
        if season.th:
            if season.th.getText() == "Career": # career table starts
                break
        else: # did not play this season
            continue
        yearStr = season['id']
        colon = yearStr.index('.')
        year = yearStr[colon+1:]
        teamId = season.findAll('a')[1].text
        teamId = streakFindingTeam(teamId)
        ptsStreaks = getStreakData(playerId, year, teamId, True, 20)
        ptsStreakDict[year] = ptsStreaks
    return ptsStreakDict
        
def getStreakData(playerId, year, teamId, isPts, value):
    url = "https://www.basketball-reference.com/play-index/pstreak.cgi?" \
        + "request=1&year_min={}&year_max={}&team_id={}&" \
        + "season_start=1&season_end=-1&c1stat={}&c1comp=ge&" \
        + "c1val={}"
    url = url.format(year, year, teamId, 'pts' if isPts else 'ast', value)
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html)
    
    playerStreaks = []
    streaks = soup.findAll('tr')
    for i in range(1, len(streaks)):
        streak = streaks[i]
        print(streak)
        if streak.find('td') is not None:
            nextId = streak.find('td')['data-append-csv']
            if playerId == nextId:
                startDate = streak.findAll('a')[1].text
                endDate = streak.findAll('a')[2].text
                streakLength = streak.findAll('td')[-1].text
                
                playerStreaks.append([startDate, endDate, streakLength])
    return playerStreaks # fix Kyrie

######## Parse Basketball Reference URL for Player's Main Page ##########
def getMainPlayerURL(playerId):
    return "https://www.basketball-reference.com/players/{}/{}.html".format(playerId[0], playerId)


def streakFindingTeam(team):
    if team == 'BRK':
        return 'NJN'
    else:
        return team

def setupHeadlessChrome():
    options = webdriver.ChromeOptions() # don't open URL window
    options.add_argument('--headless')
    prefs = {"profile.managed_default_content_settings.images": 2, 'disk-cache-size': 4096 }
    options.add_experimental_option("prefs", prefs)
    return options

In [290]:
playerName = 'Kyrie Irving'

In [190]:
findTotalSharedTimes(playerName)

loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2004
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2005
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2006
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2007
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2008
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2009
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2010
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2011
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2012
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2013
loading JavaScript:  https://www.basketball-reference.com/players/r/ridnolu01/lineups/2014

{'Ronald Murray': datetime.timedelta(days=1, seconds=345),
 'Rashard Lewis': datetime.timedelta(days=4, seconds=18004),
 'Vladimir Radmanović': datetime.timedelta(days=1, seconds=43019),
 'Reggie Evans': datetime.timedelta(days=1, seconds=64378),
 'Ray Allen': datetime.timedelta(days=3, seconds=74263),
 'Ansu Sesay': datetime.timedelta(seconds=19223),
 'Jerome James': datetime.timedelta(days=1, seconds=3255),
 'Calvin Booth': datetime.timedelta(seconds=16939),
 'Vitaly Potapenko': datetime.timedelta(seconds=16938),
 'Richie Frahm': datetime.timedelta(seconds=16805),
 'Antonio Daniels': datetime.timedelta(seconds=46601),
 'Nick Collison': datetime.timedelta(days=2, seconds=41802),
 'Danny Fortson': datetime.timedelta(seconds=30069),
 'Damien Wilkins': datetime.timedelta(days=1, seconds=66019),
 'Johan Petro': datetime.timedelta(days=1, seconds=42893),
 'Robert Swift': datetime.timedelta(seconds=40603),
 'Chris Wilcox': datetime.timedelta(days=1, seconds=65978),
 'Earl Watson': datetime.

In [281]:
getTeams(playerName)

['CLE', 'CLE', 'CLE', 'CLE', 'CLE', 'CLE', 'BOS', 'BOS', 'BRK']

In [192]:
findPositionRatios(playerName)

{'1': 0.85, '2': 0.15, '3': 0.01, '4': 0.0, '5': 0.0}

In [193]:
getTrades(playerName)

([datetime.datetime(2003, 2, 20, 0, 0),
  datetime.datetime(2008, 8, 13, 0, 0),
  datetime.datetime(2013, 7, 11, 0, 0),
  datetime.datetime(2014, 2, 20, 0, 0),
  datetime.datetime(2015, 6, 25, 0, 0),
  datetime.datetime(2015, 6, 25, 0, 0),
  datetime.datetime(2015, 6, 25, 0, 0),
  datetime.datetime(2015, 6, 30, 0, 0)],
 [datetime.datetime(2003, 2, 20, 0, 0), datetime.datetime(2014, 2, 20, 0, 0)])

In [278]:
getPlusMinus(playerName)

{'2011-12': ['-5.1', '+4.7'],
 '2012-13': ['-4.8', '+1.0'],
 '2013-14': ['-5.1', '-4.3'],
 '2014-15': ['+7.5', '+9.4'],
 '2015-16': ['+6.4', '-0.3'],
 '2016-17': ['+6.0', '+7.4'],
 '2017-18': ['+5.3', '+3.2'],
 '2018-19': ['+5.9', '+2.8'],
 '2019-20': ['-0.3', '+0.6'],
 'Career': ['+2.2', '+3.1']}

In [293]:
findStreaks(playerName)

https://www.basketball-reference.com/play-index/pstreak.cgi?request=1&year_min=2012&year_max=2012&team_id=CLE&season_start=1&season_end=-1&c1stat=pts&c1comp=ge&c1val=20
<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="jamisan01" data-stat="player"><a href="/players/j/jamisan01.html">Antawn Jamison</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201202290NYK.html">2012-02-29</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201203130CLE.html">2012-03-13</a></td><td class="right" data-stat="games">8</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201201080POR.html">2012-01-08</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201201160CHA.html">2012-01-16</a></td><td class="righ

<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201402250CLE.html">2014-02-25</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201403040CLE.html">2014-03-04</a></td><td class="right" data-stat="games">5</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201401260CLE.html">2014-01-26</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201402030DAL.html">2014-02-03</a></td><td class="right" data-stat="games">5</td></tr>
<tr><th class="right" csk="3" data-stat="ranker" scope="row">3</th><td class="left" data-append-csv="waitedi01" data-stat="player"><a href

<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="jamesle01" data-stat="player"><a href="/players/j/jamesle01.html">LeBron James</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201604090CHI.html">2016-04-09</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201606020GSW.html">2016-06-02</a></td><td class="right" data-stat="games">17</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="jamesle01" data-stat="player"><a href="/players/j/jamesle01.html">LeBron James</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201601210CLE.html">2016-01-21</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201602210OKC.html">2016-02-21</a></td><td class="right" data-stat="games">14</td></tr>
<tr><th class="right" csk="3" data-stat="ranker" scope="row">3</th><td class="left" data-append-csv="jamesle01" data-stat="player"><a hr

<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201702060WAS.html">2017-02-06</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201703250CLE.html">2017-03-25</a></td><td class="right" data-stat="games">21</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="jamesle01" data-stat="player"><a href="/players/j/jamesle01.html">LeBron James</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201704050BOS.html">2017-04-05</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201705190BOS.html">2017-05-19</a></td><td class="right" data-stat="games">13</td></tr>
<tr><th class="right" csk="3" data-stat="ranker" scope="row">3</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a hr

<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201712130BOS.html">2017-12-13</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201712310BOS.html">2017-12-31</a></td><td class="right" data-stat="games">11</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201801060BRK.html">2018-01-06</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201801290DEN.html">2018-01-29</a></td><td class="right" data-stat="games">8</td></tr>
<tr><th class="right" csk="3" data-stat="ranker" scope="row">3</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a hre

<tr><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" data-append-csv="dinwisp01" data-stat="player"><a href="/players/d/dinwisp01.html">Spencer Dinwiddie</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201912080BRK.html">2019-12-08</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201912260BRK.html">2019-12-26</a></td><td class="right" data-stat="games">8</td></tr>
<tr><th class="right" csk="2" data-stat="ranker" scope="row">2</th><td class="left" data-append-csv="irvinky01" data-stat="player"><a href="/players/i/irvinky01.html">Kyrie Irving</a></td><td class="left" data-stat="date_first"><a href="/boxscores/201910230BRK.html">2019-10-23</a></td><td class="left" data-stat="date_last"><a href="/boxscores/201911080POR.html">2019-11-08</a></td><td class="right" data-stat="games">8</td></tr>
<tr><th class="right" csk="3" data-stat="ranker" scope="row">3</th><td class="left" data-append-csv="dinwisp01" data-stat="player"><a

{'2012': [['2012-01-08', '2012-01-16', '5'],
  ['2012-02-28', '2012-03-05', '4'],
  ['2012-01-27', '2012-01-31', '3'],
  ['2012-03-19', '2012-03-21', '2'],
  ['2012-03-28', '2012-03-30', '2'],
  ['2012-02-19', '2012-02-21', '2']],
 '2013': [['2013-02-02', '2013-02-11', '5'],
  ['2012-12-14', '2012-12-19', '4'],
  ['2013-03-04', '2013-03-08', '3'],
  ['2013-04-09', '2013-04-12', '3'],
  ['2012-11-03', '2012-11-07', '3'],
  ['2012-11-11', '2012-11-17', '3'],
  ['2013-01-02', '2013-01-05', '3'],
  ['2013-01-22', '2013-01-26', '3'],
  ['2012-12-26', '2012-12-28', '2'],
  ['2013-01-09', '2013-01-11', '2']],
 '2014': [['2014-02-25', '2014-03-04', '5'],
  ['2014-01-26', '2014-02-03', '5'],
  ['2013-12-23', '2013-12-29', '4'],
  ['2014-01-15', '2014-01-22', '4'],
  ['2013-11-16', '2013-11-22', '3'],
  ['2013-12-07', '2013-12-13', '3'],
  ['2014-02-07', '2014-02-09', '2'],
  ['2014-03-08', '2014-03-12', '2'],
  ['2013-12-17', '2013-12-20', '2']],
 '2015': [['2014-11-10', '2014-11-22', '7'],
  [

In [277]:
getStreakData('irvinky01', 2020, 'NJN', True, 20)

https://www.basketball-reference.com/play-index/pstreak.cgi?request=1&year_min=2020&year_max=2020&team_id=NJN&season_start=1&season_end=-1&c1stat=pts&c1comp=ge&c1val=20
2019-10-23
2019-11-08
8
2020-01-23
2020-01-31
4
2020-01-12
2020-01-14
2


[['2019-10-23', '2019-11-08', '8'],
 ['2020-01-23', '2020-01-31', '4'],
 ['2020-01-12', '2020-01-14', '2']]