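This notebook converts NBA play-by-play data into a table with one row per stretch of play between a given away lineup and home lineup, recording the seconds played (and the proportion of game time) and the points scored by each side during that stretch. The final pivot table (exportable to CSV) holds the home team's point differential for each away-lineup vs. home-lineup matchup.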

In [1]:
import pandas as pd
import datetime
import math
import numpy as np


In [3]:
##Read the play-by-play worksheet into a pandas dataframe
workbook_path = '(2016-06-10)-0041500404-GSW@CLE.xlsx'
worksheet_name = '(2016-06-10)-0041500404-GSW@CLE'
DF = pd.read_excel(workbook_path, sheet_name=worksheet_name)

In [4]:
##The first row only holds a description, so keep everything after it.
##NOTE: DF is rebound in place, so re-running this cell drops one more row each time.
DF = DF.iloc[1:]

In [5]:
##Identify Player Combinations for each row and create a new column/variable for them.
##Each combo is the alphabetically sorted list of the five names on the floor, so the
##same five players always produce the same value regardless of column order.
##The names are sorted directly rather than joined on ',' and split again, which
##would break apart any name that itself contains a comma.
away_slots = ['a1', 'a2', 'a3', 'a4', 'a5']
home_slots = ['h1', 'h2', 'h3', 'h4', 'h5']

DF['ComboA'] = pd.Series(
    [sorted(players) for players in DF[away_slots].values.tolist()],
    index=DF.index,
)

DF['ComboH'] = pd.Series(
    [sorted(players) for players in DF[home_slots].values.tolist()],
    index=DF.index,
)


In [8]:
#Parse the elapsed and remaining time columns into Timedelta values
#(each value is stringified first, then handed to pd.to_timedelta)
for clock_column in ('elapsed', 'remaining_time'):
    DF[clock_column] = pd.to_timedelta(DF[clock_column].astype(str))

#Spot-check a single converted value
DF['remaining_time'].iloc[5]

Timedelta('0 days 00:11:14')

In [29]:
##Record, for every stretch of play with an unchanged pair of lineups, the seconds
##played and the points each side scored during that stretch.

#Rows are collected in a plain list and converted to a dataframe once after the
#loop: DataFrame.append was removed in pandas 2.0 and copied the whole frame on
#every call, which made the loop quadratic.
stint_records = []

#Initialize variables
Current_Team_A=''
Current_Team_H=''
start_time=datetime.timedelta(0,0,0) # current team configuration start time
home_score, away_score = 0,0 # scoreboard when the current stretch started

## Loop over rows, examining for changes, record time spent on each team-team matchup
for i in DF.index:
    period_ended = DF.at[i,'event_type']=='end of period'

    #nothing to record while the same two lineups stay on the floor mid-period
    if (not period_ended
            and Current_Team_A == DF.at[i,'ComboA']
            and Current_Team_H == DF.at[i,'ComboH']):
        continue

    #close the current stretch: time and points since it started
    time_spent = DF.at[i,'elapsed']-start_time
    home_scored = DF.at[i,"home_score"] - home_score
    away_scored = DF.at[i,"away_score"] - away_score
    stint_records.append({'Team_A' : str(Current_Team_A),
                          'Team_H' : str(Current_Team_H),
                          'Seconds_Spent' : time_spent.seconds,
                          'Home_Points' : home_scored,
                          'Away_Points' : away_scored})

    #the next stretch starts from the current scoreboard
    home_score = DF.at[i,"home_score"]
    away_score = DF.at[i,"away_score"]

    if period_ended:
        #the stretch start is reset to zero for the next period; lineups are left as they were
        start_time=datetime.timedelta(0,0,0)
    else:
        #lineup change: the new stretch starts now with the new lineups
        start_time = DF.at[i,'elapsed']
        Current_Team_A = DF.at[i,'ComboA']
        Current_Team_H = DF.at[i,'ComboH']

#Explicit columns keep the frame well-formed even if no rows were recorded.
#The very first row recorded has empty lineups (the initial '' values).
Team_Team_Seconds_DF = pd.DataFrame(
    stint_records,
    columns = ['Team_A','Team_H','Seconds_Spent','Home_Points','Away_Points'],
)

In [32]:
#Keep only stretches that actually lasted some time (several substitutions on the
#same clock tick, e.g. at the start of a period, produce zero-second rows)
has_play_time = Team_Team_Seconds_DF['Seconds_Spent'] > 0

#Make the seconds numeric, then derive the share of game time and the home margin.
#2880 s = 48 min (a regulation game without overtime, presumably).
Team_Team_Seconds_DF = (
    Team_Team_Seconds_DF
    .loc[has_play_time]
    .assign(Seconds_Spent=lambda stints: pd.to_numeric(stints["Seconds_Spent"]))
    .assign(Fract_Spent=lambda stints: pd.to_numeric(stints["Seconds_Spent"]/2880))
    .assign(Home_point_adv=lambda stints: stints["Home_Points"] - stints["Away_Points"])
)

In [33]:
##Distinct lineups of each side, in order of first appearance, used later to
##order the pivot table's columns and rows
H_unique = pd.unique(Team_Team_Seconds_DF['Team_H'])
A_unique = pd.unique(Team_Team_Seconds_DF['Team_A'])


In [34]:
##CREATE A PIVOT TABLE
##Rows are away lineups, columns are home lineups, and each cell is the summed
##home point advantage over every stretch in which that pair faced each other.
df=Team_Team_Seconds_DF  # alias not used in this cell; kept in case other cells refer to it

##aggregate by summing; the string 'sum' is passed instead of the builtin `sum`,
##which pandas >= 2.1 warns about because its handling is slated to change
PivotTable = Team_Team_Seconds_DF.pivot_table(
    index='Team_A',
    columns='Team_H',
    values='Home_point_adv',
    aggfunc='sum',
)

PivotTable = PivotTable.fillna(0) ##replace nan (pairs that never met) with 0

##Reorder columns, then rows, by each lineup's order of first appearance
pt = PivotTable[H_unique]

pt = pt.reindex(A_unique)


In [36]:
# Display the pivot table: away lineups as rows, home lineups as columns
pt

Team_H,"['J.R. Smith', 'Kyrie Irving', 'LeBron James', 'Richard Jefferson', 'Tristan Thompson']","['J.R. Smith', 'Kevin Love', 'Kyrie Irving', 'LeBron James', 'Richard Jefferson']","['Iman Shumpert', 'J.R. Smith', 'Kevin Love', 'Kyrie Irving', 'LeBron James']","['Iman Shumpert', 'J.R. Smith', 'Kevin Love', 'Kyrie Irving', 'Tristan Thompson']","['Iman Shumpert', 'J.R. Smith', 'Matthew Dellavedova', 'Richard Jefferson', 'Tristan Thompson']","['Channing Frye', 'Iman Shumpert', 'Kevin Love', 'LeBron James', 'Matthew Dellavedova']","['Channing Frye', 'Iman Shumpert', 'LeBron James', 'Matthew Dellavedova', 'Richard Jefferson']","['Channing Frye', 'Iman Shumpert', 'Kyrie Irving', 'LeBron James', 'Richard Jefferson']","['Channing Frye', 'J.R. Smith', 'Kyrie Irving', 'LeBron James', 'Richard Jefferson']","['J.R. Smith', 'Kevin Love', 'Kyrie Irving', 'LeBron James', 'Tristan Thompson']","['Iman Shumpert', 'Kevin Love', 'Kyrie Irving', 'LeBron James', 'Matthew Dellavedova']","['Channing Frye', 'Dahntay Jones', 'J.R. Smith', 'Kyrie Irving', 'LeBron James']"
Team_A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
"['Andrew Bogut', 'Draymond Green', 'Harrison Barnes', 'Klay Thompson', 'Stephen Curry']",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'Harrison Barnes', 'Klay Thompson', 'Stephen Curry']",-4.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-7.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'Harrison Barnes', 'Shaun Livingston', 'Stephen Curry']",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'James Michael McAdoo', 'Shaun Livingston', 'Stephen Curry']",0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"['Andre Iguodala', 'Harrison Barnes', 'James Michael McAdoo', 'Shaun Livingston', 'Stephen Curry']",0.0,0.0,0.0,-2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'Harrison Barnes', 'Klay Thompson', 'Shaun Livingston']",0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,2.0,-1.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'James Michael McAdoo', 'Klay Thompson', 'Stephen Curry']",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0
"['Andre Iguodala', 'Draymond Green', 'Festus Ezeli', 'Klay Thompson', 'Stephen Curry']",-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"['Festus Ezeli', 'Harrison Barnes', 'Klay Thompson', 'Shaun Livingston', 'Stephen Curry']",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
"['Draymond Green', 'Festus Ezeli', 'Klay Thompson', 'Shaun Livingston', 'Stephen Curry']",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [35]:
##Export the pivot table to a csv file
output_csv_path = 'Team_Team_Point_Diff_Sample.csv'
pt.to_csv(output_csv_path)

In [31]:
# List the stretches from the largest to the smallest share of game time
Team_Team_Seconds_DF.sort_values(by="Fract_Spent", ascending=False)

Unnamed: 0,Team_A,Team_H,Seconds_Spent,Away_Points,Home_Points,Fract_Spent
29,"['Andrew Bogut', 'Draymond Green', 'Harrison B...","['J.R. Smith', 'Kyrie Irving', 'LeBron James',...",341,14.0,14.0,0.118403
27,"['Andre Iguodala', 'Draymond Green', 'Harrison...","['J.R. Smith', 'Kevin Love', 'Kyrie Irving', '...",308,11.0,15.0,0.106944
1,"['Andrew Bogut', 'Draymond Green', 'Harrison B...","['J.R. Smith', 'Kyrie Irving', 'LeBron James',...",260,10.0,10.0,0.090278
34,"['Anderson Varejao', 'Andre Iguodala', 'Draymo...","['Iman Shumpert', 'J.R. Smith', 'Kevin Love', ...",242,7.0,7.0,0.084028
14,"['Andre Iguodala', 'Draymond Green', 'Harrison...","['Channing Frye', 'Iman Shumpert', 'Kevin Love...",222,5.0,4.0,0.077083
45,"['Andre Iguodala', 'Draymond Green', 'Harrison...","['Channing Frye', 'J.R. Smith', 'Kyrie Irving'...",199,5.0,5.0,0.069097
40,"['Andre Iguodala', 'Harrison Barnes', 'James M...","['Iman Shumpert', 'J.R. Smith', 'Kevin Love', ...",177,7.0,6.0,0.061458
2,"['Andre Iguodala', 'Draymond Green', 'Harrison...","['J.R. Smith', 'Kyrie Irving', 'LeBron James',...",157,7.0,5.0,0.054514
8,"['Andre Iguodala', 'Harrison Barnes', 'James M...","['Iman Shumpert', 'J.R. Smith', 'Kevin Love', ...",121,7.0,5.0,0.042014
4,"['Andre Iguodala', 'Draymond Green', 'Harrison...","['J.R. Smith', 'Kevin Love', 'Kyrie Irving', '...",119,4.0,5.0,0.041319
