In [76]:
import math
from collections import defaultdict

import numpy as np
import pandas as pd
import plotly.graph_objects as go


# One play-by-play log per quarter. The files are read with header=None, so
# pandas labels the columns with integers instead of taking names from row 0.
quarter_files = (
    'Heat_Lakers_OCT6_2020_Q1.csv',
    'Heat_Lakers_OCT6_2020_Q2.csv',
    'Heat_Lakers_OCT6_2020_Q3.csv',
    'Heat_Lakers_OCT6_2020_Q4.csv',
)
q1, q2, q3, q4 = (pd.read_csv(path, header=None) for path in quarter_files)
all_quarters = [q1, q2, q3, q4]

Find all end-of-possession events (make, miss, turnover, etc.)

In [77]:
# game[quarter][play] is the list of entries recorded for one possession.
# Entries whose type is exactly float are dropped (presumably the NaN padding
# pandas puts in rows that are shorter than the widest row -- confirm).
game = [
    [
        [entry for entry in list(row) if type(entry) != float]
        for _, row in quarter.iterrows()
    ]
    for quarter in all_quarters
]

In [78]:
# The last entry of every play is how that possession ended; collect the
# distinct values across the whole game.
end_actions = {play[-1] for quarter in game for play in quarter}

Denote each player node as 'initials' + 'pass layer' where pass layer is 0,1,2... depending on what position in the passing sequence they belong to.

In [79]:
# Append each column's label to every entry in that column, so the same
# initials in different columns become distinct node names.
# This mutates the quarter frames in place: running the cell a second time
# appends the suffix again.
for frame in all_quarters:
    for layer in frame.columns:
        layer_suffix = str(layer)
        frame[layer] = frame[layer] + layer_suffix

Fix outcome events that now have the pass layer added to their string

In [80]:
def strip_outcomes(token, outcomes=None):
    """Undo the pass-layer suffix on tokens that are really outcome events.

    Parameters
    ----------
    token : str or float
        A cell value after the pass-layer suffix was appended, or a float NaN.
    outcomes : collection of str, optional
        Labels that count as outcome events. When omitted, the notebook-level
        ``end_actions`` set is used.

    Returns
    -------
    str or float
        ``token`` itself when it is NaN; ``token`` without its final character
        when that shortened form is in ``outcomes``; otherwise ``token``
        unchanged.
    """
    if outcomes is None:
        outcomes = end_actions

    try:
        if math.isnan(token):
            return token
    except TypeError:
        # math.isnan rejects non-numeric input such as str; those tokens are
        # handled below. Only TypeError is caught so that unrelated failures
        # (and KeyboardInterrupt) are no longer silently swallowed.
        pass

    # Exactly one trailing character is removed, so this assumes the suffix
    # that was appended is a single character.
    stem = token[:-1]
    return stem if stem in outcomes else token

In [81]:
# Run strip_outcomes over every cell of every quarter, column by column,
# writing the cleaned column back into the same frame.
for frame in all_quarters:
    for layer in frame.columns:
        frame[layer] = frame[layer].apply(strip_outcomes)

In [82]:
# Rebuild the per-quarter play lists from the frames as they stand now,
# again keeping only entries whose type is not exactly float.
game = [
    [
        [entry for entry in list(row) if type(entry) != float]
        for _, row in quarter.iterrows()
    ]
    for quarter in all_quarters
]

Get all our passing sequences into lists; we can then iterate through these and count each directed action (from one entry in a sequence to the next).

In [83]:
# transition_dict[a][b] counts how often entry `a` is immediately followed
# by entry `b` within a play.
transition_dict = defaultdict(lambda: defaultdict(int))
for quarter_plays in game:
    for sequence in quarter_plays:
        for passer, receiver in zip(sequence, sequence[1:]):
            transition_dict[passer][receiver] += 1

In [84]:
# Square passer-by-receiver table over every node that has outgoing
# transitions; transitions into an outcome event are left out.
node_names = list(transition_dict.keys())
connections = pd.DataFrame(columns=node_names, index=node_names)

for passer, receivers in transition_dict.items():
    for receiver, count in receivers.items():
        if receiver not in end_actions:
            connections.loc[passer, receiver] = count



In [385]:
# Total outgoing transitions from every node whose name ends in '0'.
all_initializations = sum(
    sum(receivers.values())
    for node, receivers in transition_dict.items()
    if node[-1] == str(0)
)

# NOTE(review): the value printed is a percentage (it is multiplied by 100)
# but the message carries no '%' sign or rounding.
butler_initializations = sum(transition_dict['JB0'].values())
print('Jimmy Butler accounted for ' + str(100 * butler_initializations / all_initializations) + ' of initializations')

Jimmy Butler accounted for 73.33333333333333 of initializations


Get rid of possessions not started by Jimmy Butler

In [86]:
# .copy() is shallow: the nested count dicts are the same objects that
# transition_dict holds, only the top-level key set is independent.
jimmy_dict = transition_dict.copy()

# Remove these layer-0 nodes; pop raises KeyError if one is absent.
for other_starter in ('TH0', 'JC0', 'AI0', 'KN0', 'BA0'):
    jimmy_dict.pop(other_starter)

# Kept as the cell's final expression so the removed 'JB1' counts are displayed.
jimmy_dict.pop('JB1')

defaultdict(int,
            {'TH2': 1,
             'NO PLAY': 1,
             'AND_ONE': 1,
             'DR2': 1,
             'BA2': 2,
             'KN2': 2,
             'FT': 1})

In [87]:
# Same passer-by-receiver table as before, now restricted to the nodes that
# remain in jimmy_dict. Transitions into outcome events are skipped.
jimmy_nodes = list(jimmy_dict.keys())
connections = pd.DataFrame(columns=jimmy_nodes, index=jimmy_nodes)

for passer, receivers in jimmy_dict.items():
    for receiver, count in receivers.items():
        if receiver not in end_actions:
            connections.loc[passer, receiver] = count

In [89]:
# Collect every node name that takes part in a transition, either as the
# passer or as the receiver. A passer is only added when it has at least one
# receiver, matching the nested-loop behaviour of the original cell.
all_labels = set()
for key, receivers in jimmy_dict.items():
    for key2 in receivers:
        all_labels.add(key)
        all_labels.add(key2)

# Outcome events are not nodes. Sorting makes the id assignment reproducible:
# iterating a set of strings directly can yield a different order in every
# fresh kernel (string hashing is randomised per process), and downstream
# cells derive node positions from this order.
all_labels = sorted(label for label in all_labels if label not in end_actions)

# Node name -> integer id (0, 1, 2, ...) in sorted-name order.
id_mapper = {label: node_id for node_id, label in enumerate(all_labels)}

In [90]:
# Inverse of id_mapper: integer id -> node name.
reverse_mapper = {node_id: label for label, node_id in id_mapper.items()}
    

In [121]:
# Player initials -> display name used for the node labels.
name_mapper = dict(
    JB='Butler',
    DR='Robinson',
    TH='Herro',
    JC='Crowder',
    BA='Adebayo',
    AI='Iguodala',
    KN='Nunn',
)

# Player initials -> node colour.
color_mapper = dict(
    JB='red',
    DR='white',
    TH='black',
    JC='grey',
    BA='yellow',
    AI='orange',
    KN='brown',
)

In [91]:
def find_indexes(labels, layer):
    """Return the positions in ``labels`` of the nodes that belong to ``layer``.

    Parameters
    ----------
    labels : sequence of str
        Node names whose trailing characters are the pass-layer suffix.
    layer : int or str
        Pass layer to look for; it is converted with ``str`` before matching.

    Returns
    -------
    list of int
        Indexes into ``labels``, in ascending order, of the names that end
        with ``str(layer)``.
    """
    suffix = str(layer)
    # Match on the suffix rather than anywhere in the name, so a digit that
    # happens to occur earlier in a label cannot put it in the wrong layer.
    return [idx for idx, label in enumerate(labels) if label.endswith(suffix)]

In [199]:


# Parallel lists describing the Sankey links: source[i] -> target[i] carries
# value[i] transitions. Transitions into an outcome event are not links.
# Both the passers and their receivers are read from jimmy_dict, so the links
# always describe the same filtered data the node ids were built from.
source = []
target = []
value = []
for passer, receivers in jimmy_dict.items():
    for receiver, count in receivers.items():
        if receiver in end_actions:
            continue
        source.append(id_mapper[passer])
        target.append(id_mapper[receiver])
        value.append(count)
        
        
# Node names ordered by their integer id.
labels = [reverse_mapper[node_id] for node_id in range(len(id_mapper.keys()))]

# Horizontal position: the trailing digit of the name divided by 6.
x_pos = [int(label[-1]) / 6 for label in labels]

# Vertical positions start at zero and are filled in per layer below.
y_pos = [0] * len(x_pos)

# How many nodes carry each trailing digit.
nodes_in_layer = defaultdict(int)
for label in labels:
    nodes_in_layer[int(label[-1])] += 1

# Display name and colour are looked up by the name minus its last character.
fixed_labels = [name_mapper[label[:-1]] for label in labels]
colors = [color_mapper[label[:-1]] for label in labels]

# Spread the nodes of one layer evenly over [0, 1): the k-th of m nodes gets k/m.
for layer in nodes_in_layer.keys():
    layer_members = find_indexes(labels, layer)
    for rank, idx in enumerate(layer_members):
        y_pos[idx] = rank / len(layer_members)

#Replace y values of 0 with small value, prevents layer shift
y_pos = [0.05 if y == 0 else y for y in y_pos]


# Per-node outcome statistics, in the same order as `labels`.
turnovers = []
makes = []
misses = []
drew_fouls = []
shooting_percentages = []

for label in labels:
    # .get() is used throughout so that looking up an outcome a node never had
    # does not insert a zero entry into the (shared) defaultdicts.
    outcome_counts = jimmy_dict.get(label, {})

    turnover = outcome_counts.get('TURNOVER', 0)
    # An and-one counts both as a made shot and as a drawn foul.
    make = outcome_counts.get('AND_ONE', 0) + outcome_counts.get('MAKE', 0)
    miss = outcome_counts.get('MISS', 0)
    drew_foul = sum(
        outcome_counts.get(outcome, 0)
        for outcome in ('AND_ONE', 'FOUL NON-SHOOTING', 'FT')
    )

    # A node with no shot attempts has no shooting percentage; the explicit
    # check replaces a bare `except:` that hid every other kind of error too.
    attempts = make + miss
    percentage = 100 * make / attempts if attempts else 'NA'

    turnovers.append(turnover)
    makes.append(make)
    misses.append(miss)
    drew_fouls.append(drew_foul)
    shooting_percentages.append(percentage)

# One row per node: [turnovers, makes, misses, fouls drawn, shooting %].
all_data = [
    [turnovers[i], makes[i], misses[i], drew_fouls[i], shooting_percentages[i]]
    for i in range(len(turnovers))
]
    

In [266]:
def _layer_totals(node_counts, n_layers, outcomes=None):
    """Sum transition counts per pass layer.

    A node belongs to layer ``k`` when the last character of its name equals
    ``str(k)``. For each layer the counts of the listed ``outcomes`` are added
    up over its nodes; with ``outcomes=None`` every count of the node is added.
    Lookups use ``.get`` so ``node_counts`` is never modified.
    """
    layer_of = {str(layer): layer for layer in range(n_layers)}
    totals = [0] * n_layers
    for node, counts in node_counts.items():
        layer = layer_of.get(node[-1])
        if layer is None:
            continue
        if outcomes is None:
            totals[layer] += sum(counts.values())
        else:
            totals[layer] += sum(counts.get(outcome, 0) for outcome in outcomes)
    return totals


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


n_layers = len(nodes_in_layer.keys())

turnovers_by_layer = _layer_totals(jimmy_dict, n_layers, ['TURNOVER'])
# An and-one counts as a made shot and as a drawn foul.
makes_by_layer = _layer_totals(jimmy_dict, n_layers, ['MAKE', 'AND_ONE'])
misses_by_layer = _layer_totals(jimmy_dict, n_layers, ['MISS'])
fouls_drawn_by_layer = _layer_totals(
    jimmy_dict, n_layers, ['FT', 'FOUL NON-SHOOTING', 'AND_ONE']
)
# Every outgoing transition of a layer's nodes, whatever it leads to.
possesions_by_layer = _layer_totals(jimmy_dict, n_layers)

attempts_by_layer = [
    makes_by_layer[i] + misses_by_layer[i] for i in range(len(makes_by_layer))
]
# Layers without attempts / possessions yield NaN instead of raising
# ZeroDivisionError.
shooting_percentage_by_layer = [
    _safe_ratio(makes_by_layer[i], attempts_by_layer[i])
    for i in range(len(makes_by_layer))
]
turnover_rate_by_layer = [
    _safe_ratio(turnovers_by_layer[i], possesions_by_layer[i])
    for i in range(len(makes_by_layer))
]

In [355]:

def _ordinal(number):
    """Return ``number`` followed by its English ordinal suffix (0 -> '0th', 1 -> '1st')."""
    if 10 <= number % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return '%d%s' % (number, suffix)


# One column per pass layer actually present in the data, instead of seven
# hard-coded columns (which fails with fewer layers and hides extra ones).
layer_numbers = range(len(possesions_by_layer))

headers = [''] + ['<b>%s pass</b>' % _ordinal(layer) for layer in layer_numbers]
row_titles = ['<b>Possesions</b>', '<b>Fouls Drawn</b>', '<b>FGA</b>', '<b>Shooting %</b>', '<b>Turnovers</b>', '<b>Turnover per Possesion</b>']

# Turnover rate is rounded to 3 decimals for every layer (the hand-written
# columns mixed 3 and 2 decimals).
layer_columns = [
    [
        possesions_by_layer[layer],
        fouls_drawn_by_layer[layer],
        attempts_by_layer[layer],
        np.round(100 * shooting_percentage_by_layer[layer], 1),
        turnovers_by_layer[layer],
        np.round(turnover_rate_by_layer[layer], 3),
    ]
    for layer in layer_numbers
]

table = go.Figure(data=[go.Table(header=dict(values=headers),
                                 cells=dict(values=[row_titles] + layer_columns))])
table.update_layout(margin=dict(t=0))

In [346]:
import plotly.graph_objects as go
import numpy as np

# Stand-alone Sankey figure of the passing sequences. Node order, labels,
# colours and positions come from the lists built in the earlier cells.
fig = go.Figure(data=[go.Sankey(arrangement='snap',
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = fixed_labels,
      x = x_pos,
      y = y_pos,
      customdata=all_data,
      color= colors,
      hovertemplate='<b>Passes Received:</b> %{value: 0f}'
    ),
    link = dict(
      source = source, # indices correspond to labels, eg A1, A2, A2, B1, ...
      target = target,
      value = value
  ))])

# Column captions above the diagram: (x position, caption).
pass_layer_titles = [
    (-0.01, "<b>0th pass</b>"),
    (0.14, "<b>1st pass</b>"),
    (0.31, "<b>2nd pass</b>"),
    (0.5, "<b>3rd pass</b>"),
    (0.7, "<b>4th pass</b>"),
    (0.87, "<b>5th pass</b>"),
    (1.01, "<b>6th pass</b>"),
]
for x_anchor, caption in pass_layer_titles:
    fig.add_annotation(x=x_anchor, y=1.1,
            text=caption, showarrow=False)

# The original call ended with a bare positional `width` after the keyword
# arguments, which is a SyntaxError; it is dropped so the figure auto-sizes.
# NOTE(review): the title says 10/9/2020 while the CSV files loaded at the top
# of the notebook are named OCT6 -- confirm which date is right.
fig.update_layout(title_text="Miami Heat Passing Sequences, vs LA Lakers 10/9/2020", font_size=10,hovermode='closest',margin=dict(b=20))
fig.show()

In [341]:
# Hover text for a node: passes received plus the per-node statistics carried
# in customdata ([turnovers, makes, misses, fouls drawn, shooting %]).
node_hover_parts = [
    '<b>Passes Received:</b> %{value: 0f}',
    '<br><b>Turnovers:</b> %{customdata[0]}',
    '<br><b>Makes:</b> %{customdata[1]}',
    '<br><b>Misses:</b> %{customdata[2]}',
    '<br><b>Fouls Drawn:</b> %{customdata[3]}',
    '<br><b>Shooting Percentage:</b> %{customdata[4]: .1f}%',
]

sankey_node = {
    'pad': 15,
    'thickness': 20,
    'line': {'color': "black", 'width': 0.5},
    'label': fixed_labels,
    'x': x_pos,
    'y': y_pos,
    'customdata': all_data,
    'color': colors,
    'hovertemplate': ''.join(node_hover_parts),
}

# indices in source/target correspond to positions in the label list
sankey_link = {'source': source, 'target': target, 'value': value}

# The trace occupies the upper part of the figure (y from 0.43 to 1).
sankey = go.Sankey(
    arrangement='snap',
    node=sankey_node,
    link=sankey_link,
    domain={'y': [0.43, 1.]},
)

# Summary table trace placed in the lower part of the figure (y from 0 to 0.4).
# One column is built per pass layer present in the data rather than indexing
# seven layers by hand, which fails with fewer layers and hides extra ones.
table2_row_titles = ['<b>Possesions</b>', '<b>Fouls Drawn</b>', '<b>FGA</b>', '<b>Shooting %</b>', '<b>Turnovers</b>', '<b>Turnover per Possesion</b>']

# Turnover rate is rounded to 3 decimals for every layer (the hand-written
# columns mixed 3 and 2 decimals).
table2_columns = [
    [
        possesions_by_layer[layer],
        fouls_drawn_by_layer[layer],
        attempts_by_layer[layer],
        np.round(100 * shooting_percentage_by_layer[layer], 1),
        turnovers_by_layer[layer],
        np.round(turnover_rate_by_layer[layer], 3),
    ]
    for layer in range(len(possesions_by_layer))
]

# `headers` holds the row-title column header followed by one header per
# layer; use only as many as there are data columns.
table2_headers = headers[:len(table2_columns) + 1]

table2 = go.Table(
    header=dict(values=table2_headers),
    cells=dict(values=[table2_row_titles] + table2_columns),
    domain={'y': [0, 0.4]},
)

In [384]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Combined figure: the Sankey trace and the summary-table trace share one canvas.
fig2 = go.Figure(data=[sankey, table2])
fig2.update_layout(title_text="Miami Heat Passing Sequences, vs LA Lakers 10/9/2020", height=900)

# Column captions above the Sankey: (x position, caption), all at y=1.05.
fig2_captions = (
    (-0.01, "<b>0th pass</b>"),
    (0.14, "<b>1st pass</b>"),
    (0.31, "<b>2nd pass</b>"),
    (0.5, "<b>3rd pass</b>"),
    (0.7, "<b>4th pass</b>"),
    (0.87, "<b>5th pass</b>"),
    (1.01, "<b>6th pass</b>"),
)
for x_anchor, caption in fig2_captions:
    fig2.add_annotation(x=x_anchor, y=1.05,
            text=caption, showarrow=False)

fig2.show()
# Uncomment to export the figure as a stand-alone HTML file.
#fig2.write_html('Sankey_custom_data.html')

In [382]:
# Markdown text shown next to the figures in the Dash app. The adjacent string
# literals are concatenated by Python into one string.
paragraph = (
    '**Hypothesis**: Longer passing sequences result in successful possesions more often than shorter sequences.\n\n'
    'This visualization represents all passing sequences initialized by Jimmy Butler of the Miami Heat against the LA Lakers in game 5 of the 2020 NBA Finals. '
    'The player who initializes the possesion is the player who brings the ball past half court or inbounds the ball in the frontcourt. '
    'Only possesions initialized by Jimmy Butler are shown here. '
    'Jimmy Butler accounted for 73.3% of all initializations.\n\n'
    'Summary statistics by passing layer are shown. '
    'For example, FGA for the 1st pass layer represents the number of Field Goals Attempted for all players who received the first pass in a passing sequence.\n\n'
    ' All data recorded manually from game 5 of 2020 NBA Finals and can be found [here](https://github.com/brendenconnors/HW4-Network).'
)


In [383]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import pandas as pd


app = dash.Dash()

# Left column: the Sankey figure stacked on top of the summary table.
figure_column = html.Div(
    [
        dcc.Graph(figure=fig, id='g1', style={'display': 'block', 'height': 450}),
        dcc.Graph(figure=table, id='table', style={'display': 'block', 'height': 350}),
    ],
    style={'display': 'inline-block', 'height': 900, 'width': 1250},
)

# Right column: the explanatory text, rendered from Markdown.
# Style keys are camelCased (marginLeft, verticalAlign), which is the form
# Dash documents for `style` dictionaries, instead of hyphenated CSS names.
text_column = html.Div(
    [
        html.P(
            dcc.Markdown(paragraph),
            style={'height': 350, 'fontSize': 17, 'display': 'block'},
        )
    ],
    style={
        'marginLeft': 2,
        'marginRight': 0,
        'height': 900,
        'width': 250,
        'verticalAlign': 'top',
        'display': 'inline-block',
    },
)

app.layout = html.Div(
    children=[figure_column, text_column],
    style={'width': '100%', 'display': 'inline-block'},
)


if __name__ == '__main__':
    app.run_server(debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [02/Nov/2020 12:40:11] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:11] "[37mGET /_dash-component-suites/dash_renderer/react@16.8.6.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:11] "[37mGET /_dash-component-suites/dash_renderer/prop-types@15.7.2.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:11] "[37mGET /_dash-component-suites/dash_renderer/react-dom@16.8.6.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:11] "[37mGET /_dash-component-suites/dash_html_components/dash_html_components.min.js?v=1.0.1&m=1576596177 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:12] "[37mGET /_dash-component-suites/dash_core_components/highlight.pack.js?v=1.3.1&m=1576595950 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/Nov/2020 12:40:12] "[37mGET /_dash-component-suites/dash_renderer/dash_renderer.min.js?v=1.1.