In [33]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)

chains_2023 =  pd.read_csv("../data/chains_data.csv")

## Rename columns
chains_2023.rename(columns={'homeTeam.teamName': "homeTeam",
  'awayTeam.teamName': "awayTeam",
  'homeTeamScore.totalScore': 'homeTeamScore',
  'awayTeamScore.totalScore': 'awayTeamScore',
  'chain_number': "chainNumber",
  'venue.name': "venueName",
  'team': "playingFor"}, inplace=True)

# Create columns fr grouping for easy summarising

game_identifiers = ['season', 'roundNumber', 'homeTeam']
chain_identifiers = game_identifiers + ['chainNumber']
possession_identifiers = chain_identifiers + ['possessionNum']

# Remove Kick Into F50 type results
chains_2023 = chains_2023[~chains_2023.description.isin(['Kick Into F50','Kick Inside 50 Result'])]

chains_2023['x_norm'] = chains_2023['x'] / (chains_2023['venueLength'] / 2)
## The average ratio of width/length in the league is about 0.78 or so, round up to 0.8
chains_2023['y_norm'] = chains_2023['y'] / (chains_2023['venueWidth'] / 2) * 0.8
chains_2023.fillna({'playerId': 'CONTEXT'}, inplace=True)

# Create number for each possession
chains_2023['possessionNum'] = chains_2023.groupby(["roundNumber", "homeTeam", "period", "chainNumber"],  group_keys=False)['playerId'].apply(lambda s: s.ne(s.shift()).cumsum())
# Create ID for chains and possession for easy lookup
chains_2023['chainId'] = chains_2023.season.astype(str) + "_" + chains_2023.roundNumber.astype(str) + "_" + chains_2023.homeTeam.apply(lambda x: x.replace(" ", "_")) + "_C" + chains_2023.chainNumber.astype(str)
chains_2023['possessionId'] = chains_2023['chainId'] + "_P" + chains_2023.possessionNum.astype(str)

# Identify duplicate rows that show the player gaining possession and remove 
dupes = chains_2023.duplicated(subset=['season', 'roundNumber', 'homeTeam', 'period', 'periodSeconds', 'playerId', 'description'], keep='last')
chains_2023 = chains_2023[~dupes]

# Create of column of instances of possession changes
chains_2023['possChng'] = chains_2023.groupby(game_identifiers + ['period'],  group_keys=False)['playingFor'].apply(lambda s: s.ne(s.shift(-1)))

# Create 
player_info = chains_2023[['playerId', 'firstName', 'surname', 'playingFor']].drop_duplicates().dropna()
player_info['full_name'] = chains_2023['firstName'] + " " + chains_2023['surname']
player_info.drop(['firstName', 'surname'], axis=1, inplace=True)
# player_info.set_index('playerId', inplace=True)

In [34]:
## Create possession summary

def paste(outcm_strings):
    return ', '.join(outcm_strings)

def count_bounce(x):
    return sum(x == "Bounce")

def effective_poss(x):
    return sum(x=="effective")

def calc_dist(x0, y0, x1, y1):
    x0 = np.array(x0)
    y0 = np.array(y0)
    x1 = np.array(x1)
    y1 = np.array(y1)
    return np.sqrt((x1-x0)**2 + (y1 - y0)**2)

possession_summary = (chains_2023
                        .groupby(['season', 'roundNumber', 'chainId', 'possessionId', 'playerId','playingFor', 'venueName', 'venueLength', 'venueWidth'], as_index=False)
                        .agg(
                            n=('x', 'size'),
                            outcomes=('description', paste),
                            xInitialPoss = ('x', 'first'),
                            yInitialPoss = ('y', 'first'),
                            xFinalPoss = ('x', 'last'),
                            yFinalPoss = ('y', 'last'),
                            bounces = ('description', count_bounce),
                            possChng = ('possChng', 'any'),
                            disposal=('disposal', effective_poss),
                            chainInitialState = ('initialState', 'first'),
                            chainFinalState = ('finalState', 'first'),
                            initialState = ('description', 'first'),
                            finalState = ('description', 'last')
                            ))

possession_summary = possession_summary.assign(distanceFromPoss = calc_dist(possession_summary.xInitialPoss, possession_summary.yInitialPoss, possession_summary.xFinalPoss, possession_summary.yFinalPoss),
                        distanceDisposal = calc_dist(possession_summary.xFinalPoss, possession_summary.yFinalPoss, possession_summary.xInitialPoss.shift(-1), possession_summary.yInitialPoss.shift(-1), ),
                        initialDistFromGoal = calc_dist(possession_summary.xInitialPoss, possession_summary.yInitialPoss, possession_summary.venueLength, 0),
                        finalDistFromGoal = calc_dist(possession_summary.xFinalPoss, possession_summary.yFinalPoss, possession_summary.venueLength, 0),
                        disposalRecipient = np.where(~possession_summary.possChng & possession_summary.disposal > 0, possession_summary.playerId.shift(-1),"ieff"))

possession_summary['deltaDistFromGoal'] = possession_summary.finalDistFromGoal - possession_summary.initialDistFromGoal
possession_summary.drop(['venueLength','venueWidth'], inplace=True)

In [None]:
## Sanity check - check what the largest change in position was wrt the goals
possession_summary.sort_values('deltaDistFromGoal', ascending=True).head(1).merge(chains_2023, on=['season', 'roundNumber', 'homeTeam', 'chainNumber', 'possessionNum'], how='inner')

In [None]:
## Sanity check - visualise a possession
df = chains_2023[(chains_2023.roundNumber == 1) & (chains_2023['homeTeam'] == "Richmond") & (chains_2023.period == 1) & (chains_2023.chainNumber == 2)]
df
# plt.scatter('x', 'y', c='chainNumber', data=df)

In [40]:
# Create pass networks - subset the possession summary and join on player names

pass_network = possession_summary[['season', 'roundNumber', 'possessionId','playingFor', 'playerId','disposalRecipient', 'finalState']].merge(player_info, on='playerId', how='inner')
pass_network = pass_network.merge(player_info, left_on='disposalRecipient', right_on='playerId', how='inner', suffixes=['_disposer','_reciever'])

# Count interations
pass_network.groupby(['playingFor']).value_counts(['full_name_disposer', 'full_name_reciever', 'finalState']).sort_values(ascending=False).head(40)

playingFor        finalState  full_name_disposer  full_name_reciever
Essendon          Handball    Darcy Parish        Zach Merrett          47
Collingwood       Handball    Tom Mitchell        Nick Daicos           42
Adelaide Crows    Handball    Rory Laird          Jordan Dawson         35
GWS Giants        Handball    Tom Green           Stephen Coniglio      35
Fremantle         Kick        Luke Ryan           Brennan Cox           32
                  Handball    Andrew Brayshaw     Caleb Serong          32
                              Caleb Serong        Andrew Brayshaw       31
Carlton           Handball    Patrick Cripps      Sam Walsh             31
Collingwood       Handball    Nick Daicos         Scott Pendlebury      30
Carlton           Handball    Adam Cerra          Sam Walsh             30
Collingwood       Handball    Nick Daicos         Josh Daicos           30
                              Scott Pendlebury    Nick Daicos           29
                              N

In [37]:
possession_summary

Unnamed: 0,season,roundNumber,chainId,possessionId,playerId,playingFor,venueName,venueLength,venueWidth,n,outcomes,xInitialPoss,yInitialPoss,xFinalPoss,yFinalPoss,bounces,possChng,disposal,chainInitialState,chainFinalState,initialState,finalState,distanceFromPoss,distanceDisposal,initialDistFromGoal,finalDistFromGoal,disposalRecipient,deltaDistFromGoal
0,2023,1,2023_1_GWS_Giants_C1,2023_1_GWS_Giants_C1_P2,CD_I291969,GWS Giants,GIANTS Stadium,164,128,2,"Hard Ball Get, Handball",8,-1,10,-3,0,True,0,centreBounce,outOfBounds,Hard Ball Get,Handball,2.828427,11.313708,156.003205,154.029218,ieff,-1.973987
1,2023,1,2023_1_GWS_Giants_C1,2023_1_GWS_Giants_C1_P3,CD_I1017109,Adelaide Crows,GIANTS Stadium,164,128,2,"Hard Ball Get, Kick",18,-11,22,-9,0,True,1,centreBounce,outOfBounds,Hard Ball Get,Kick,4.472136,93.391648,146.413797,142.284925,ieff,-4.128872
2,2023,1,2023_1_GWS_Giants_C10,2023_1_GWS_Giants_C10_P1,CD_I1001396,GWS Giants,GIANTS Stadium,164,128,2,"Kickin play on, Kick",-69,12,-69,12,0,True,1,kickIn,turnover,Kickin play on,Kick,0.000000,52.773099,233.308808,233.308808,ieff,0.000000
3,2023,1,2023_1_GWS_Giants_C10,2023_1_GWS_Giants_C10_P2,CD_I1008230,Adelaide Crows,GIANTS Stadium,164,128,1,Spoil,-22,36,-22,36,0,True,0,kickIn,turnover,Spoil,Spoil,0.000000,13.038405,189.451841,189.451841,ieff,0.000000
4,2023,1,2023_1_GWS_Giants_C10,2023_1_GWS_Giants_C10_P3,CD_I294305,GWS Giants,GIANTS Stadium,164,128,2,"Loose Ball Get, Handball",-33,29,-34,32,0,False,1,kickIn,turnover,Loose Ball Get,Handball,3.162278,5.099020,199.123078,200.569190,CD_I296347,1.446113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135890,2023,19,2023_19_Richmond_C98,2023_19_Richmond_C98_P1,CD_I1000981,Richmond,MCG,160,141,2,"Contested Mark, Kick",-30,-39,-36,-37,0,False,1,possGain,ballUpCall,Contested Mark,Kick,6.324555,27.313001,193.961336,199.461776,CD_I1002245,5.500439
135891,2023,19,2023_19_Richmond_C98,2023_19_Richmond_C98_P2,CD_I1002245,Richmond,MCG,160,141,2,"Uncontested Mark, Kick",-47,-12,-37,6,0,False,1,possGain,ballUpCall,Uncontested Mark,Kick,20.591260,42.579338,207.347534,197.091349,CD_I1006133,-10.256185
135892,2023,19,2023_19_Richmond_C98,2023_19_Richmond_C98_P3,CD_I1006133,Richmond,MCG,160,141,2,"Gather, Kick",-30,48,-31,51,0,True,1,possGain,ballUpCall,Gather,Kick,3.162278,43.289722,195.969385,197.691679,ieff,1.722294
135893,2023,19,2023_19_Richmond_C98,2023_19_Richmond_C98_P4,CD_I296254,Hawthorn,MCG,160,141,1,Spoil,12,56,12,56,0,True,0,possGain,ballUpCall,Spoil,Spoil,0.000000,95.126232,158.240324,158.240324,ieff,0.000000
