In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np
from plot_field import generate_afl_oval
# Display settings: show every column, but cap the number of printed rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)

# Shorter names for the columns used throughout the notebook.
column_renames = {
    'homeTeam.teamName': "homeTeam",
    'awayTeam.teamName': "awayTeam",
    'homeTeamScore.totalScore': 'homeTeamScore',
    'awayTeamScore.totalScore': 'awayTeamScore',
    'chain_number': "chainNumber",
    'venue.name': "venueName",
    'team': "playingFor",
}

# Load the raw chain events and apply the renames in one step.
chains_2023 = pd.read_csv("../data/chains_data.csv").rename(columns=column_renames)

# Column lists used as grouping keys, from coarsest (game) to finest (possession).
game_identifiers = ['season', 'roundNumber', 'homeTeam']
chain_identifiers = [*game_identifiers, 'chainNumber']
possession_identifiers = [*chain_identifiers, 'possessionNum']

# Remove the "Kick Into F50"-type result rows.
# .copy() gives an independent frame, so the column assignments below do not
# write onto a slice of the unfiltered data (chained-assignment warning).
excluded_descriptions = ['Kick Into F50', 'Kick Inside 50 Result', 'Inside 50', 'Shot At Goal']
chains_2023 = chains_2023[~chains_2023.description.isin(excluded_descriptions)].copy()

# Order events chronologically within each game and period.
chains_2023['date'] = pd.to_datetime(chains_2023['date'])
chains_2023 = chains_2023.sort_values(by=['season', 'roundNumber', 'date', 'homeTeam', 'period', 'periodSeconds'])

# Scale coordinates by half the venue dimensions.
chains_2023['x_norm'] = chains_2023['x'] / (chains_2023['venueLength'] / 2)
## The average ratio of width/length in the league is about 0.78 or so, round up to 0.8
chains_2023['y_norm'] = chains_2023['y'] / (chains_2023['venueWidth'] / 2) * 0.8

# Rows without a player get the placeholder id 'CONTEXT'.
chains_2023 = chains_2023.fillna({'playerId': 'CONTEXT'})

# Number the possessions inside each chain: the counter increases every time
# playerId differs from the previous row of the same chain.
# 'season' is part of the key (as it is in chainId and in the sort order) so
# that numbering never continues across seasons for the same round/team/chain.
possession_group_keys = ['season', 'roundNumber', 'homeTeam', 'period', 'chainNumber']
chains_2023['possessionNum'] = (
    chains_2023
    .groupby(possession_group_keys, group_keys=False)['playerId']
    .apply(lambda s: s.ne(s.shift()).cumsum())
)

# Create ID for chains and possession for easy lookup,
# e.g. "<season>_<round>_<Home_Team>_C<chain>" and "<chainId>_P<possession>".
chains_2023['chainId'] = (
    chains_2023.season.astype(str)
    + "_" + chains_2023.roundNumber.astype(str)
    + "_" + chains_2023.homeTeam.str.replace(" ", "_", regex=False)
    + "_C" + chains_2023.chainNumber.astype(str)
)
chains_2023['possessionId'] = chains_2023['chainId'] + "_P" + chains_2023.possessionNum.astype(str)

# Identify duplicate rows that show the player gaining possession and remove them
# (the last of each duplicated set is kept). .copy() so later column assignments
# write to an independent frame instead of a slice.
dupes = chains_2023.duplicated(
    subset=['season', 'roundNumber', 'homeTeam', 'period', 'periodSeconds', 'playerId', 'description'],
    keep='last',
)
chains_2023 = chains_2023[~dupes].copy()

# possChng: True when the next row of the same game/period is played for a
# different team. The last row of each group compares against NaN and is True.
chains_2023['possChng'] = (
    chains_2023
    .groupby(game_identifiers + ['period'], group_keys=False)['playingFor']
    .apply(lambda s: s.ne(s.shift(-1)))
)

# endOfQtr: the clock of the following row is lower than this row's clock.
chains_2023['endOfQtr'] = chains_2023.periodSeconds > chains_2023.periodSeconds.shift(-1)

# Final possession in a chain.
# The string alias 'max' is used because passing the builtin `max` to
# transform is deprecated in recent pandas releases.
chains_2023['finalPos'] = chains_2023.possessionNum == chains_2023.groupby('chainId').possessionNum.transform('max')
# Player lookup table: one row per (playerId, name, team) combination with a
# combined full_name column; rows with any missing value are discarded.
player_info = (
    chains_2023[['playerId', 'firstName', 'surname', 'playingFor']]
    .drop_duplicates()
    .dropna()
    .assign(full_name=lambda players: players['firstName'] + " " + players['surname'])
    .drop(columns=['firstName', 'surname'])
)
# player_info.set_index('playerId', inplace=True)

In [None]:
## Create possession summary

def paste(outcm_strings):
    """Join an iterable of strings into one comma-and-space separated string."""
    separator = ', '
    return separator.join(outcm_strings)

def calc_dist(x0, y0, x1, y1):
    """Euclidean distance between (x0, y0) and (x1, y1).

    Inputs may be scalars or array-likes; they are converted to float arrays
    and combined element-wise with NumPy broadcasting.
    """
    delta_x = np.asarray(x1, dtype=float) - np.asarray(x0, dtype=float)
    delta_y = np.asarray(y1, dtype=float) - np.asarray(y0, dtype=float)
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

def check_final_disposal(x):
    """Return the positional index of the last 'Kick', 'Handball' or 'Ground Kick' in x.

    x is a pandas Series of event descriptions. When none of the entries is a
    disposal, the index of the last element is returned.
    """
    is_disposal = x.isin(['Kick', 'Handball', 'Ground Kick']).values
    # Scanning the reversed flags gives how far the last disposal is from the end.
    steps_from_end = np.argmax(is_disposal[::-1])
    return len(x) - steps_from_end - 1

# One row per possession: collapse the event rows of each possession into
# start/end positions, timing, outcome counts and the raw event lists.
possession_summary = (chains_2023
                        .groupby(['season', 'roundNumber', 'homeTeam', 'period', 'chainId',  'possessionNum', 'possessionId', 'playerId','playingFor', 'venueName', 'venueLength', 'venueWidth'])
                        .agg(
                            # Number of event rows in the possession
                            n=('x', 'size'),
                            outcomes=('description', lambda outcm_strings: ', '.join(outcm_strings)),
                            # Position and clock of the first / last event
                            xInitialPoss = ('x', 'first'),
                            yInitialPoss = ('y', 'first'),
                            xFinalPoss = ('x', 'last'),
                            yFinalPoss = ('y', 'last'),
                            posStart = ('periodSeconds', 'first'),
                            posEnd = ('periodSeconds', 'last'),
                            # Counts of specific event descriptions
                            bounces = ('description', lambda x: sum(x == "Bounce")),
                            goals = ('description', lambda x: sum(x == "Goal")),
                            behind = ('description', lambda x: sum(x == "Behind")),
                            # First non-missing behindInfo value, or None when all are missing.
                            # pd.notna replaces the identity test against np.NaN, which is
                            # unreliable for NaN and whose alias was removed in NumPy 2.0.
                            behindDesc = ('behindInfo', lambda x: next((item for item in x if pd.notna(item)), None)),
                            possChng = ('possChng', 'any'),
                            finalPos = ('finalPos', 'any'),
                            # Number of events whose disposal column equals "effective"
                            disposal=('disposal', lambda x: sum(x == "effective")),
                            # Raw per-event lists, used later to look up the final disposal
                            disposalList = ('description', lambda x: list(x)),
                            xList = ('x', lambda x: list(x)),
                            yList = ('y', lambda x: list(x)),
                            # Positional index of the last Kick/Handball/Ground Kick
                            finalDisposal = ('description', check_final_disposal),
                            # Chain-level states versus this possession's first/last event
                            chainInitialState = ('initialState', 'first'),
                            chainFinalState = ('finalState', 'first'),
                            initialState = ('description', 'first'),
                            finalState = ('description', 'last'),
                            endOfQtr = ('endOfQtr', 'any'))).reset_index()

# The next position of a possession is the start position of the following
# possession in the same game and period (NaN for the last one in a period).
for next_col, start_col in (('xNextPos', 'xInitialPoss'), ('yNextPos', 'yInitialPoss')):
    possession_summary[next_col] = (
        possession_summary
        .groupby(['season', 'roundNumber', 'homeTeam', 'period'], as_index=False)[start_col]
        .shift(-1)
    )

# Update final disposal characteristics when the disposal is not the final event:
# the final state/position is taken from the disposal itself and the next
# position from the last recorded event of the possession.
# Done with masked assignments instead of iterrows + per-cell .loc writes,
# which is very slow on a full season of possessions.
has_trailing_events = possession_summary['finalDisposal'] + 1 < possession_summary['n']
if has_trailing_events.any():
    trailing = possession_summary.loc[has_trailing_events, ['finalDisposal', 'disposalList', 'xList', 'yList']]
    disposal_idx = trailing['finalDisposal'].tolist()
    possession_summary.loc[has_trailing_events, 'finalState'] = [
        events[i] for events, i in zip(trailing['disposalList'], disposal_idx)
    ]
    possession_summary.loc[has_trailing_events, 'xFinalPoss'] = [
        xs[i] for xs, i in zip(trailing['xList'], disposal_idx)
    ]
    possession_summary.loc[has_trailing_events, 'yFinalPoss'] = [
        ys[i] for ys, i in zip(trailing['yList'], disposal_idx)
    ]
    possession_summary.loc[has_trailing_events, 'xNextPos'] = [xs[-1] for xs in trailing['xList']]
    possession_summary.loc[has_trailing_events, 'yNextPos'] = [ys[-1] for ys in trailing['yList']]

# Remove chain-ending possessions whose final state is a Spoil.
possession_summary = possession_summary[~(possession_summary.finalPos & (possession_summary.finalState == "Spoil"))]

# Chain-ending possessions of a chain that finished in a turnover: negate the next position.
ends_in_turnover = possession_summary.finalPos & (possession_summary.chainFinalState == "turnover")
# Possessions starting with "Kickin play on": start at y = 0, 8.5 short of the end at -venueLength/2.
starts_from_kick_in = possession_summary.initialState == "Kickin play on"

possession_summary = possession_summary.assign(
    xNextPos=np.where(ends_in_turnover, -possession_summary.xNextPos, possession_summary.xNextPos),
    yNextPos=np.where(ends_in_turnover, -possession_summary.yNextPos, possession_summary.yNextPos),
    xInitialPoss=np.where(starts_from_kick_in, -(possession_summary.venueLength/2-8.5), possession_summary.xInitialPoss),
    yInitialPoss=np.where(starts_from_kick_in, 0, possession_summary.yInitialPoss),
)

# When a possession produces a goal or a behind, the next position becomes a
# point on the goal line (x = venueLength / 2) with y chosen by the score type.
behind_y_by_type = {
    'missLeft': 6.4,
    'leftPost': 3.2,
    'missRight': -6.4,
    'touched': 0,
    'rightPost': -3.2,
}
# Goals are tested first, then each behind type in the order listed above.
condList = [possession_summary.goals > 0] + [
    possession_summary.behindDesc == behind_type for behind_type in behind_y_by_type
]
## Y next pos
ychoiceList = [0] + list(behind_y_by_type.values())
## X next pos
xchoiceList = [possession_summary.venueLength/2] * len(condList)

score_x = np.select(condList, xchoiceList, default=possession_summary.xNextPos)
score_y = np.select(condList, ychoiceList, default=possession_summary.yNextPos)
possession_summary = possession_summary.assign(xNextPos=score_x, yNextPos=score_y)

# Distances for each possession.
# The goal is taken to be at (venueLength / 2, 0), matching the goal/behind
# adjustment above which places scores at x = venueLength / 2. The previous
# code measured to x = venueLength.
goal_x = possession_summary.venueLength / 2

# NOTE(review): the `| ~endOfQtr` term makes almost every possession that is not
# at the end of a quarter receive a recipient, including after a possession
# change. Behaviour is kept as-is; confirm whether `&` was intended.
has_recipient = (~possession_summary.possChng & (possession_summary.disposal > 0)) | ~possession_summary.endOfQtr

possession_summary = possession_summary.assign(
    # Distance carried between gaining possession and disposing of the ball
    distanceFromPoss = calc_dist(possession_summary.xInitialPoss, possession_summary.yInitialPoss, possession_summary.xFinalPoss, possession_summary.yFinalPoss),
    # Distance of the disposal itself, measured to the adjusted next position
    # (xNextPos/yNextPos) so goal, behind, turnover and period-boundary handling
    # is respected instead of an ungrouped shift of the next row's start.
    distanceDisposal = calc_dist(possession_summary.xFinalPoss, possession_summary.yFinalPoss, possession_summary.xNextPos, possession_summary.yNextPos),
    initialDistFromGoal = calc_dist(possession_summary.xInitialPoss, possession_summary.yInitialPoss, goal_x, 0),
    finalDistFromGoal = calc_dist(possession_summary.xFinalPoss, possession_summary.yFinalPoss, goal_x, 0),
    # playerId of the following row, or NaN when there is no recipient
    disposalRecipient = np.where(has_recipient, possession_summary.playerId.shift(-1), np.nan))

# Negative values mean the possession finished closer to goal than it started.
possession_summary['deltaDistFromGoal'] = possession_summary.finalDistFromGoal - possession_summary.initialDistFromGoal
possession_summary = possession_summary.drop(['venueLength','venueWidth'], axis=1)

In [None]:
from collections import Counter

# Among the first 1000 possessions that have events after their final disposal,
# keep those with exactly one effective disposal and tally their last event.
with_trailing_events = possession_summary[possession_summary.finalDisposal < possession_summary.n - 1].head(1000)
single_effective = with_trailing_events[with_trailing_events.disposal == 1]
res = [events[-1] for events in single_effective.disposalList]

# Print the ids of possessions whose last event is a goal.
for poss_id, last_event in zip(single_effective.possessionId, res):
    if last_event == "Goal":
        print(poss_id)

Counter(res)

In [None]:
# Debug view: rebuild the possession aggregation for two specific chains only.
# The result is stored under its own name so the full `possession_summary`
# (with its derived columns such as disposalRecipient) is not overwritten for
# the cells that follow.
possession_summary_sample = (chains_2023[chains_2023.chainId.isin(["2023_1_GWS_Giants_C17",'2023_1_GWS_Giants_C18'])]
                        .groupby(['season', 'roundNumber', 'homeTeam', 'period', 'chainId',  'possessionNum', 'possessionId', 'playerId','playingFor', 'venueName', 'venueLength', 'venueWidth'])
                        .agg(
                            n=('x', 'size'),
                            outcomes=('description', lambda outcm_strings: ', '.join(outcm_strings)),
                            xInitialPoss = ('x', 'first'),
                            yInitialPoss = ('y', 'first'),
                            xFinalPoss = ('x', 'last'),
                            yFinalPoss = ('y', 'last'),
                            posStart = ('periodSeconds', 'first'),
                            posEnd = ('periodSeconds', 'last'),
                            bounces = ('description', lambda x: sum(x == "Bounce")),
                            goals = ('description', lambda x: sum(x == "Goal")),
                            behind = ('description', lambda x: sum(x == "Behind")),
                            # First non-missing behindInfo value; pd.notna replaces the
                            # identity test against np.NaN (alias removed in NumPy 2.0).
                            behindDesc = ('behindInfo', lambda x: next((item for item in x if pd.notna(item)), None)),
                            possChng = ('possChng', 'any'),
                            finalPos = ('finalPos', 'any'),
                            disposal=('disposal', lambda x: sum(x == "effective")),
                            disposalList = ('description', lambda x: list(x)),
                            xList = ('x', lambda x: list(x)),
                            yList = ('y', lambda x: list(x)),
                            finalDisposal = ('description', check_final_disposal),
                            chainInitialState = ('initialState', 'first'),
                            chainFinalState = ('finalState', 'first'),
                            initialState = ('description', 'first'),
                            finalState = ('description', 'last'),
                            endOfQtr = ('endOfQtr', 'any'))).reset_index()

# The next position is the start position of the following possession in the same game and period.
possession_summary_sample['xNextPos'] = possession_summary_sample.groupby(['season', 'roundNumber', 'homeTeam', 'period'], as_index=False)['xInitialPoss'].shift(-1)
possession_summary_sample['yNextPos'] = possession_summary_sample.groupby(['season', 'roundNumber', 'homeTeam', 'period'], as_index=False)['yInitialPoss'].shift(-1)

possession_summary_sample[possession_summary_sample.chainId.isin(["2023_1_GWS_Giants_C17"])]
# chains_2023[chains_2023.chainId.isin(["2023_1_GWS_Giants_C17"])]

In [None]:
# Show every event of one randomly chosen chain that contains a "Shot At Goal" row.
# "Shot At Goal" is one of the descriptions filtered out of chains_2023 earlier in
# the notebook, so the selection can be empty; in that case an empty frame is shown
# instead of letting .sample(1) raise.
shot_chain_ids = chains_2023.loc[chains_2023.description == "Shot At Goal", 'chainId']
if shot_chain_ids.empty:
    sampled_chain_ids = []
else:
    sampled_chain_ids = shot_chain_ids.sample(1).values
chains_2023[chains_2023.chainId.isin(sampled_chain_ids)]

In [None]:
def label_point(x, y, val, ax):
    """Write each value of `val` as centred white text at its (x, y) position on `ax`.

    x, y and val are pandas Series; they are aligned on their index before
    the labels are drawn.
    """
    text_style = dict(color='white', verticalalignment='center', horizontalalignment='center')
    labelled_points = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, pt in labelled_points.iterrows():
        ax.text(pt['x'], pt['y'], str(pt['val']), **text_style)



## Sanity check - draw every possession of one randomly chosen chain
chain_sample = possession_summary.head(100).sample(1)
# chain_sample = possession_summary[possession_summary.goals>0].sample(1)

in_sampled_chain = possession_summary.chainId.isin(chain_sample.chainId.values)
plot_df = possession_summary[in_sampled_chain].sort_values('possessionNum').copy()

# plot_df = pd.wide_to_long(plot_df, stubnames=['x', 'y'], i=['possessionNum'], j='state', suffix=r'\w+').reset_index()
plot_df_wide = pd.wide_to_long(plot_df, stubnames=['x', 'y'], i=['possessionNum'], j='state', suffix=r'\w+').reset_index()

plt.figure(figsize=(15,10))
segments = zip(plot_df.xInitialPoss, plot_df.yInitialPoss,
               plot_df.xFinalPoss, plot_df.yFinalPoss,
               plot_df.xNextPos, plot_df.yNextPos)
for x_start, y_start, x_end, y_end, x_next, y_next in segments:
    # Solid red: from the end of the possession to the next position
    plt.plot((x_end, x_next), (y_end, y_next), 'r-')
    # Dashed blue: movement within the possession
    plt.plot((x_start, x_end), (y_start, y_end), 'b--')

# Possession numbers in white on top of black markers at each start position
label_point(plot_df.xInitialPoss, plot_df.yInitialPoss, plot_df.possessionNum.astype(str), plt.gca())
plt.plot(plot_df['xInitialPoss'], plot_df['yInitialPoss'], 'ko', markersize=14)
# generate_afl_oval comes from plot_field; presumably draws the venue outline - confirm there
generate_afl_oval(plot_df.venueName.values[0])
plt.show()

In [None]:
plot_df
# chains_2023[chains_2023.possessionId == '2023_3_Fremantle_C211_P3']
# chains_2023[chains_2023.chainId.isin(['2023_1_GWS_Giants_C17'])]
# Possession rows for two specific chains
chains_to_inspect = ['2023_1_GWS_Giants_C20', '2023_1_GWS_Giants_C21']
possession_summary[possession_summary.chainId.isin(chains_to_inspect)]

In [None]:
# Event rows that are Spoils and belong to the final possession of their chain
is_final_spoil = chains_2023.finalPos & chains_2023.description.eq("Spoil")
chains_2023[is_final_spoil]

In [None]:
# Attach to every event the description of the following event in the same possession
t = chains_2023.assign(nextPlay=chains_2023.groupby('possessionId').description.shift(-1))

# What follows a Handball, and how often
t.loc[t.description == "Handball", 'nextPlay'].value_counts()

# t[(t.description == "Kick") & (t.nextPlay == "Loose Ball Get")].sample(1).possessionId


In [None]:
# Raw event rows of a single possession
chains_2023[chains_2023.possessionId.eq('2023_1_North_Melbourne_C48_P4')]

In [None]:
## Sanity check - inspect the raw rows of one chain
in_selected_chain = (
    chains_2023.roundNumber.eq(1)
    & chains_2023['homeTeam'].eq("Richmond")
    & chains_2023.period.eq(1)
    & chains_2023.chainNumber.eq(2)
)
df = chains_2023[in_selected_chain]
df
# plt.scatter('x', 'y', c='chainNumber', data=df)

In [None]:
# Create pass networks - subset the possession summary and join on player names.
# Only playerId/full_name are merged in: player_info also carries 'playingFor',
# which would collide with the summary's own column, be renamed to
# playingFor_x / playingFor_y, and leave the grouping below keyed on the
# receiver's team instead of the disposer's.
player_names = player_info[['playerId', 'full_name']].drop_duplicates()

pass_network = (
    possession_summary[['season', 'roundNumber', 'possessionId','playingFor', 'playerId','disposalRecipient', 'finalState']]
    # Name of the player disposing of the ball
    .merge(player_names, on='playerId', how='inner')
    # Name of the recipient; overlapping columns get _disposer / _reciever suffixes
    .merge(player_names, left_on='disposalRecipient', right_on='playerId', how='inner', suffixes=('_disposer', '_reciever'))
)

# Count interactions: most frequent disposer -> receiver pairs per team and final state
pass_network.groupby(['playingFor']).value_counts(['full_name_disposer', 'full_name_reciever', 'finalState']).sort_values(ascending=False).head(40)

In [None]:
# For each possession in the first 100 event rows: index of the last disposal
# (Kick / Handball / Ground Kick) and the number of events in the possession.
t = (
    chains_2023
    .head(100)
    .groupby('possessionId')['description']
    .agg(finalDisposal=check_final_disposal, possessionCount='size')
)
#     .query('(finalDisposal < possessionCount) & (finalDisposal > 0)'))
t

In [None]:
# a = pd.Series(['Mark Fumbled', 'Gather', 'Kick'])
# np.argmax(np.flip(a.isin(['Kick', 'Handball', 'Ground Kick'])))
# x = events from the final disposal to the end of the possession, largest first
(
    t
    .assign(x=lambda counts: counts.possessionCount - counts.finalDisposal)
    .sort_values('x', ascending=False)
)