In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
# Notebook display settings: no limit on displayed columns, at most 50 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 50

# Read the chain event data from the CSV file into a DataFrame.
chains_2023 = pd.read_csv("../data/chains_data.csv")

## Rename columns: drop the dotted prefixes/suffixes and use camelCase names
chains_2023 = chains_2023.rename(
    columns={
        'homeTeam.teamName': "homeTeam",
        'awayTeam.teamName': "awayTeam",
        'homeTeamScore.totalScore': 'homeTeamScore',
        'awayTeamScore.totalScore': 'awayTeamScore',
        'chain_number': "chainNumber",
        'venue.name': "venueName",
        'team': "playingFor",
    }
)

# Add normalised coordinates and give rows without a playerId the
# placeholder id 'CONTEXT'.
chains_2023 = (
    chains_2023
    .assign(
        # x divided by half the venue length
        x_norm=lambda d: d['x'] / (d['venueLength'] / 2),
        ## The average ratio of width/length in the league is about 0.78 or so, round up to 0.8
        y_norm=lambda d: d['y'] / (d['venueWidth'] / 2) * 0.8,
    )
    .fillna({'playerId': 'CONTEXT'})
)

# Create id for each possession: within every (roundNumber, homeTeam, period,
# chainNumber) group the id starts at 1 and increases by one each time
# playerId differs from the previous row's playerId.
def _possession_run_ids(player_ids):
    """Number consecutive runs of identical values in a Series (1, 2, 3, ...)."""
    starts_new_run = player_ids.ne(player_ids.shift())
    return starts_new_run.cumsum()

chains_2023['possessionId'] = (
    chains_2023
    .groupby(["roundNumber", "homeTeam", "period", "chainNumber"], group_keys=False)['playerId']
    .apply(_possession_run_ids)
)

# Identify duplicate rows that show the player gaining possession and remove
# them. Rows count as duplicates when they agree on every column in `subset`;
# keep='last' leaves the final row of each duplicated set unflagged.
dupes = chains_2023.duplicated(
    subset=[
        'season',
        'roundNumber',
        'homeTeam',
        'period',
        'periodSeconds',
        'playerId',
        'description',
    ],
    keep='last',
)
chains_2023 = chains_2023.loc[~dupes]

# Lookup table of player names: one row per distinct
# (playerId, firstName, surname) combination, skipping rows where any of the
# three is missing.
player_info = (
    chains_2023[['playerId', 'firstName', 'surname']]
    .drop_duplicates()
    .dropna()
)
# Build full_name from player_info's own columns so the result does not
# depend on implicit index alignment with the much larger chains_2023 frame,
# then keep only playerId and full_name.
player_info = (
    player_info
    .assign(full_name=player_info['firstName'] + " " + player_info['surname'])
    .drop(columns=['firstName', 'surname'])
)
# player_info.set_index('playerId', inplace=True)

In [None]:
## Create column lists for grouping, for easy summarising.
## Each level extends the previous one with one more key column.

game_identifiers = ['season', 'roundNumber', 'homeTeam']
chain_identifiers = [*game_identifiers, 'chainNumber']
possession_identifiers = [*chain_identifiers, 'possessionId']

In [None]:
def paste(outcm_strings):
    """Join an iterable of strings into a single string separated by ', '."""
    return str.join(', ', outcm_strings)

def count_bounce(x):
    """Count the elements of ``x`` that are equal to "Bounce".

    Parameters
    ----------
    x : pandas Series or NumPy array
        Must support element-wise ``==``; it is used as an aggregation
        function over the ``description`` column.

    Returns
    -------
    int
        Number of elements equal to "Bounce".
    """
    # Vectorised reduction instead of iterating the Series with builtin sum().
    return int((x == "Bounce").sum())

# Collapse the event rows into one row per possession. Every key column below
# is carried through as a regular column (as_index=False).
possession_summary = (
    chains_2023
    .groupby(
        [
            'season', 'roundNumber', 'homeTeam', 'playingFor',
            'venueName', 'venueLength', 'venueWidth',
            'period', 'chainNumber', 'possessionId', 'playerId',
        ],
        as_index=False,
    )
    .agg(
        # number of event rows in the possession
        n=pd.NamedAgg(column='x', aggfunc='size'),
        # all event descriptions joined with ', '
        outcomes=pd.NamedAgg(column='description', aggfunc=paste),
        # coordinates of the first and last event of the possession
        xInitialPoss=pd.NamedAgg(column='x', aggfunc='first'),
        yInitialPoss=pd.NamedAgg(column='y', aggfunc='first'),
        xFinalPoss=pd.NamedAgg(column='x', aggfunc='last'),
        yFinalPoss=pd.NamedAgg(column='y', aggfunc='last'),
        # how many events are described as "Bounce"
        bounces=pd.NamedAgg(column='description', aggfunc=count_bounce),
        # chain-level states, taken from the first row of the possession
        chainInitialState=pd.NamedAgg(column='initialState', aggfunc='first'),
        chainFinalState=pd.NamedAgg(column='finalState', aggfunc='first'),
        # possession-level states: first and last event description
        initialState=pd.NamedAgg(column='description', aggfunc='first'),
        finalState=pd.NamedAgg(column='description', aggfunc='last'),
    )
)

In [None]:
def calc_dist(x0, y0, x1, y1):
    """Euclidean distance between the points (x0, y0) and (x1, y1).

    Each argument may be a scalar or an array-like. Inputs are converted to
    NumPy arrays first, so the result is computed element-wise by position
    and any pandas index on the inputs is ignored.
    """
    delta_x = np.asarray(x1) - np.asarray(x0)
    delta_y = np.asarray(y1) - np.asarray(y0)
    squared_distance = delta_x**2 + delta_y**2
    return np.sqrt(squared_distance)

# Start position and player of the NEXT possession by the same team in the
# same chain. Shifting inside these groups (rather than over the whole frame)
# stops the last possession of a chain/team/game from picking up an unrelated
# row as its "recipient"; such possessions get NaN instead.
next_possession = (
    possession_summary
    .groupby(['season', 'roundNumber', 'homeTeam', 'playingFor', 'period', 'chainNumber'])[
        ['xInitialPoss', 'yInitialPoss', 'playerId']
    ]
    .shift(-1)
)

# The x normalisation used elsewhere in this notebook (x / (venueLength / 2))
# implies x is measured from the centre of the ground, so the goal is taken
# to be at (venueLength / 2, 0) -- NOTE(review): confirm against the data.
goal_x = possession_summary.venueLength / 2

possession_summary = possession_summary.assign(
    # distance between the first and last event of the possession
    distanceFromPoss=calc_dist(
        possession_summary.xInitialPoss, possession_summary.yInitialPoss,
        possession_summary.xFinalPoss, possession_summary.yFinalPoss,
    ),
    # distance from where this possession ended to where the next one started
    distanceDisposal=calc_dist(
        possession_summary.xFinalPoss, possession_summary.yFinalPoss,
        next_possession.xInitialPoss, next_possession.yInitialPoss,
    ),
    initialDistFromGoal=calc_dist(
        possession_summary.xInitialPoss, possession_summary.yInitialPoss, goal_x, 0,
    ),
    finalDistFromGoal=calc_dist(
        possession_summary.xFinalPoss, possession_summary.yFinalPoss, goal_x, 0,
    ),
    disposalRecipient=next_possession.playerId,
)
# Negative values mean the possession finished closer to the goal than it started.
possession_summary['deltaDistFromGoal'] = possession_summary.finalDistFromGoal - possession_summary.initialDistFromGoal

In [None]:
## Sanity check - look at the possession with the most negative change in
## distance from goal and pull its underlying event rows.
# 'period' is part of the join key because possessionId is numbered within
# (roundNumber, homeTeam, period, chainNumber); without it the join could
# match events from another period that share chainNumber and possessionId.
(
    possession_summary
    .sort_values('deltaDistFromGoal', ascending=True)
    .head(1)
    .merge(
        chains_2023,
        on=['season', 'roundNumber', 'homeTeam', 'period', 'chainNumber', 'possessionId'],
        how='inner',
    )
)

In [None]:
## Sanity check - visualise a possession
# Select the event rows with roundNumber 1, homeTeam "Richmond", period 1 and
# chainNumber 2, then display them.
df = chains_2023.loc[
    chains_2023['roundNumber'].eq(1)
    & chains_2023['homeTeam'].eq("Richmond")
    & chains_2023['period'].eq(1)
    & chains_2023['chainNumber'].eq(2)
]
df
# plt.scatter('x', 'y', c='chainNumber', data=df)

In [None]:
# Create pass networks - subset the possession summary and join on player names

# First join: attach the name of the player who had the possession.
pass_network = possession_summary[
    [
        'season', 'roundNumber', 'playingFor', 'period', 'chainNumber',
        'possessionId', 'playerId', 'disposalRecipient', 'finalState',
    ]
].merge(player_info, how='inner', on='playerId')

# Second join: attach the recipient's name. The columns present on both sides
# (playerId, full_name) receive the suffixes below; the '_reciever' spelling
# is kept as-is because it becomes part of the column names.
pass_network = pass_network.merge(
    player_info,
    how='inner',
    left_on='disposalRecipient',
    right_on='playerId',
    suffixes=['_disposer', '_reciever'],
)

# Count interactions per season, round and team, most frequent first
(
    pass_network
    .groupby(['season', 'roundNumber', 'playingFor'])
    .value_counts(['full_name_disposer', 'full_name_reciever', 'finalState'])
    .sort_values(ascending=False)
)