In [None]:
import pandas as pd
import numpy as np
from scipy.stats import binned_statistic_2d
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
from plot_field import generate_afl_oval, plot_events
import seaborn as sns
import matplotlib.pyplot as plt

def calc_dist(x0, y0, x1, y1):
    """Return the Euclidean distance between (x0, y0) and (x1, y1).

    Each argument may be a scalar or an array-like. Inputs are converted to
    float NumPy arrays and combined element-wise, so the usual NumPy
    broadcasting rules apply (e.g. a column of points against one fixed point).
    """
    start_x, start_y, end_x, end_y = (
        np.array(coord, dtype=float) for coord in (x0, y0, x1, y1)
    )
    dx = end_x - start_x
    dy = end_y - start_y
    return np.sqrt(dx**2 + dy**2)

# Load the event table from disk.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
xT = pd.read_pickle("../data/xT.pkl")

# Distance from each row's (x_norm, y_norm) to the fixed point (82, 0)
# — presumably the goal location in normalised coordinates; TODO confirm.
xT["initialDistFromGoal"] = calc_dist(xT["x_norm"], xT["y_norm"], 82, 0)

## Show negative and positive delta xT

In [None]:
# Flag every event whose change in expected threat (deltaXT) is negative.
# Vectorised comparison instead of a row-by-row apply.
xT['neg_xt'] = xT.deltaXT < 0

# Sum deltaXT per player and season, split into the negative and the
# non-negative part (one row per neg_xt value).
xT_summary = (
    xT.groupby(['season', 'playingFor', 'playerId', 'firstName', 'surname', 'neg_xt'])
    .deltaXT.sum()
    .sort_values(ascending=False)
    .reset_index()
)
xT_summary['playerName'] = xT_summary.firstName + ' ' + xT_summary.surname + '-' + xT_summary.season.astype(str)

# Net deltaXT per player-season: both parts added back together.
xT_summary['xT_tot'] = xT_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')
xT_summary

In [None]:
# Flag every event whose change in expected threat (deltaXT) is negative.
xT['neg_xt'] = xT.deltaXT < 0

# 2023 only: sum deltaXT per player, split into negative and non-negative parts.
xT_summary = (
    xT[xT.season == 2023]
    .groupby(['season', 'playingFor', 'playerId', 'firstName', 'surname', 'neg_xt'])
    .deltaXT.sum()
    .sort_values(ascending=False)
    .reset_index()
)
xT_summary['playerName'] = xT_summary.firstName + ' ' + xT_summary.surname + '-' + xT_summary.season.astype(str)
# Net deltaXT per player-season: negative and non-negative parts added together.
xT_summary['xT_tot'] = xT_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')

# Top 40 summary rows by net xT (rows, not players: a player can contribute
# more than one row).
plot_df = xT_summary.sort_values(by='xT_tot', ascending=False).head(40)

# The two bar layers are drawn from different subsets of plot_df. Give both
# the same explicit category order so each player's red and green bars are
# guaranteed to sit on the same row, even if a player is missing from one subset.
player_order = plot_df.playerName.drop_duplicates().tolist()

# Red: summed negative deltaXT. Green: net xT (xT_tot), drawn on the same axes.
sns.barplot(x='deltaXT', y='playerName', data=plot_df[plot_df.neg_xt], order=player_order, color='red')
sns.barplot(x='xT_tot', y='playerName', data=plot_df[~plot_df.neg_xt], order=player_order, color='green').set(title='Net Expected Threat\nInc. shots at goal',xlabel = 'Net xThreat',ylabel='Player name + season')
plt.show()

In [None]:
# Display the current plot_df (the rows behind the most recent bar chart when run in order).
plot_df

In [None]:
# NOTE(review): this cell repeats the display in the cell directly above — likely a leftover duplicate.
plot_df

In [None]:
# Keep the neg_xt flag on xT up to date (not used by the grouping below).
xT['neg_xt'] = xT.deltaXT < 0

# 2023 only: sum deltaXT per player, split by whether the event was a shot at goal.
xT_summary = (
    xT[xT.season == 2023]
    .groupby(['season', 'playingFor', 'playerId', 'firstName', 'surname', 'shotAtGoal'])
    .deltaXT.sum()
    .sort_values(ascending=False)
    .reset_index()
)
xT_summary['playerName'] = xT_summary.firstName + ' ' + xT_summary.surname + '-' + xT_summary.season.astype(str)
# Net deltaXT per player-season across both shotAtGoal groups.
xT_summary['xT_tot'] = xT_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')

# Top 40 summary rows by net xT. The original trailing `.reindex()` had no
# arguments and therefore did nothing, so it has been dropped.
plot_df = xT_summary.sort_values(by='xT_tot', ascending=False).head(40)

fig, ax = plt.subplots(figsize=(12, 10))
# dodge=False overlays the shot / non-shot bars on the same row per player.
sns.barplot(x='deltaXT', y='playerName', data=plot_df, hue='shotAtGoal', dodge=False, ax=ax).set(title='Net Expected Threat\nInc. shots at goal',xlabel = 'Net xThreat',ylabel='Player name + season')
plt.show()

In [None]:
# Built here rather than reusing xT_summary from the cell above: that frame is
# grouped by shotAtGoal and has no neg_xt column, which this chart needs.
xT['neg_xt'] = xT.deltaXT < 0

# The chart title says "No shots at goal", so shots are excluded before summing.
# NOTE(review): assumes shotAtGoal is boolean-like (True/False or 1/0) — confirm.
no_shot_events = xT[(xT.season == 2023) & ~xT.shotAtGoal.astype(bool)]

# Sum deltaXT per player, split into negative and non-negative parts.
no_shot_summary = (
    no_shot_events
    .groupby(['season', 'playingFor', 'playerId', 'firstName', 'surname', 'neg_xt'])
    .deltaXT.sum()
    .sort_values(ascending=False)
    .reset_index()
)
no_shot_summary['playerName'] = no_shot_summary.firstName + ' ' + no_shot_summary.surname + '-' + no_shot_summary.season.astype(str)
# Net deltaXT per player-season: both parts added together.
no_shot_summary['xT_tot'] = no_shot_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')

plot_df = no_shot_summary.sort_values(by='xT_tot', ascending=False).head(40)

# Shared explicit category order keeps each player's red and green bars on the same row.
player_order = plot_df.playerName.drop_duplicates().tolist()

# Red: summed negative deltaXT. Green: net xT (xT_tot), drawn on the same axes.
sns.barplot(x='deltaXT', y='playerName', data=plot_df[plot_df.neg_xt], order=player_order, color='red')
sns.barplot(x='xT_tot', y='playerName', data=plot_df[~plot_df.neg_xt], order=player_order, color='green').set(title='Net Expected Threat\nNo shots at goal',xlabel = 'Net xThreat',ylabel='Player name + season')
plt.show()

In [None]:
xT['neg_xt'] = xT.deltaXT.apply(lambda x: x<0)
xT_summary = xT[(xT.season == 2023)].groupby(['season', 'playingFor','playerId', 'firstName', 'surname','neg_xt']).deltaXT.sum().sort_values(ascending=False).reset_index()
xT_summary['playerName'] = xT_summary.firstName + ' ' + xT_summary.surname + '-' + xT_summary.season.astype(str)
xT_summary['xT_tot'] = xT_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')
xT_summary.sort_values(by='xT_tot', ascending=False).head(20)

In [None]:
# 2023 rows for player CD_I996731 whose deltaXT is below -1 (the largest single losses).
xT[(xT.season == 2023) & (xT.playerId == 'CD_I996731') & (xT.deltaXT < -1)]

In [None]:
# Flag every event whose change in expected threat (deltaXT) is negative.
xT['neg_xt'] = xT.deltaXT < 0

# One player (CD_I1005054), all seasons: sum deltaXT per season, split into
# negative and non-negative parts.
xT_summary = (
    xT[xT.playerId.isin(["CD_I1005054"])]
    .groupby(['season', 'playingFor', 'playerId', 'firstName', 'surname', 'neg_xt'])
    .deltaXT.sum()
    .sort_values(ascending=False)
    .reset_index()
)
xT_summary['playerName'] = xT_summary.firstName + ' ' + xT_summary.surname + '-' + xT_summary.season.astype(str)
# Net deltaXT per player-season: both parts added together.
xT_summary['xT_tot'] = xT_summary.groupby(['season', 'playerId']).deltaXT.transform('sum')

plot_df = xT_summary.sort_values(by='xT_tot', ascending=False).head(40)

# Shared explicit category order keeps each season's red and green bars on the same row.
player_order = plot_df.playerName.drop_duplicates().tolist()

# Red: summed negative deltaXT. Green: net xT (xT_tot), drawn on the same axes.
sns.barplot(x='deltaXT', y='playerName', data=plot_df[plot_df.neg_xt], order=player_order, color='red')
sns.barplot(x='xT_tot', y='playerName', data=plot_df[~plot_df.neg_xt], order=player_order, color='green').set(title='Net Expected Threat\nInc. shots at goal',xlabel = 'Net xThreat',ylabel='Player name + season')
plt.show()

In [None]:
# Distinct `description` values recorded for player CD_I1005054 (across all rows in xT).
xT[xT.playerId.isin(["CD_I1005054"])].description.unique()

In [None]:
# JD = CD_I1005054; CC = CD_I996731; TW = CD_I280506

# Colour used for each value of the `description` column.
DISPOSAL_PALETTE = {'Handball': 'red', 'Kick': 'blue', 'Ground Kick': 'green', 'Kickin play on': 'yellow'}

# Player id -> figure title, plotted in this order.
PLAYER_TITLES = {
    'CD_I1005054': "Josh Daicos 2023 disposals",
    'CD_I996731': "Charlie Curnow 2023 disposals",
    'CD_I280506': "Big Tex 2023 disposals",
}

for player_id, title in PLAYER_TITLES.items():
    # The titles say "2023", so restrict to that season; previously every
    # season present in xT was plotted under a 2023 title.
    # NOTE(review): if all seasons were intended, change the titles instead.
    plot_df = xT[(xT.playerId == player_id) & (xT.season == 2023)].copy()

    fig = plt.figure(figsize=(10,8))
    # One point per row at (x_next, y_next), coloured by description.
    sns.scatterplot(x='x_next', y='y_next', hue='description', palette=DISPOSAL_PALETTE, data=plot_df).set(title=title)
    # Project helper from plot_field, called with the ground name after the
    # points are plotted (same order as before).
    generate_afl_oval('MCG')
    plt.show()

## Battle of the mids

- Why isn't Daicos higher?

In [None]:
def label_point(x, y, val, ax):
    """Write each value of `val` as text next to its (x, y) point on `ax`.

    x, y and val are pandas Series; they are aligned on their index before
    labelling. Each label is shifted 0.02 to the right in x so that it does
    not sit directly on top of the marker.
    """
    points = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for point in points.itertuples(index=False):
        ax.text(point.x + .02, point.y, str(point.val))


# Per-player summary for 2023 midfielders, built in this cell so that it runs
# on a fresh kernel (it previously relied on a plot_df created further down).
# disps = number of rows, mean_xT / tot_xT = mean / sum of the positive deltaXT.
# NOTE(review): mirrors the later summary cell without the deltaXT != 0 filter,
# keeping the top 30 players by tot_xT — confirm this is the intended population.
mids_df = (
    xT[xT.position.isin(["MIDFIELDER"]) & (xT.season == 2023)]
    .groupby(['playerId', 'firstName', 'surname'])
    .agg(disps=('x', 'size'),
         mean_xT=('deltaXT', lambda s: s[s > 0].mean()),
         tot_xT=('deltaXT', lambda s: s[s > 0].sum()))
    .sort_values(by='tot_xT', ascending=False)
    .head(30)
    .reset_index()
)

fig = plt.figure(figsize=(12,10))
sns.scatterplot(x='disps', y='mean_xT', data=mids_df)
label_point(mids_df.disps, mids_df.mean_xT, mids_df.surname, plt.gca())

In [None]:
# 2023 midfielder rows with a non-zero deltaXT, summarised per player:
#   disps         - number of such rows
#   n_pos         - how many of them have a positive deltaXT
#   mean_xT       - mean of the positive deltaXT values
#   tot_xT        - sum of the positive deltaXT values
#   ave_goal_dist - mean initialDistFromGoal
# Only the 30 players with the largest tot_xT are kept.
plot_df = (
    xT[(xT.deltaXT != 0) & xT.position.isin(["MIDFIELDER"]) & (xT.season == 2023)]
    .groupby(['playerId', 'firstName', 'surname'])
    .agg(
        disps=('x', 'size'),
        n_pos=('deltaXT', lambda d: (d > 0).sum()),
        mean_xT=('deltaXT', lambda d: d[d > 0].mean()),
        tot_xT=('deltaXT', lambda d: d[d > 0].sum()),
        ave_goal_dist=('initialDistFromGoal', 'mean'),
    )
    .sort_values(by='tot_xT', ascending=False)
    .head(30)
).reset_index()

fig = plt.figure(figsize=(12,10))
sns.scatterplot(x='ave_goal_dist', y='tot_xT', data=plot_df)


def label_point(x, y, val, ax):
    """Annotate every (x, y) point on `ax` with the matching entry of `val`.

    The three Series are aligned on their index, and each label is drawn
    0.02 to the right of its point.
    """
    labelled = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, pt in labelled.iterrows():
        ax.text(pt['x'] + .02, pt['y'], str(pt['val']))


label_point(plot_df.ave_goal_dist, plot_df.tot_xT, plot_df.surname, plt.gca())

In [None]:
# All 2023 midfielder rows (zero-deltaXT rows included), summarised per player:
#   disps         - number of rows
#   n_pos_xT      - how many rows have a positive deltaXT
#   mean_xT       - mean of the positive deltaXT values
#   tot_xT        - sum of the positive deltaXT values
#   ave_goal_dist - mean initialDistFromGoal
# Only the 30 players with the largest tot_xT are kept.
plot_df = (
    xT[xT.position.isin(["MIDFIELDER"]) & (xT.season == 2023)]
    .groupby(['playerId', 'firstName', 'surname'])
    .agg(
        disps=('x', 'size'),
        n_pos_xT=('deltaXT', lambda d: (d > 0).sum()),
        mean_xT=('deltaXT', lambda d: d[d > 0].mean()),
        tot_xT=('deltaXT', lambda d: d[d > 0].sum()),
        ave_goal_dist=('initialDistFromGoal', 'mean'),
    )
    .sort_values(by='tot_xT', ascending=False)
    .head(30)
).reset_index()
plot_df

In [None]:
# Every 2023 midfielder row belonging to a player listed in plot_df,
# ordered by initialDistFromGoal, with a combined display name added.
pdf = (
    xT[xT.position.isin(["MIDFIELDER"]) & (xT.season == 2023) & (xT.playerId.isin(plot_df.playerId))]
    .sort_values(by='initialDistFromGoal')
    .assign(full_name=lambda d: d.firstName + ' ' + d.surname)
)

# Median initialDistFromGoal per player, smallest first; its index fixes the
# row order of the boxplot.
grouped = pdf.groupby(['full_name'])['initialDistFromGoal'].median().sort_values()

fig = plt.figure(figsize=(12,10))
sns.boxplot(data=pdf, x='initialDistFromGoal', y='full_name', order=grouped.index)
plt.show()

In [None]:
# Players to draw, one figure each.
PATH_PLAYER_IDS = ['CD_I1013128', 'CD_I1005054', 'CD_I298210']

# 2023 rows for those players where deltaXT exceeds 0.1.
plot_df = xT[xT.playerId.isin(PATH_PLAYER_IDS) & (xT.season == 2023) & ((xT.deltaXT)>0.1)].copy()

# Convert each start/end bin to a single coordinate: left edge + 3.5 for x and
# left edge + 2.5 for y, rounded to 3 decimals.
# NOTE(review): the offsets look like half a bin width (bins of 7 x 5) — confirm
# against wherever the *_bin columns are created.
plot_df['x_start_mid'] = plot_df.xInitialPoss_bin.apply(lambda s: np.round(s.values.left + 3.5, 3)).astype('float')
plot_df['y_start_mid'] = plot_df.yInitialPoss_bin.apply(lambda s: np.round(s.values.left + 2.5,3)).astype('float')
plot_df['x_end_mid'] = plot_df.xFinalPoss_bin.apply(lambda s: np.round(s.values.left + 3.5, 3)).astype('float')
plot_df['y_end_mid'] = plot_df.yFinalPoss_bin.apply(lambda s: np.round(s.values.left + 2.5,3)).astype('float')
plot_df['full_name'] = plot_df.firstName + ' ' + plot_df.surname

# Count how often each player used each start-bin -> end-bin path.
plot_df = plot_df.groupby(['playerId','full_name','x_start_mid', 'y_start_mid', 'x_end_mid', 'y_end_mid']).size().reset_index(name='n')
# Path frequency scaled to (0, 1] against the busiest path of any player.
plot_df['line_fill'] = plot_df.n/plot_df.n.max()


# `player_id` instead of `id`, which shadowed the Python builtin.
for player_id in PATH_PLAYER_IDS:
    player_paths = plot_df[plot_df.playerId == player_id]

    fig = plt.figure(figsize=(12,10))
    for row in player_paths.itertuples():
        # A numeric string is read by matplotlib as a grayscale level
        # (0 = black, 1 = white), so the most frequent path is drawn lightest.
        # NOTE(review): confirm that is intended rather than the inverse.
        plt.plot([row.x_start_mid, row.x_end_mid], [row.y_start_mid, row.y_end_mid], color=str(row.line_fill), linestyle='-')
        plt.plot([row.x_start_mid], [row.y_start_mid], 'ro')  # red dot: path start
        plt.plot([row.x_end_mid], [row.y_end_mid], 'ko')      # black dot: path end

    # Pass a plain string as the title; the previous `.head(1).values` handed
    # over a NumPy array, which rendered as "['Name']". Fall back to the id
    # when the player has no qualifying rows.
    title = player_paths.full_name.iloc[0] if not player_paths.empty else player_id
    plt.suptitle(title)
    generate_afl_oval('MCG')
    plt.show()

In [None]:
# 2023 rows for player CD_I996731 whose deltaXT is larger than 0.1 in absolute value.
large_moves = xT[xT.playerId.isin(['CD_I996731']) & (xT.season == 2023) & (xT.deltaXT.abs() > 0.1)]

# Only the last expression of a cell is rendered automatically, so the sorted
# table is shown explicitly; as a bare statement its result was discarded.
display(large_moves.sort_values('deltaXT', ascending=False))

# How many of those rows fall under each shotAtGoal value.
large_moves.shotAtGoal.value_counts()