In [None]:
import pandas as pd
import numpy as np
from scipy.stats import binned_statistic_2d
# Notebook display options: show every column of a frame, cap printed rows at 50.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
from plot_field import generate_afl_oval, plot_events
import seaborn as sns
import matplotlib.pyplot as plt

# Half-extents of the field used throughout the notebook: 167 along x (length) and
# 123 along y (width), with the origin at the centre.
venueLength = 167 / 2
venueWidth = 123 / 2
# Bounding box as (x_min, x_max, y_min, y_max); passed as `extent` to imshow below.
venueDims = (-venueLength, venueLength, -venueWidth, venueWidth)
# Venue name handed to generate_afl_oval() when drawing the field outline.
defaultVenue = 'Adelaide Oval'

# Next position
# Load the pre-processed chain events.
# NOTE: pd.read_pickle can execute arbitrary code; only load files produced by this project.
chains_processed = pd.read_pickle("../data/chains_processed.pkl")

# 1. Record scores against kicks
# Tag each scoring row with its value (Goal = 6, Behind = 1, NaN otherwise), then shift the
# column up one row so the score is stored on the event immediately before the scoring row.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
chains_processed['points'] = np.select(
    [chains_processed.description == "Goal", chains_processed.description == "Behind"],
    [6, 1],
    default=np.nan,
)
chains_processed['points'] = chains_processed['points'].shift(-1)

# 2. Reset goal positions
# Scoring rows are relocated onto the goal line (x = 167/2); the y offset depends on the
# outcome. np.select uses the FIRST matching condition, so the order of these lists matters.
finCondList = [chains_processed.description == "Goal",
               chains_processed.behindInfo == 'missLeft',
               chains_processed.behindInfo == 'leftPost',
               chains_processed.behindInfo == 'missRight',
               chains_processed.behindInfo == 'touched',
               chains_processed.behindInfo == 'rightPost']
## Y next pos (one entry per condition above)
finychoiceList = [0,
                  6.4,
                  3.2,
                  -6.4,
                  0,
                  -3.2]
## X next pos (every outcome sits on the goal line)
finxchoiceList = [167/2] * 6

## Record condition for debugging
nextCondChoice = ['G',
                  'ML',
                  'LP',
                  'MR',
                  'Touched',
                  'RP']

# Rows matching none of the conditions keep their x/y and get Cond == 'Def'.
# Goal rows are handled by the first condition (x = 167/2, y = 0), so no separate
# np.where pass for goals is needed afterwards.
chains_processed = chains_processed.assign(
    x=np.select(finCondList, finxchoiceList, default=chains_processed.x),
    y=np.select(finCondList, finychoiceList, default=chains_processed.y),
    Cond=np.select(finCondList, nextCondChoice, default='Def'),
)

# Treat a missing shotAtGoal flag as "not a shot" and force a real boolean column:
# `.eq(True)` maps NaN -> False without relying on np.NaN (removed in NumPy 2.0) or on
# object-dtype down-casting, so later `~shotAtGoal` masks behave as boolean negation.
chains_processed['shotAtGoal'] = chains_processed.shotAtGoal.eq(True)
# 2. shift next pos
# Within each (season, roundNumber, homeTeam, period) group, the following row's position
# becomes this row's end position; the last row of each group gets NaN.
_period_keys = ['season', 'roundNumber', 'homeTeam', 'period']
for _axis in ('x', 'y'):
    chains_processed[_axis + '_next'] = (
        chains_processed.groupby(_period_keys, as_index=False)[_axis].shift(-1)
    )

# 3. Reverse position for turnovers
# NOTE: `finCondList` is rebound here from the list of goal conditions to a single boolean mask.
# The mask is True when EITHER
#   (a) the row is a 'Spoil', the next row's playingFor is not null, and the previous row's
#       playingFor differs from the next row's playingFor; OR
#   (b) possChng and finalPos are both set, finalState is 'turnover' or 'rushed', the row is not
#       'Out On Full After Kick', and the next row is not a 'Spoil'.
# Its only consumer is the commented-out assign below, so it currently has no effect.
finCondList = ((chains_processed.description == 'Spoil') & (~chains_processed.playingFor.shift(-1).isnull()) & \
    (~chains_processed.playingFor.shift(1).eq(chains_processed.playingFor.shift(-1)))) | \
    (chains_processed.possChng & chains_processed.finalPos & \
              (chains_processed.finalState.isin(["turnover", 'rushed'])) & \
              (chains_processed.description != 'Out On Full After Kick') & \
              (chains_processed.description.shift(-1) != 'Spoil'))

# The same two cases kept as separate masks (labelled 'TO1' / 'TO2' in the commented-out code).
# NOTE(review): the second mask excludes both 'Out On Full' and 'Out On Full After Kick', whereas
# case (b) of finCondList excludes only 'Out On Full After Kick' - confirm which is intended.
fCondTemp = [(chains_processed.description == 'Spoil') &  (~chains_processed.playingFor.shift(-1).isnull()) & \
    (~chains_processed.playingFor.shift(1).eq(chains_processed.playingFor.shift(-1))), 
    (chains_processed.possChng & chains_processed.finalPos & \
              (chains_processed.finalState.isin(["turnover", 'rushed'])) & \
              (~chains_processed.description.isin(['Out On Full', 'Out On Full After Kick'])) & \
              (chains_processed.description.shift(-1) != 'Spoil'))]

# Disabled: would negate x_next / y_next on the masked rows and record which case matched.
# chains_processed = chains_processed.assign(x_next = np.where(finCondList, -chains_processed.x_next, chains_processed.x_next),
#                                            y_next = np.where(finCondList, -chains_processed.y_next, chains_processed.y_next),
#                                            Cond2 = np.select(fCondTemp, ['TO1', 'TO2'], 'Def'))

# 3. Set kickin as middle of the goal square
# A row whose description contains 'Kickin' and whose previous row is a Goal or Behind is
# moved to x = -venueLength + 5, y = 0. The mask does not depend on x or y, so it is built once.
_kickin_after_score = (
    chains_processed.description.str.contains('Kickin')
    & chains_processed.description.shift(1).isin(['Goal', 'Behind'])
)
chains_processed['x'] = np.where(_kickin_after_score, -venueLength + 5, chains_processed.x)
chains_processed['y'] = np.where(_kickin_after_score, 0, chains_processed.y)

# 4. Remove game events
# Rows with any of these descriptions are dropped from the event table.
game_events = [
    'Out On Full After Kick',
    'Out On Full',
    'Out of Bounds',
    'Ball Up Call',
    'Goal',
    'Behind',
    'Kick Inside 50 Result',
    'OOF Kick In',
]

_is_game_event = chains_processed.description.isin(game_events)
chains_processed = chains_processed[~_is_game_event]


# 5. Calculate distance for QA
def calc_dist(x0, y0, x1, y1):
    """Return the element-wise straight-line distance from (x0, y0) to (x1, y1).

    Each argument is converted to a float array first, so scalars, lists and Series are
    accepted; a NaN in any coordinate produces NaN for that element.
    """
    start_x, start_y, end_x, end_y = (np.array(v, dtype=float) for v in (x0, y0, x1, y1))
    dx = end_x - start_x
    dy = end_y - start_y
    return np.sqrt(dx**2 + dy**2)

# Distance between each event's position and the next event's position (NaN when there is no next).
chains_processed = chains_processed.assign(
    possDist=calc_dist(
        chains_processed['x'], chains_processed['y'],
        chains_processed['x_next'], chains_processed['y_next'],
    )
)


In [None]:
# (Stray `x_bins` inspection removed: the name is first assigned in the following cell, so
# evaluating it here raised NameError on a fresh kernel and halted Restart & Run All.)

In [None]:
bin_period = 25  # number of equal-width bins along each axis

venueWidth = (123/2)
venueLength = (167/2)
venueDims = (-venueLength, venueLength, -venueWidth, venueWidth)


def bin_values(col, dir='x'):
    """Assign each coordinate in `col` to one of `bin_period` equal-width field bins.

    Parameters
    ----------
    col : array-like of numbers
        Coordinates to bin.
    dir : str, default 'x'
        'x' bins over [-venueLength, venueLength]; any other value bins over
        [-venueWidth, venueWidth].

    Returns
    -------
    pandas.Categorical
        Right-closed interval per element; NaN input stays NaN (unbinned).
    """
    if dir == 'x':
        s, e = -venueLength, venueLength
    else:
        s, e = -venueWidth, venueWidth
    col = np.asarray(col, dtype=float)
    # Pull out-of-field values just inside the boundary. `<=` (not `<`) is deliberate:
    # the bins are right-closed, so a value exactly equal to the lower bound belongs to no
    # interval and would otherwise come back as NaN.
    col = np.select([col > e, col <= s], [e - 0.1, s + 0.1], default=col)
    # With IntervalIndex bins pd.cut uses the intervals exactly as given, so the
    # include_lowest / precision arguments would have no effect and are not passed.
    return pd.cut(col, pd.interval_range(start=s, end=e, periods=bin_period, closed='right'))


# The bin edges themselves, kept for inspection.
x_bins = pd.interval_range(start=-venueLength, end=venueLength, periods=bin_period, closed='right')
y_bins = pd.interval_range(start=-venueWidth, end=venueWidth, periods=bin_period, closed='right')

# Bin every event's start position (x, y) and end position (x_next, y_next).
# Maps new column name -> (source column, axis passed to bin_values).
_bin_sources = {
    'xInitialPoss_bin': ('x', 'x'),
    'yInitialPoss_bin': ('y', 'y'),
    'xFinalPoss_bin': ('x_next', 'x'),
    'yFinalPoss_bin': ('y_next', 'y'),
}
chains_processed = chains_processed.assign(**{
    bin_col: bin_values(chains_processed[src_col], axis)
    for bin_col, (src_col, axis) in _bin_sources.items()
})

# Event descriptions treated as disposals; movement_df keeps only those rows.
disposal_actions = ['Kick', 'Handball', 'Ground Kick', 'Kickin short', 'Kickin long', 'Kickin play on']

_is_disposal = chains_processed.description.isin(disposal_actions)
movement_df = chains_processed[_is_disposal]

Reference: Soccermatics expected-threat (xT) walkthrough - https://soccermatics.readthedocs.io/en/latest/gallery/lesson4/plot_ExpectedThreat.html

## Step 1

Select only positive disposal type actions

TO DO: Include turnovers, out of bounds


In [None]:
# NOTE: pd.read_pickle can execute arbitrary code; only load files produced by this project.
possession_summary = pd.read_pickle("../data/possessions_processed_norm.pkl")

# bin_period, venueWidth/venueLength/venueDims, bin_values() and x_bins/y_bins are defined in
# the chain-binning cell earlier in the notebook. This cell previously re-declared identical
# copies; the duplicates were removed so a later fix cannot be silently shadowed here.
possession_summary = possession_summary.assign(
    xInitialPoss_bin=bin_values(possession_summary.xInitialPoss, 'x'),
    yInitialPoss_bin=bin_values(possession_summary.yInitialPoss, 'y'),
)

# Flag every row of a chain as SAG when any row of that chain has a truthy shotAtGoal.
possession_summary['SAG'] = possession_summary.groupby(['chainId'])['shotAtGoal'].transform(lambda s: any(s.values))

# One row per (possessionId, SAG), then the number of SAG-flagged rows per starting cell.
# NOTE: this is a count, although the column is named 'xG'.
# observed=False is stated explicitly so every bin combination stays in the grid whatever
# the pandas default is.
plot_df = (
    possession_summary
    .drop_duplicates(['possessionId', 'SAG'])
    .groupby(['xInitialPoss_bin', 'yInitialPoss_bin'], observed=False)
    .SAG.apply(lambda s: sum(s.values))
    .rename('xG')
    .reset_index()
)
# Left edge of each bin. `iv.left` works whether pandas passes the lambda a single Interval
# or the whole IntervalIndex of categories; the former `s.values.left` only worked in the
# second case and fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.sort_values(by='y')
plot_df = plot_df.pivot(index='y', columns='x', values='xG')
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(12, 6.5))
ax = fig.add_subplot()
# plot_df rows ascend in y. imshow defaults to origin='upper', which would draw the first
# (lowest-y) row at the TOP of the extent and mirror the map vertically; origin='lower'
# places row 0 at y_min so the image matches the field coordinates.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Reds')

# Leave a margin on either side of the field.
ax.set_xlim(-venueLength - 10, venueLength + 10)
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

In [None]:
# Per starting cell: how many disposals, how many were moves vs shots, and points scored.
# observed=False is explicit so every bin combination is present whatever the pandas default is.
_cell_keys = ['xInitialPoss_bin', 'yInitialPoss_bin']
_is_shot = movement_df.shotAtGoal

all_count = movement_df.groupby(_cell_keys, observed=False).size()
move_count = movement_df[~_is_shot].groupby(_cell_keys, observed=False).size()
shot_count = movement_df[_is_shot].groupby(_cell_keys, observed=False).size()
score_count = movement_df.groupby(_cell_keys, observed=False).points.sum()

# Share of disposals from each cell that were moves / shots (NaN where the cell is empty).
move_probability = move_count.div(all_count, fill_value=0)
shot_probability = shot_count.div(all_count, fill_value=0)

# Points per shot for each cell, aligned to the full grid of all_count.
# A cell with no shots gives 0/0 (NaN) or, if points were recorded there, x/0 (inf);
# both are mapped to 0 so one such cell cannot blow out the colour scale.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
score_probability = (
    score_count.div(shot_count, fill_value=0)
    .reindex(all_count.index)
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .rename('xG')
)

In [None]:
# Number of disposals per starting cell, excluding "Kickin play on".
# The mask is built from movement_df itself; masking with a Series taken from
# chains_processed only worked through pandas' implicit (and warned-about) reindexing.
plot_df = (
    movement_df.loc[movement_df.description != "Kickin play on",
                    ['xInitialPoss_bin', 'yInitialPoss_bin']]
    .value_counts()
    .rename('count')  # explicit: older pandas leaves the counts unnamed
    .reset_index()
)
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.pivot(index='y', columns='x', values='count')

fig = plt.figure(figsize=(10, 6.5))
ax = fig.add_subplot()
# The pivot's rows ascend in y, so origin='lower' is needed to draw row 0 at y_min;
# the default origin='upper' mirrors the map vertically.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Greens')
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

In [None]:
# Number of shots at goal per starting cell.
plot_df = (
    movement_df.loc[movement_df.shotAtGoal, ['xInitialPoss_bin', 'yInitialPoss_bin']]
    .value_counts()
    .rename('count')  # explicit: older pandas leaves the counts unnamed
    .reset_index()
)
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.sort_values(by='y')
plot_df = plot_df.pivot(index='y', columns='x', values='count')
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(10, 6.5))
ax = fig.add_subplot()
# Rows ascend in y, so origin='lower' draws row 0 at y_min; the default origin='upper'
# mirrors the map vertically.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Greens')
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

In [None]:
# Share of disposals from each cell that were moves rather than shots.
# move_probability is built from groupby(...).size() results, which are unnamed, so a plain
# reset_index() labels the value column 0 and pivot(values='count') raised KeyError.
# The series is named explicitly and pivoted on that name.
plot_df = move_probability.rename('move_prob').reset_index()
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.sort_values(by='y')
plot_df = plot_df.pivot(index='y', columns='x', values='move_prob')
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(10, 6.5))
ax = fig.add_subplot()
# Rows ascend in y, so origin='lower' draws row 0 at y_min; the default origin='upper'
# mirrors the map vertically.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Blues')
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

In [None]:
# Share of disposals from each cell that were shots at goal.
# shot_probability is built from groupby(...).size() results, which are unnamed, so a plain
# reset_index() labels the value column 0 and pivot(values='count') raised KeyError.
# The series is named explicitly and pivoted on that name.
plot_df = shot_probability.rename('shot_prob').reset_index()
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.sort_values(by='y')
plot_df = plot_df.pivot(index='y', columns='x', values='shot_prob')
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(10, 6.5))
ax = fig.add_subplot()
# extent uses venueDims (the range the bins were cut over) instead of the previous
# hard-coded (-80, 80, -70.5, 70.5), so this map lines up with the other heatmaps.
# Rows ascend in y, hence origin='lower'; the default origin='upper' mirrors the map vertically.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Reds')
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

In [None]:
# Points per shot for each starting cell.
plot_df = score_probability.rename('xG').reset_index()
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df = plot_df.sort_values(by='y')
plot_df = plot_df.pivot(index='y', columns='x', values='xG')
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(10, 6.5))
ax = fig.add_subplot()
# Rows ascend in y, so origin='lower' draws row 0 at y_min; the default origin='upper'
# mirrors the map vertically.
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                origin='lower', cmap='Reds')
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

## Transition matrices

In [None]:
_start_keys = ['xInitialPoss_bin', 'yInitialPoss_bin']
_end_keys = ['xFinalPoss_bin', 'yFinalPoss_bin']

# Number of events going from each start cell to each end cell.
# observed=False is explicit so every bin combination is present whatever the pandas default is.
transitions = (
    chains_processed.groupby(_start_keys + _end_keys, observed=False)
    .size()
    .rename('total_events')
)

# Per start cell: shots taken, points recorded and total events.
# Aggregations are named by string; passing the Python builtin `sum` makes pandas warn.
move_shot = chains_processed.groupby(_start_keys, observed=False).aggregate(
    shots=('shotAtGoal', 'sum'),
    points=('points', 'sum'),
    total_events=('possessionNum', 'size'),
)

# xG = points per shot. Cells without shots are masked before dividing so they end up as 0
# rather than NaN (0/0) or inf (points recorded in a cell with no shots).
_shots = move_shot.shots.astype(float)
move_shot['xG'] = move_shot.points.div(_shots.where(_shots > 0)).fillna(0)
# Probability that an event from the cell is a move rather than a shot (0 for empty cells).
move_shot['move_prob'] = (1 - move_shot.shots / move_shot.total_events).fillna(0)
# Expected points from shooting at this cell: P(shot) * xG.
move_shot['shotXp'] = (1 - move_shot.move_prob) * move_shot.xG


In [None]:
# Iteratively solve  xT(start) = shotXp(start) + move_prob(start) * sum_end T(start -> end) * xT(end)
_start_keys = ['xInitialPoss_bin', 'yInitialPoss_bin']
_end_keys = ['xFinalPoss_bin', 'yFinalPoss_bin']

# T(start -> end): share of a start cell's events that finish in each end cell.
# It does not depend on xT, so it is computed once instead of on every iteration.
transition_prob = transitions.div(move_shot.total_events, fill_value=0).replace(np.nan, 0)

# xT starts at 0 for every cell, indexed by start cell like move_shot.
xT = pd.Series(0.0, index=move_shot.index)

for i in range(8):
    xT_start = xT  # previous iterate, kept for convergence checks: sum(abs(xT - xT_start))
    # Re-label the current values by END cell before multiplying. xT is indexed by the
    # *Initial* bin names, so without this step pandas aligns on the start cell and each
    # transition is weighted by the value of the cell it leaves rather than the one it reaches.
    xT_end = xT.rename_axis(_end_keys)
    tX = transition_prob.mul(xT_end).groupby(_start_keys, observed=False).sum()
    xT = move_shot.shotXp + move_shot.move_prob * tX
    

In [None]:
plot_df = xT.rename('xG').reset_index()
# Left edge of each bin; `iv.left` works for a single Interval and for a whole IntervalIndex,
# whereas `s.values.left` fails on a plain Interval.
plot_df['x'] = plot_df.xInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
plot_df['y'] = plot_df.yInitialPoss_bin.map(lambda iv: iv.left).astype(float).round(3)
# Rows are sorted by DESCENDING y, which matches imshow's default origin='upper'
# (first row drawn at the top of the extent).
plot_df = plot_df.pivot(index='y', columns='x', values='xG').sort_values(by='y', ascending=False)
plot_df = plot_df.reindex(columns=sorted(plot_df.columns))

fig = plt.figure(figsize=(24, 13))
ax = fig.add_subplot()
pcm = ax.imshow(plot_df, zorder=0, aspect="auto", extent=venueDims,
                cmap='Reds')

# Annotate each cell with its value, centred in the cell. Labels are the bins' left/bottom
# edges, so the centre is half a cell further along each axis; the previous fixed +3.5 offset
# matched neither the cell width nor the cell height.
_cell_w = (venueDims[1] - venueDims[0]) / plot_df.shape[1]
_cell_h = (venueDims[3] - venueDims[2]) / plot_df.shape[0]
for x_left in plot_df.columns:
    for y_bottom in plot_df.index:
        ax.text(x_left + _cell_w / 2, y_bottom + _cell_h / 2,
                np.round(plot_df.loc[y_bottom, x_left], 2),
                ha="center", va="center", color="k")

# Leave a margin on either side of the field.
ax.set_xlim(-venueLength - 10, venueLength + 10)
plt.colorbar(pcm)
generate_afl_oval(defaultVenue)

## Assigning xThreat

In [None]:
# Attach the xT of the start cell and of the end cell to every disposal.
_start_keys = ['xInitialPoss_bin', 'yInitialPoss_bin']
_end_keys = ['xFinalPoss_bin', 'yFinalPoss_bin']

# Name the value column explicitly instead of relying on the implicit label 0 that
# reset_index() gives an unnamed Series; if xT ever carried a name, the old rename silently
# did nothing and xT_end was never created.
_xT_by_start = xT.rename('xT_start').reset_index()
_xT_by_end = xT.rename('xT_end').rename_axis(_end_keys).reset_index()

# validate='many_to_one' asserts each cell appears once in the lookup, so the merges cannot
# duplicate disposal rows.
xT_df = (
    movement_df
    .merge(_xT_by_start, how='left', on=_start_keys, validate='many_to_one')
    .merge(_xT_by_end, how='left', on=_end_keys, validate='many_to_one')
)
# Change in xT from the disposal (NaN when either cell is unbinned).
xT_df['deltaXT'] = xT_df.xT_end - xT_df.xT_start

xT_df.to_pickle("../data/xT.pkl")

In [None]:
# Total deltaXT per player and season, largest first; show the top 20.
_player_keys = ['season', 'playingFor', 'playerId', 'firstName', 'surname']
t = (
    xT_df.groupby(_player_keys)['deltaXT']
    .sum()
    .sort_values(ascending=False)
)
t.head(20)