In [1]:
import json

import numpy as np
import torch
# Load the id -> name lookup for action types. JSON object keys are always
# strings, so the ids come back as '0', '1', ... rather than ints.
# The encoding is pinned so the read does not depend on the platform locale.
with open('action_types.json', 'r', encoding='utf-8') as f:
    action_types = json.load(f)
action_types

{'0': 'pass',
 '1': 'cross',
 '2': 'throw_in',
 '3': 'freekick_crossed',
 '4': 'freekick_short',
 '5': 'corner_crossed',
 '6': 'corner_short',
 '7': 'take_on',
 '8': 'foul',
 '9': 'tackle',
 '10': 'interception',
 '11': 'shot',
 '12': 'shot_penalty',
 '13': 'shot_freekick',
 '14': 'keeper_save',
 '15': 'keeper_claim',
 '16': 'keeper_punch',
 '17': 'keeper_pick_up',
 '18': 'clearance',
 '19': 'bad_touch',
 '20': 'non_action',
 '21': 'dribble',
 '22': 'goalkick',
 '23': 'receival',
 '24': 'interception',
 '25': 'out',
 '26': 'offside',
 '27': 'goal',
 '28': 'owngoal',
 '29': 'yellow_card',
 '30': 'red_card',
 '31': 'corner',
 '32': 'freekick'}

In [2]:
import pandas as pd

# Read the exported action log; the first CSV column is used as the row index.
df = pd.read_csv(filepath_or_buffer="WSL_actions.csv", index_col=0)

# Bucket the actions per match half: one group for every
# (game_id, period_id) pair present in the data.
grouped = df.groupby(by=['game_id', 'period_id'])

# Number of groups, shown as the cell output.
len(grouped)

652

In [24]:
from preprocess_data import *

# Tokenisation / windowing settings, hoisted so they can be tuned in one place.
N_BINS_X = 10
N_BINS_Y = 10
N_PREV_ACTIONS = 5

# One entry per (game_id, period_id) group. Each group is tokenised and
# windowed on its own, so no window ever mixes actions from two groups.
Xs = []
ys = []
for _group_key, group in grouped:
    # Run the preprocessing helpers in order and keep only the resulting
    # 'action_token' column as a NumPy array.
    # NOTE(review): the helpers live in preprocess_data; presumably each token
    # is a "team,action,x_bin,y_bin" string — confirm against that module.
    tokens = (
        group
        .pipe(add_coordinate_bins, n_bins_x=N_BINS_X, n_bins_y=N_BINS_Y)
        .pipe(add_team_as_dummy)
        .pipe(get_action_type_names, action_types)
        .pipe(get_action_tokens)
        ['action_token'].values
    )

    # X: windows of the N_PREV_ACTIONS preceding tokens; y: the token that
    # follows each window (presumably — see sequence_to_sliding_window).
    X, y = sequence_to_sliding_window(tokens, n_prev_actions=N_PREV_ACTIONS)
    Xs.append(X)
    ys.append(y)

[array(['True,receival,4,4', 'True,dribble,4,4', 'True,pass,4,4',
       'True,receival,3,0', 'True,dribble,3,0'], dtype=object), array(['True,dribble,4,4', 'True,pass,4,4', 'True,receival,3,0',
       'True,dribble,3,0', 'True,bad_touch,1,1'], dtype=object), array(['True,pass,4,4', 'True,receival,3,0', 'True,dribble,3,0',
       'True,bad_touch,1,1', 'False,goalkick,0,4'], dtype=object), array(['True,receival,3,0', 'True,dribble,3,0', 'True,bad_touch,1,1',
       'False,goalkick,0,4', 'True,interception,4,1'], dtype=object), array(['True,dribble,3,0', 'True,bad_touch,1,1', 'False,goalkick,0,4',
       'True,interception,4,1', 'True,pass,4,1'], dtype=object), array(['True,bad_touch,1,1', 'False,goalkick,0,4',
       'True,interception,4,1', 'True,pass,4,1', 'False,interception,3,1'],
      dtype=object), array(['False,goalkick,0,4', 'True,interception,4,1', 'True,pass,4,1',
       'False,interception,3,1', 'False,pass,3,1'], dtype=object), array(['True,interception,4,1', 'True,pass,4,1

In [7]:
# Sanity check: the windowing loop must have produced exactly one X/y pair
# per (game_id, period_id) group. This fails loudly if cells were run out of
# order and the lists no longer match `grouped`.
assert len(Xs) == len(ys) == len(grouped), "Xs/ys are out of sync with `grouped`"
len(Xs)

652


In [11]:
# Stack the per-group pieces into one dataset. Concatenation is along the
# first axis, so the order of groups (and of rows within each group) is kept
# and row i of X still corresponds to y[i].
X = np.concatenate(Xs)
y = np.concatenate(ys)

# Every input window needs exactly one target.
assert X.shape[0] == y.shape[0], "X and y have different numbers of samples"
X.shape, y.shape

((936660, 5), (936660,))

In [14]:
# Persist the assembled dataset next to the notebook. X and y are NumPy
# arrays at this point (they are built with np.concatenate), so torch.save
# serialises them via pickle rather than as tensors.
# NOTE(review): the arrays appear to hold object-dtype string tokens; recent
# PyTorch releases default torch.load to weights_only=True, which presumably
# rejects such payloads — confirm how these files are read back.
for out_path, array in (('X.pt', X), ('y.pt', y)):
    torch.save(array, out_path)