In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import torch
from hockey_rink import NHLRink, IIHFRink, NWHLRink
from sklearn.model_selection import train_test_split

In [2]:
# Names of the CSV columns that are passed to `usecols` when the data is read.
columns = [
    'eventname',
    'ishomegame',
    'manpowersituation',
    'outcome',
    'playerprimaryposition',
    'xadjcoord',
    'yadjcoord',
]

In [3]:
# Read the event CSV, restricted to the columns listed in `columns`.
data = pd.read_csv(
    'data/main.csv',
    usecols=columns,
)

In [4]:
# Rows whose event name is exactly 'goal'.
goal_data = data.loc[data['eventname'].eq('goal')]

# Note: the second entry of `.shape` is the number of columns, not the number of goals.
_, n_goal_columns = goal_data.shape
print(n_goal_columns)

7


In [5]:
# Start from an empty DataFrame; the next cell fills `frame` with the rows
# taken around each goal event.
frame = pd.DataFrame()

In [6]:
# For every goal, collect the goal row together with up to PRE_GOAL_EVENTS rows
# that precede it, then stack all windows into a single frame.
#
# * The window start is clamped at 0: with a plain `i - 30`, a goal within the
#   first 30 rows would give a negative start, which slices from the END of
#   `data` and silently yields a wrong (usually empty) window.
# * Windows are gathered in a list and concatenated once instead of growing
#   `frame` with `pd.concat` inside the loop (quadratic copying). This also
#   makes the cell idempotent: re-running it no longer appends duplicates.
# * Slicing is positional, so the labels in `goal_data.index` are assumed to
#   equal row positions in `data` (true for the default index produced by
#   `read_csv`) -- TODO confirm if `data` is ever re-indexed or filtered.
# NOTE(review): windows are taken purely by row position, so they may overlap
# when goals are close together; confirm that this is intended.
PRE_GOAL_EVENTS = 30

goal_windows = [
    data.iloc[max(i - PRE_GOAL_EVENTS, 0): i + 1]
    for i in goal_data.index
]

frame = (
    pd.concat(goal_windows, ignore_index=True, axis=0)
    if goal_windows
    else pd.DataFrame()  # no goals: same empty frame the notebook started from
)

In [7]:
class SportsData(torch.utils.data.Dataset):
    """Dataset of shuffled, overlapping fixed-length windows over a row sequence.

    Every run of ``seq_len`` consecutive rows of ``norm_data`` (stride 1)
    becomes one sample. The samples are stored in ``self.samples`` as a
    ``float32`` array whose first axis indexes the windows, in random order.

    Windows slide over the whole input, so if ``norm_data`` is several
    independent segments stacked together, some windows will span a segment
    boundary.
    """

    def __init__(self, norm_data, seq_len, seed=None):
        """Build and shuffle the windows.

        Args:
            norm_data: Array-like of rows (e.g. a 2-D array or a numeric
                DataFrame); converted to ``float32``.
            seq_len: Number of consecutive rows per window; must be >= 1.
            seed: Optional integer seed for the shuffle. When ``None``
                (default) the global torch RNG is used, as before.

        Raises:
            ValueError: If ``seq_len`` is smaller than 1.
        """
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")

        values = np.asarray(norm_data, dtype=np.float32)

        # Fewer rows than seq_len means there are no complete windows.
        n_windows = max(values.shape[0] - seq_len + 1, 0)

        # A dedicated generator makes the shuffle reproducible without
        # touching the global RNG state.
        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed)
        order = torch.randperm(n_windows, generator=generator).numpy()

        # Row indices of every window: shuffled start + offsets 0..seq_len-1.
        # Indexing with this (n_windows, seq_len) array gathers all windows at
        # once and keeps the trailing dimensions even when n_windows == 0.
        window_rows = order[:, None] + np.arange(seq_len)[None, :]
        self.samples = values[window_rows]

    def __len__(self):
        """Return the number of windows."""
        return self.samples.shape[0]

    def __getitem__(self, idx):
        """Return the window(s) selected by ``idx`` along the first axis."""
        return self.samples[idx, :, :]

In [8]:
# Number of consecutive rows in each window built by SportsData.
seq_len = 24

In [9]:
def encoding(norm_data):
    """Ordinal-encode the categorical columns and min-max scale every column.

    The columns 'eventname', 'manpowersituation', 'outcome' and
    'playerprimaryposition' are each encoded with their own
    ``OrdinalEncoder``; the whole table is then scaled to the range [0, 1]
    with a ``MinMaxScaler``.

    The input DataFrame is NOT modified: encoding is applied to a copy, so the
    function can be re-run on the same frame and the raw values stay
    available to the caller.

    Args:
        norm_data: DataFrame containing the four categorical columns above;
            all other columns are assumed to be numeric or boolean already.

    Returns:
        A pair ``(scaled, fitted)`` where ``scaled`` is the array returned by
        the scaler and ``fitted`` is the tuple
        ``(enc_event, enc_manpow, enc_out, enc_playerpos, scaler)`` of fitted
        transformers.
    """
    # Copy first: assigning encoded columns onto the argument would silently
    # overwrite the caller's frame.
    encoded = norm_data.copy()

    categorical_columns = (
        'eventname',
        'manpowersituation',
        'outcome',
        'playerprimaryposition',
    )

    encoders = []
    for column in categorical_columns:
        encoder = sklearn.preprocessing.OrdinalEncoder()
        # The encoder expects a 2-D input, hence the single-column reshape.
        column_values = np.asarray(encoded[column]).reshape(-1, 1)
        encoded[column] = encoder.fit_transform(column_values).ravel()
        encoders.append(encoder)

    enc_event, enc_manpow, enc_out, enc_playerpos = encoders

    scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(np.asarray(encoded))

    return scaled, (enc_event, enc_manpow, enc_out, enc_playerpos, scaler)

In [10]:
# Encode and scale `frame`, then unpack the fitted encoders and the scaler.
norm_data_encoded, fitted_transformers = encoding(frame)
enc_event, enc_manpow, enc_out, enc_playerpos, scaler = fitted_transformers

In [11]:
# Cut the encoded rows into shuffled windows of `seq_len` rows each.
sports_data = SportsData(norm_data=norm_data_encoded, seq_len=seq_len)

In [12]:
# Split the windows 80/20 into train and test sets with a fixed random state.
train_data, test_data = train_test_split(
    sports_data,
    train_size=0.8,
    random_state=2022,
)

In [13]:
# Pickle both splits under data/. The `with` block closes each file on exit.
for split_path, split in (('data/train_data.pkl', train_data),
                          ('data/test_data.pkl', test_data)):
    with open(split_path, 'wb') as f:
        pickle.dump(split, f)

In [14]:
# Pickle the fitted event-name encoder and the scaler under scaler/.
# The directory is created first so that a fresh checkout does not fail with
# FileNotFoundError after all the preprocessing has already run.
# NOTE(review): enc_manpow, enc_out and enc_playerpos are not saved in this
# cell -- confirm they are not needed to decode results later.
os.makedirs('scaler', exist_ok=True)

for artifact_path, artifact in (('scaler/enc_event.pkl', enc_event),
                                ('scaler/scaler.pkl', scaler)):
    # The `with` block closes the file; no explicit close() is needed.
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)