In [89]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [90]:
# Read the raw offense sheet, move the 'Unnamed: 0' labels up two rows, keep
# only fully populated rows, drop the listed columns and store the labels as str.
offense = (
    pd.read_csv('2024 Offense.csv')
    .assign(**{'Unnamed: 0': lambda raw: raw['Unnamed: 0'].shift(-2)})
    .dropna()
    .drop(columns=['SH%', 'TO', 'P', 'A', 'GB', 'TCH', 'PAS'])
    .astype({'Unnamed: 0': str})
)

In [91]:
# Per-game, per-team totals of the four shooting/scoring columns.
# A single groupby over all four columns yields the same frame as summing each
# column separately and outer-merging the pieces on (GID, TID): every partial
# result is built from the same rows, so the key sets are identical.
offense = (
    offense.groupby(['GID', 'TID'])[['1G', '2G', 'SH', 'SOG']]
    .sum()
    .reset_index()
)

In [92]:
defense = pd.read_csv('2024 Defense.csv')
# The label column's header is a UTF-8 byte-order mark that shows up as a
# three-character mojibake string when decoded as cp1252. A BOM can only sit at
# the very start of the file, so that header is always the first column;
# addressing it by position keeps this cell working however the BOM happens to
# be decoded (mojibake, '\ufeff', or stripped to 'Unnamed: 0').
label_col = defense.columns[0]
# Move the labels up two rows, then keep only fully populated rows.
defense[label_col] = defense[label_col].shift(-2)
defense = defense.dropna()
defense = defense.drop(columns=['GB', 'P', 'PEN', 'SH', 'SOG', 'TCH', 'PAS'])
defense['CT'] = defense['CT'].astype(float)

In [93]:
# Read the raw goalie sheet, move the 'Unnamed: 0' labels up two rows, keep
# only fully populated rows and discard the 'SV%' column.
goalie = (
    pd.read_csv('2024 Goalie.csv')
    .assign(**{'Unnamed: 0': lambda raw: raw['Unnamed: 0'].shift(-2)})
    .dropna()
    .drop(columns=['SV%'])
)

In [94]:
# Load the possession table unchanged; it is joined on GID/TID further down.
posession = pd.read_csv('2024 Posession.csv')

In [95]:
# Sum CT per (GID, TID) pair and return GID/TID as ordinary columns.
ct_df = defense.groupby(['GID', 'TID'])['CT'].sum().reset_index()

In [96]:
# OTID holds the TID values of the same GID in reversed row order; with exactly
# two rows per GID this pairs each row with the other row's TID.
# NOTE(review): a GID with a single row maps to its own TID, and more than two
# rows are merely reversed — confirm every GID has exactly two TIDs.
ct_df['OTID'] = ct_df.groupby('GID')['TID'].transform(lambda x: x.iloc[::-1].values)

In [97]:
# Outer-join the goalie rows with the CT, offense and possession tables, always
# on the shared (GID, TID) key pair.
clean_data = (
    goalie
    .merge(ct_df, on=['GID', 'TID'], how='outer')
    .merge(offense, on=['GID', 'TID'], how='outer')
    .merge(posession, on=['GID', 'TID'], how='outer')
)

In [98]:
# Mean of every other column for each distinct 'Unnamed: 0' value.
cur_data = clean_data.groupby('Unnamed: 0').mean().reset_index()

In [99]:
# Persist the per-label means. to_csv also writes the row index as an unnamed
# first column by default.
cur_data.to_csv('sv_cur.csv')

In [100]:
# Sanity check: column dtypes and non-null counts of the merged frame.
clean_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  46 non-null     object 
 1   SA          46 non-null     float64
 2   SV          46 non-null     float64
 3   GID         46 non-null     float64
 4   TID         46 non-null     float64
 5   CT          46 non-null     float64
 6   OTID        46 non-null     float64
 7   1G          46 non-null     float64
 8   2G          46 non-null     float64
 9   SH          46 non-null     float64
 10  SOG         46 non-null     float64
 11  PS%         46 non-null     float64
dtypes: float64(11), object(1)
memory usage: 4.4+ KB


In [101]:
def make_context(df):
    """Replace per-game stats with the running average of the earlier games.

    Rows are ordered by ``GID`` and grouped by the ``'Unnamed: 0'`` column.
    Within each group, every stat value is replaced by the mean of the values
    that precede it in that order; the first row of a group, which has no
    predecessor, receives ``0``.

    ``'SV'`` is the one exception: its running average is written to a new
    ``'SV_x'`` column and the raw ``'SV'`` values are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Unnamed: 0'``, ``'GID'`` and the stat columns
        ``'SA'``, ``'SV'``, ``'CT'``, ``'1G'``, ``'2G'``, ``'SH'``, ``'SOG'``
        and ``'PS%'``.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``GID``; the frame passed in is not modified.
    """
    # Source column -> column that receives its previous-games average.
    column_map = {
        'SA': 'SA',
        'SV': 'SV_x',
        'CT': 'CT',
        '1G': '1G',
        '2G': '2G',
        'SH': 'SH',
        'SOG': 'SOG',
        'PS%': 'PS%',
    }

    def calculate_previous_avg(group):
        """Return, for each position, the mean of all earlier values (0 at the start)."""
        prev_avg = []
        total = 0
        for count, value in enumerate(group):
            # count == number of values already folded into total.
            prev_avg.append(total / count if count else total)
            total += value
        return pd.Series(prev_avg, index=group.index)

    # sort_values returns a new frame, so the caller's frame stays untouched.
    df = df.sort_values(by='GID')

    for source, target in column_map.items():
        # transform always hands back a Series aligned to df's own index.
        # The previous apply(...).reset_index(level=0, drop=True) only lined up
        # when apply prepended the group key as an extra index level, which
        # depends on the pandas version / group_keys default.
        df[target] = df.groupby('Unnamed: 0')[source].transform(calculate_previous_avg)

    return df

In [102]:
# Rebinds clean_data to the transformed frame, so re-running this cell on its
# own would average the already-averaged columns; re-run from the merge cell.
clean_data = make_context(clean_data)

In [103]:
# Order rows by game and team, then add an 'O'-prefixed copy of each listed
# stat holding the same game's values in reversed row order.
clean_data = clean_data.sort_values(by=['GID', 'TID'])
clean_data = clean_data.assign(**{
    'O' + stat: clean_data.groupby('GID')[stat].transform(lambda x: x.iloc[::-1].values)
    for stat in ('PS%', '1G', '2G', 'SH', 'SOG')
})

In [104]:
# Drop the first row of every 'Unnamed: 0' group: make_context fills that row's
# averaged columns with a placeholder 0 because no earlier game exists.
# Selecting the columns explicitly (grouping column included) keeps the result
# identical while avoiding the DataFrameGroupBy.apply deprecation warning about
# operating on the grouping columns.
clean_data = (
    clean_data.groupby('Unnamed: 0')[list(clean_data.columns)]
    .apply(lambda x: x.iloc[1:])
    .reset_index(drop=True)
)
# NOTE(review): presumably OPS% == 0 marks rows whose opponent value is itself
# a first-game placeholder — confirm a genuine 0 cannot occur.
clean_data = clean_data[clean_data['OPS%'] != 0]

  clean_data = clean_data.groupby('Unnamed: 0').apply(lambda x: x.iloc[1:]).reset_index(drop=True)


In [None]:
# Persist the final table. to_csv also writes the row index as an unnamed first
# column by default.
clean_data.to_csv('sv_clean.csv')