In [1]:
import pandas as pd
import numpy as np

# OT Data Manipulation

In [2]:
# Load the overtime play data; the first CSV column is used as the row index.
ot_data = pd.read_csv(
    filepath_or_buffer='data/ot_plays.csv',
    index_col=0,
)

# Player BoxScore Manipulation

In [3]:
# Load the per-player box scores; the first CSV column is used as the row index.
player_boxscore = pd.read_csv(
    filepath_or_buffer='data/players_boxscores.csv',
    index_col=0,
)

In [4]:
# Flag rows with 3 or more blocked shots (missing values compare as False).
player_boxscore['3plusBlockedShots'] = player_boxscore['blocked'].ge(3).to_numpy()

In [5]:
# Flag rows with 5 or more shots (missing values compare as False).
player_boxscore['5plusShots'] = player_boxscore['shots'].ge(5).to_numpy()

In [6]:
# Short-handed points = short-handed assists + short-handed goals.
# This is a count, not a boolean flag; a missing value in either input yields NaN.
player_boxscore['shortHandedPoints'] = player_boxscore['shortHandedAssists'].add(
    player_boxscore['shortHandedGoals']
)

In [7]:
# Remove the raw box-score columns that are not kept in the transformed output.
player_boxscore = player_boxscore.drop(
    columns=[
        # time-on-ice breakdowns
        'timeOnIce', 'evenTimeOnIce', 'powerPlayTimeOnIce', 'shortHandedTimeOnIce',
        # special-teams scoring components
        'shortHandedAssists', 'shortHandedGoals', 'powerPlayGoals', 'powerPlayAssists',
        # face-off statistics
        'faceOffPct', 'faceOffWins', 'faceoffTaken',
        # remaining per-game counters
        'penaltyMinutes', 'takeaways', 'giveaways', 'hits', 'plusMinus',
    ]
)

In [8]:
# Flag rows where assists + goals total 3 or more points (missing values compare as False).
player_boxscore['3plusPoints'] = (
    player_boxscore['assists'].add(player_boxscore['goals']).ge(3).to_numpy()
)

In [9]:
# Flag hat tricks: 3 or more goals in the game (missing values compare as False).
player_boxscore['hatTrick'] = player_boxscore['goals'].ge(3).to_numpy()

In [10]:
# Flag players who scored a shootout goal in the game.
# The (gameID, playerID) pairs of period-5 'Scorer' plays are left-joined onto the
# box scores; a non-null `playerID_ot` after the join means the pair was found.
# NOTE(review): this treats every periodNum == 5 'Scorer' play as a shootout goal —
# presumably valid for regular-season games only; confirm ot_data has no playoff games.
player_boxscore = pd.merge(
    left=player_boxscore,
    right=(
        ot_data.loc[
            (ot_data['periodNum'] == 5) & (ot_data['playerType'] == 'Scorer'),
            ['gameID', 'playerID'],
        ]
        # Keep one key row per (game, player): duplicate keys on the right side
        # would multiply the matching box-score rows in the left join.
        .drop_duplicates()
        .rename(columns={'playerID': 'playerID_ot'})
    ),
    left_on=['gameID', 'playerID'],
    right_on=['gameID', 'playerID_ot'],
    how='left',
)

player_boxscore['shootoutGoal'] = player_boxscore['playerID_ot'].notna()

# The helper join column is only needed to derive the flag.
player_boxscore = player_boxscore.drop(columns='playerID_ot')

In [13]:
# Persist the transformed player box scores (the row index is written as the first column).
player_boxscore.to_csv(path_or_buf='transformed_data/player_points.csv')

Unnamed: 0,playerID,gameID,assists,goals,shots,blocked,3plusBlockedShots,5plusShots,shortHandedPoints,3plusPoints,hatTrick,shootoutGoal
0,8470604,2021020001,1.0,0.0,1.0,0.0,False,False,0.0,False,False,False
1,8471724,2021020001,2.0,0.0,1.0,1.0,False,False,0.0,False,False,False
2,8470619,2021020001,0.0,1.0,2.0,2.0,False,False,0.0,False,False,False
3,8476927,2021020001,0.0,1.0,3.0,0.0,False,False,0.0,False,False,False
4,8482055,2021020001,1.0,0.0,2.0,2.0,False,False,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
26370,8478463,2021021211,0.0,0.0,2.0,0.0,False,False,0.0,False,False,False
26371,8479356,2021021211,0.0,0.0,3.0,0.0,False,False,0.0,False,False,False
26372,8475231,2021021211,0.0,0.0,0.0,3.0,True,False,0.0,False,False,False
26373,8480222,2021021211,,,,,False,False,,False,False,False


# Goalie BoxScore Manipulation

In [None]:
def atleast_sixty(time):
    """Return True when a 'minutes:seconds' string shows at least 60 minutes.

    Only the text before the first ':' is inspected; the seconds part is ignored.

    Parameters
    ----------
    time : str
        Time value formatted like 'MM:SS'. Non-string values (for example the
        NaN pandas uses for an empty CSV field, or None) are accepted and
        treated as "not sixty minutes".

    Returns
    -------
    bool
        True if the minutes part is an integer >= 60. False otherwise,
        including when there is no ':' separator, when the minutes part is not
        an integer, or when `time` is not a string.
    """
    # Missing values are not strings and have no text to parse.
    if not isinstance(time, str):
        return False

    minutes, separator, _ = time.partition(':')
    if not separator:
        return False

    try:
        return int(minutes) >= 60
    except ValueError:
        # Malformed minutes (e.g. ':30') are treated like any other unparseable value.
        return False

In [None]:
# Load the per-goalie box scores; the first CSV column is used as the row index.
goalie_boxscore = pd.read_csv(
    filepath_or_buffer='data/goalies_boxscores.csv',
    index_col=0,
)

In [None]:
# Goals against = shots faced minus saves made.
goalie_boxscore['goalsAgainst'] = goalie_boxscore['shots'].sub(goalie_boxscore['saves'])

In [None]:
# Mark goalies whose time on ice reaches at least 60 minutes.
goalie_boxscore['playedEntireGame'] = goalie_boxscore['timeOnIce'].apply(atleast_sixty)

In [None]:
# A shutout requires zero goals against AND the played-entire-game flag.
# NOTE(review): the flag is based on time on ice >= 60 minutes; a goalie briefly
# off the ice would presumably not qualify — confirm this matches the intended rule.
goalie_boxscore['shutout'] = (
    goalie_boxscore['goalsAgainst'].eq(0)
    & goalie_boxscore['playedEntireGame'].eq(True)
).to_numpy()

In [None]:
# Remove the raw goalie columns that are not kept in the transformed output.
goalie_boxscore = goalie_boxscore.drop(
    columns=[
        # general per-game fields
        'timeOnIce', 'assists', 'pim', 'shots',
        # saves split by game situation
        'powerPlaySaves', 'shortHandedSaves', 'evenSaves',
        # shots against split by game situation
        'shortHandedShotsAgainst', 'evenShotsAgainst', 'powerPlayShotsAgainst',
        # save percentages
        'savePercentage', 'evenStrengthSavePercentage', 'powerPlaySavePercentage',
        # intermediate flag, only needed to derive the shutout column
        'playedEntireGame',
    ]
)

In [None]:
# Flag games with 35 or more saves.
# The threshold is inclusive ("35plus"), matching the >= comparisons used for the
# player indicators (3plusBlockedShots, 5plusShots, 3plusPoints); exactly 35 saves counts.
goalie_boxscore['35plusSaves'] = goalie_boxscore['saves'].ge(35).to_numpy()

In [None]:
# Attach an OT marker column: `isOT` holds the gameID when the game appears in
# ot_data and is NaN otherwise.
goalie_boxscore = goalie_boxscore.merge(
    ot_data[['gameID']].drop_duplicates().rename(columns={'gameID': 'isOT'}),
    how='left',
    left_on='gameID',
    right_on='isOT',
)

In [None]:
# An overtime loss is a losing decision ('L') in a game that has an OT marker.
goalie_boxscore['overtimeLoss'] = (
    goalie_boxscore['decision'].eq('L') & goalie_boxscore['isOT'].notna()
).to_numpy()

# The OT marker column is only needed to derive the flag above.
goalie_boxscore = goalie_boxscore.drop(columns=['isOT'])

In [None]:
# Persist the transformed goalie box scores.
# Written to a goalie-specific file so the player output
# ('transformed_data/player_points.csv') is not overwritten.
# NOTE(review): 'goalie_points.csv' is an assumed file name — confirm against
# whatever reads this output downstream.
goalie_boxscore.to_csv('transformed_data/goalie_points.csv')