In season: 10-01 through 03-31
Off season: 04-01 through 09-30

In [726]:
import pandas as pd
import numpy as np

In [727]:
# Import the map to nhl player id data
nhl_id_mapping = pd.read_csv(r'../Data/map_player_nhl.csv')

In [728]:
# Import the trade dates data
trade_dates = pd.read_csv(r'../Data/trade_dates.csv')

In [729]:
# Import the traded players stats from the season before being traded.
traded_players_pre_season = pd.read_csv(r'../Data/traded_skater_platform.csv')

In [730]:
# Import the gen info data
gen_info = pd.read_csv(r'../Data/traded_player_general_info.csv')

In [731]:
# Merge trade dates and players data
first_merge = pd.merge(traded_players_pre_season, nhl_id_mapping, on='playerId')

In [732]:
# Merge last result with trade dates data
second_merge = pd.merge(first_merge, trade_dates, on='tradeId', how='inner')

In [733]:
# Merge last result with the general player info data
third_merge = pd.merge(second_merge, gen_info, on=['playerId','position'], how='inner')

In [734]:
third_merge.shape

(1488, 32)

In [735]:
# Read in contractual data
contract_data = pd.read_csv(r'../Data/traded_contracts.csv')

In [736]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick'],
      dtype='object')

In [737]:
contract_data.columns

Index(['contractId', 'playerId', 'season', 'seasonStart', 'seasonEnd', 'aav',
       'tradeId', 'toTeamId', 'fromTeamId', 'amountOfCapRetained',
       'acquiringCap'],
      dtype='object')

In [738]:
third_merge[['playerId','tradeId']].value_counts()

playerId  tradeId
832445    3063       2
202       2258       1
587498    991        1
592246    737        1
592066    1088       1
                    ..
107608    283        1
107548    224        1
107488    247        1
107087    1146       1
864629    1315       1
Name: count, Length: 1487, dtype: int64

In [739]:
third_merge[third_merge['playerId'] == 832445]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,trade_date,positionGeneral_y,height_cm,weight_kg,handedness,dateOfBirth,birthCountry,draftYear,draftRound,draftOverallPick
86,832445,RW,F,nhl,professional,3063,5,1,1,0,...,2024-07-03,F,190.5,101.0,R,2000-06-07,Russia,2020.0,2.0,61.0
87,832445,RW,F,nhl,professional,3063,5,1,1,0,...,2024-07-03,F,190.5,101.0,R,2000-06-07,Russia,2020.0,2.0,61.0


In [740]:
third_merge.shape

(1488, 32)

In [741]:
# Mix up in these two rows so drop them
third_merge = third_merge[~(third_merge['playerId'] == 832445)]

In [742]:
third_merge.shape

(1486, 32)

In [743]:
# Merge the last merge result with contractual data
fourth_merge = pd.merge(third_merge, contract_data, on=['playerId','tradeId'], how='inner')

In [744]:
fourth_merge.shape

(1476, 41)

In [745]:
# Import data on team ID's
team_id_mapping = pd.read_csv(r'../Data/team_season_mapping.csv')

In [746]:
fourth_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'contractId', 'season', 'seasonStart', 'seasonEnd',
       'aav', 'toTeamId', 'fromTeamId', 'amountOfCapRetained', 'acquiringCap'],
      dtype='object')

In [747]:
team_id_mapping.columns

Index(['teamId', 'season', 'teamName', 'teamAbbreviation',
       'conferenceAbbreviation', 'divisionName'],
      dtype='object')

In [748]:
# Merge the to team ID mapping data to the player's pre trade team
fifth_merge = pd.merge(fourth_merge, team_id_mapping, left_on=['fromTeamId','season'], right_on=['teamId','season'], how='inner')

In [749]:
fifth_merge.shape

(1473, 46)

In [750]:
# Do this again with the player's post trade team
sixth_merge = pd.merge(fifth_merge, team_id_mapping, left_on=['toTeamId','season'], right_on=['teamId','season'], how='inner', suffixes=('_pre_trade','_post_trade'))

In [751]:
sixth_merge.shape

(1470, 51)

## New formatting and merging

In [752]:
sixth_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'contractId', 'season', 'seasonStart', 'seasonEnd',
       'aav', 'toTeamId', 'fromTeamId', 'amountOfCapRetained', 'acquiringCap',
       'teamId_pre_trade', 'teamName_pre_trade', 'teamAbbreviation_pre_trade',
       'conferenceAbbreviation_pre_trade', 'divisionName_pre_trade',
       'teamId_post_trade', 'teamName_post_trade',
       'teamAbbreviation_post_trade', 'conferen

In [753]:
# Convert trade date column to date data type
sixth_merge['trade_date'] = pd.to_datetime(sixth_merge['trade_date'], format='%Y-%m-%d')

In [754]:
# Create a trade season column instead of just converting the seasons in the skater data
# This is because of a year mix up in the data pulled from the platform
# This column will encapsulate the season in which either a player was traded during or
# the season after a player was traded during the off season
sixth_merge['post_trade_season'] = sixth_merge['trade_date'].apply(lambda x: '24-25' if (pd.to_datetime('2024-04-01') <= x <= pd.to_datetime('2025-03-31')) else ('23-24' if pd.to_datetime('2023-04-01') <= x <= pd.to_datetime('2024-03-31') else ('22-23' if pd.to_datetime('2022-04-01') <= x <= pd.to_datetime('2023-03-31') else ('21-22' if pd.to_datetime('2021-04-01') <= x <= pd.to_datetime('2022-03-31') else ('20-21' if pd.to_datetime('2020-04-01') <= x <= pd.to_datetime('2021-03-31') else ('19-20' if pd.to_datetime('2019-04-01') <= x <= pd.to_datetime('2020-03-31') else ('18-19' if pd.to_datetime('2018-04-01') <= x <= pd.to_datetime('2019-03-31') else ('17-18' if pd.to_datetime('2017-04-01') <= x <= pd.to_datetime('2018-03-31') else ('16-17' if pd.to_datetime('2016-04-01') <= x <= pd.to_datetime('2017-03-31') else ('15-16' if pd.to_datetime('2015-04-01') <= x <= pd.to_datetime('2016-03-31') else ('14-15' if pd.to_datetime('2014-04-01') <= x <= pd.to_datetime('2015-03-31') else ('13-14' if pd.to_datetime('2013-04-01') <= x <= pd.to_datetime('2014-03-31') else ('12-13' if pd.to_datetime('2012-04-01') <= x <= pd.to_datetime('2013-03-31') else ('11-12' if pd.to_datetime('2011-04-01') <= x <= pd.to_datetime('2012-03-31') else ('10-11' if pd.to_datetime('2010-04-01') <= x <= pd.to_datetime('2011-03-31') else ('09-10' if pd.to_datetime('2009-04-01') <= x <= pd.to_datetime('2010-03-31') else ('08-09' if pd.to_datetime('2008-04-01') <= x <= pd.to_datetime('2009-03-31') else (None))))))))))))))))))

In [755]:
# Make a separate column with just the season month and day combination
sixth_merge['trade_month_day'] = pd.to_datetime(sixth_merge['trade_date']).dt.strftime('%m-%d')

In [756]:
# Create a boolean variable for if the player was traded in the off-season
sixth_merge['off-season_trade'] = np.where(sixth_merge['trade_month_day'].between('04-01', '09-30'), 1, 0)

In [757]:
sixth_merge['off-season_trade'].value_counts()

off-season_trade
0    1051
1     419
Name: count, dtype: int64

In [758]:
# Assign pre-trade year based on the trade year
sixth_merge['pre_trade_season'] = sixth_merge['post_trade_season'].apply(lambda x: '23-24' if x == '24-25' else ('22-23' if x == '23-24' else ('21-22' if x == '22-23' else ('20-21' if x == '21-22' else ('19-20' if x == '20-21' else ('18-19' if x == '19-20' else ('17-18' if x == '18-19' else ('16-17' if x == '17-18' else ('15-16' if x == '16-17' else ('14-15' if x == '15-16' else ('13-14' if x == '14-15' else ('12-13' if x == '13-14' else ('11-12' if x == '12-13' else ('10-11' if x == '11-12' else ('09-10' if x == '10-11' else ('08-09' if x == '09-10' else (None)))))))))))))))))

In [759]:
# Change data types from int32 to int64 for consistency
sixth_merge['off-season_trade'] = sixth_merge['off-season_trade'].astype('int64')

In [760]:
# filter for players traded in the off-season only
off_season_traded_players = sixth_merge[sixth_merge['off-season_trade'] == 1]

In [761]:
off_season_traded_players.shape

(419, 55)

In [762]:
# Import skater data from 08-09 season to 24-25 season
skater_data = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv')

In [763]:
# Read in new dataset for team standings before the offseason in which the player was traded
team_standings = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_Team_Standings.csv')

In [764]:
# Read in team level +-G data for pre trade season
PMG_team_data = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_P_M_G_Teams.csv')

In [765]:
# Read in after the season after the trades gar stats
after_trade_gar_stats = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_GAR_Stats.csv')

In [766]:
off_season_traded_players.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'contractId', 'season', 'seasonStart', 'seasonEnd',
       'aav', 'toTeamId', 'fromTeamId', 'amountOfCapRetained', 'acquiringCap',
       'teamId_pre_trade', 'teamName_pre_trade', 'teamAbbreviation_pre_trade',
       'conferenceAbbreviation_pre_trade', 'divisionName_pre_trade',
       'teamId_post_trade', 'teamName_post_trade',
       'teamAbbreviation_post_trade', 'conferen

In [767]:
skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FSh%', 'xFSh%', 'iBLK/60', 'GIVE/60', 'TAKE/60',
       'iHF/60', 'iHA/60', 'iPENT2/60', 'iPEND2/60', 'iPENT5/60', 'iPEND5/60',
       'iPEN±/60', 'FOW/60', 'FOL/60', 'FO±/60'],
      dtype='object')

In [768]:
off_season_traded_players['teamAbbreviation_pre_trade'].unique()

array(['TBL', 'DET', 'PHI', 'NSH', 'VAN', 'CGY', 'FLA', 'CBJ', 'NJD',
       'MIN', 'LAK', 'OTT', 'WSH', 'BOS', 'STL', 'CHI', 'CAR', 'NYR',
       'SJS', 'BUF', 'COL', 'ANA', 'TOR', 'DAL', 'MTL', 'NYI', 'VGK',
       'PIT', 'ARI', 'ATL', 'EDM', 'WPG'], dtype=object)

In [769]:
skater_data['Team'].unique()

array(['COL', 'N.J', 'BOS', 'CGY', 'FLA', 'DET', 'DAL', 'WPG', 'CHI',
       'EDM', 'CBJ', 'NYI', 'WSH', 'ARI', 'MTL', 'CAR', 'VAN', 'NYR',
       'ANA', 'MIN', 'TOR', 'VGK', 'S.J', 'PIT', 'STL', 'T.B', 'OTT',
       'PHI', 'SEA', 'BUF', 'NSH', 'L.A', 'ATL', 'UTA'], dtype=object)

In [770]:
# Change team abbreviations to match that of the ones in the evolving hockey data
team_mapping = {
    'COL': 'COL',
    'NJD': 'N.J',
    'SJS': 'S.J',
    'TBL': 'T.B',
    'LAK': 'L.A',
    'MTL': 'MTL',
    'BOS': 'BOS',
    'BUF': 'BUF',
    'MIN': 'MIN',
    'WSH': 'WSH',
    'TOR': 'TOR',
    'OTT': 'OTT',
    'PHI': 'PHI',
    'PIT': 'PIT',
    'NYR': 'NYR',
    'NYI': 'NYI',
    'FLA': 'FLA',
    'CAR': 'CAR',
    'CHI': 'CHI',
    'CBJ': 'CBJ',
    'ARI': 'ARI',
    'VGK': 'VGK',
    'DAL': 'DAL',
    'EDM': 'EDM',
    'WPG': 'WPG',
    'ANA': 'ANA',
    'VAN': 'VAN',
    'STL': 'STL',
    'CGY': 'CGY',
    'NSH': 'NSH',
    'ARI': 'ARI',
    'ATL': 'ATL',
    'DET': 'DET'
}

off_season_traded_players['teamAbbreviation_pre_trade'] = off_season_traded_players['teamAbbreviation_pre_trade'].map(team_mapping)

# Do it again for the post trade team abbreviations
off_season_traded_players['teamAbbreviation_post_trade'] = off_season_traded_players['teamAbbreviation_post_trade'].map(team_mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  off_season_traded_players['teamAbbreviation_pre_trade'] = off_season_traded_players['teamAbbreviation_pre_trade'].map(team_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  off_season_traded_players['teamAbbreviation_post_trade'] = off_season_traded_players['teamAbbreviation_post_trade'].map(team_mapping)


In [771]:
# Merge all of the hockey skater stats with the offseason players
off_season_first_merge = pd.merge(off_season_traded_players, skater_data, left_on=['nhlPlayerId','pre_trade_season','teamAbbreviation_pre_trade'], right_on=['API ID','Season','Team'], how='inner')

In [None]:
# Merge all of the hockey skater stats with the offseason players
#test1 = pd.merge(off_season_traded_players, skater_data, left_on=['nhlPlayerId','pre_trade_season','teamAbbreviation_pre_trade'], right_on=['API ID','Season','Team'], how='left')

In [None]:
#test1.shape

(419, 93)

In [None]:
#skater_data[(skater_data['API ID'] == 8477445)]

Unnamed: 0,Player,EH_ID,API ID,Season,Team,Position,Shoots,Birthday,Age,Draft Yr,...,iHF/60,iHA/60,iPENT2/60,iPEND2/60,iPENT5/60,iPEND5/60,iPEN±/60,FOW/60,FOL/60,FO±/60
5996,Hunter Shinkaruk,HUNTER.SHINKARUK,8477445,15-16,CGY,C,L,1994-10-13,20,2013,...,4.55,8.45,0.65,1.3,0.0,0.0,0.65,0.0,0.65,-0.65
5997,Hunter Shinkaruk,HUNTER.SHINKARUK,8477445,15-16,VAN,C,L,1994-10-13,20,2013,...,6.26,25.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5998,Hunter Shinkaruk,HUNTER.SHINKARUK,8477445,16-17,CGY,C,L,1994-10-13,21,2013,...,2.44,8.15,0.81,0.81,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#off_season_traded_players[off_season_traded_players['nhlPlayerId'] == 8477445]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,teamId_post_trade,teamName_post_trade,teamAbbreviation_post_trade,conferenceAbbreviation_post_trade,divisionName_post_trade,post_trade_season,trade_month_day,off-season_trade,pre_trade_season,traded_season
25,584051,C,F,nhl,professional,510,7,0,1,2,...,434,Montréal Canadiens,MTL,E,Atlantic,18-19,08-20,1,17-18,18-19


In [None]:
#test1[test1['Team'].isna()]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,iHF/60,iHA/60,iPENT2/60,iPEND2/60,iPENT5/60,iPEND5/60,iPEN±/60,FOW/60,FOL/60,FO±/60
5,584051,C,F,nhl,professional,510,7,0,1,2,...,,,,,,,,,,
30,48682,RD,D,nhl,professional,538,19,0,0,2,...,,,,,,,,,,
37,813211,LD,D,nhl,professional,2341,55,1,6,48,...,,,,,,,,,,
60,11247,LD,D,nhl,professional,524,76,2,8,71,...,,,,,,,,,,
68,105652,LD,D,nhl,professional,401,68,0,12,24,...,,,,,,,,,,
72,32231,RD,D,nhl,professional,1446,61,5,26,24,...,,,,,,,,,,
77,180420,RD,D,nhl,professional,1132,38,1,5,16,...,,,,,,,,,,
81,692304,RD,D,nhl,professional,1406,3,0,0,4,...,,,,,,,,,,
84,575744,LD,D,nhl,professional,1486,82,6,21,24,...,,,,,,,,,,
85,730879,LD,D,nhl,professional,2092,10,1,1,6,...,,,,,,,,,,


In [777]:
off_season_first_merge.shape

(370, 93)

In [778]:
team_standings.columns

Index(['Name', 'Team', 'Season', 'GP', 'TOI', 'W/GP', 'L/GP', 'OL/GP',
       'ROW/GP', 'Points/GP', 'Points%', 'GF/GP', 'GA/GP', 'G±/GP', 'Sh%',
       'Sv%'],
      dtype='object')

In [779]:
# Merge all of the hockey teams stats with the offseason players
off_season_second_merge = pd.merge(off_season_first_merge, team_standings, left_on=['pre_trade_season','teamAbbreviation_pre_trade'], right_on=['Season','Team'], how='inner', suffixes=('_skater','_team'))

In [780]:
off_season_traded_players[['playerId','tradeId']].value_counts()

playerId  tradeId
60638     172        2
575744    1486       2
142907    235        2
610535    61         1
610355    756        1
                    ..
110272    807        1
107968    243        1
107788    39         1
107668    286        1
864629    1315       1
Name: count, Length: 416, dtype: int64

In [781]:
off_season_second_merge.shape

(370, 109)

In [782]:
PMG_team_data.columns

Index(['Name', 'Team', 'Season', 'GP', 'TOI', 'FF/60', 'FA/60', 'xGF/FF',
       'xGA/FA', 'RF/60', 'RA/60', 'QF/60', 'QA/60', 'R±/60', 'Q±/60',
       'Sh±/60', 'Sv±/60', 'G±/60'],
      dtype='object')

In [783]:
# Merge all of the hockey teams stats with the offseason players
off_season_third_merge = pd.merge(off_season_second_merge, PMG_team_data, left_on=['pre_trade_season','teamAbbreviation_pre_trade'], right_on=['Season','Team'], how='inner', suffixes=('_standings','_PMG'))

In [784]:
off_season_third_merge.shape

(370, 127)

In [785]:
off_season_third_merge[['playerId','tradeId']].value_counts()

playerId  tradeId
11128     1043       1
548210    1135       1
606851    667        1
596486    1626       1
593280    749        1
                    ..
107788    39         1
107668    286        1
107548    224        1
107087    1146       1
846660    104        1
Name: count, Length: 370, dtype: int64

In [786]:
after_trade_gar_stats.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI_All',
       'TOI_EV', 'TOI_PP', 'TOI_SH', 'EVO_GAR/60', 'EVD_GAR/60', 'PPO_GAR/60',
       'SHD_GAR/60', 'Take_GAR/60', 'Draw_GAR/60', 'Off_GAR/60', 'Def_GAR/60',
       'Pens_GAR/60', 'GAR/60', 'WAR/60', 'SPAR/60'],
      dtype='object')

In [787]:
off_season_third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes',
       ...
       'xGA/FA', 'RF/60', 'RA/60', 'QF/60', 'QA/60', 'R±/60', 'Q±/60',
       'Sh±/60', 'Sv±/60', 'G±/60'],
      dtype='object', length=127)

In [788]:
# Merge all of the hockey teams stats with the offseason players
off_season_fourth_merge = pd.merge(off_season_third_merge, after_trade_gar_stats, left_on=['nhlPlayerId','post_trade_season','teamAbbreviation_post_trade'], right_on=['API ID','Season','Team'], how='inner')


In [789]:
off_season_fourth_merge.shape

(283, 156)

In [790]:
off_season_fourth_merge[['playerId','tradeId']].value_counts()

playerId  tradeId
11186     354        1
579951    745        1
619272    589        1
617129    777        1
613296    1304       1
                    ..
110332    35         1
119040    238        1
119158    293        1
119160    1139       1
846660    104        1
Name: count, Length: 283, dtype: int64

## Now, let's format some of the data and merge all of the evolving hockey data for the pre-trade stats.

In [791]:
# Convert trade date column to date data type
sixth_merge['trade_date'] = pd.to_datetime(sixth_merge['trade_date'], format='%Y-%m-%d')

In [792]:
# Create a trade season column instead of just converting the seasons in the skater data
# This is because of a year mix up in the data pulled from the platform
# This column will encapsulate the season in which either a player was traded during or
# the season after a player was traded during the off season
sixth_merge['traded_season'] = sixth_merge['trade_date'].apply(lambda x: '24-25' if (pd.to_datetime('2024-04-01') <= x <= pd.to_datetime('2025-03-31')) else ('23-24' if pd.to_datetime('2023-04-01') <= x <= pd.to_datetime('2024-03-31') else ('22-23' if pd.to_datetime('2022-04-01') <= x <= pd.to_datetime('2023-03-31') else ('21-22' if pd.to_datetime('2021-04-01') <= x <= pd.to_datetime('2022-03-31') else ('20-21' if pd.to_datetime('2020-04-01') <= x <= pd.to_datetime('2021-03-31') else ('19-20' if pd.to_datetime('2019-04-01') <= x <= pd.to_datetime('2020-03-31') else ('18-19' if pd.to_datetime('2018-04-01') <= x <= pd.to_datetime('2019-03-31') else ('17-18' if pd.to_datetime('2017-04-01') <= x <= pd.to_datetime('2018-03-31') else ('16-17' if pd.to_datetime('2016-04-01') <= x <= pd.to_datetime('2017-03-31') else ('15-16' if pd.to_datetime('2015-04-01') <= x <= pd.to_datetime('2016-03-31') else ('14-15' if pd.to_datetime('2014-04-01') <= x <= pd.to_datetime('2015-03-31') else ('13-14' if pd.to_datetime('2013-04-01') <= x <= pd.to_datetime('2014-03-31') else ('12-13' if pd.to_datetime('2012-04-01') <= x <= pd.to_datetime('2013-03-31') else ('11-12' if pd.to_datetime('2011-04-01') <= x <= pd.to_datetime('2012-03-31') else ('10-11' if pd.to_datetime('2010-04-01') <= x <= pd.to_datetime('2011-03-31') else ('09-10' if pd.to_datetime('2009-04-01') <= x <= pd.to_datetime('2010-03-31') else ('08-09' if pd.to_datetime('2008-04-01') <= x <= pd.to_datetime('2009-03-31') else (None))))))))))))))))))

In [793]:
# Make a separate column with just the season month and day combination
sixth_merge['trade_month_day'] = pd.to_datetime(sixth_merge['trade_date']).dt.strftime('%m-%d')

In [794]:
# Create a boolean variable for if the player was traded in the off-season
sixth_merge['off-season_trade'] = np.where(sixth_merge['trade_month_day'].between('04-01', '09-30'), 1, 0)

In [795]:
sixth_merge['off-season_trade'].value_counts()

off-season_trade
0    1051
1     419
Name: count, dtype: int64

In [796]:
# Assign pre-trade year based on the trade year
sixth_merge['pre_trade_season'] = sixth_merge['traded_season'].apply(lambda x: '23-24' if x == '24-25' else ('22-23' if x == '23-24' else ('21-22' if x == '22-23' else ('20-21' if x == '21-22' else ('19-20' if x == '20-21' else ('18-19' if x == '19-20' else ('17-18' if x == '18-19' else ('16-17' if x == '17-18' else ('15-16' if x == '16-17' else ('14-15' if x == '15-16' else ('13-14' if x == '14-15' else ('12-13' if x == '13-14' else ('11-12' if x == '12-13' else ('10-11' if x == '11-12' else ('09-10' if x == '10-11' else ('08-09' if x == '09-10' else (None)))))))))))))))))

In [797]:
# Change data types from int32 to int64 for consistency
sixth_merge['off-season_trade'] = sixth_merge['off-season_trade'].astype('int64')

In [798]:
# filter for players traded in the off-season only
off_season_traded_players = sixth_merge[sixth_merge['off-season_trade'] == 1]

In [799]:
off_season_traded_players.shape

(419, 56)

In [800]:
# Import skater data from 08-09 season to 24-25 season
skater_data = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv')

In [801]:
skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FSh%', 'xFSh%', 'iBLK/60', 'GIVE/60', 'TAKE/60',
       'iHF/60', 'iHA/60', 'iPENT2/60', 'iPEND2/60', 'iPENT5/60', 'iPEND5/60',
       'iPEN±/60', 'FOW/60', 'FOL/60', 'FO±/60'],
      dtype='object')

In [802]:
sixth_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'contractId', 'season', 'seasonStart', 'seasonEnd',
       'aav', 'toTeamId', 'fromTeamId', 'amountOfCapRetained', 'acquiringCap',
       'teamId_pre_trade', 'teamName_pre_trade', 'teamAbbreviation_pre_trade',
       'conferenceAbbreviation_pre_trade', 'divisionName_pre_trade',
       'teamId_post_trade', 'teamName_post_trade',
       'teamAbbreviation_post_trade', 'conferen

In [803]:
# Assign pre-trade stats based on the trade year
pre_season_stats_merged = pd.merge(off_season_traded_players, skater_data, left_on=['nhlPlayerId','pre_trade_season','draftYear','draftRound'], right_on=['API ID','Season','Draft Yr','Draft Rd'], how='inner')

In [804]:
# Read in new dataset for team standings before the offseason in which the player was traded
team_standings = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_Team_Standings.csv')

In [805]:
# Assign pre-trade stats from players old team based on the trade year
all_pre_season_stats = pd.merge(pre_season_stats_merged, team_standings, left_on=['Team','pre_trade_season'], right_on=['Team','Season'], how='inner',suffixes=('indv','team'))

In [806]:
# Read in team level +-G data for pre trade season
PMG_team_data = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_P_M_G_Teams.csv')

In [807]:
# Assign pre-trade stats from players old team based on the trade year
all_pre_season_stats = pd.merge(all_pre_season_stats, PMG_team_data, left_on=['Team','pre_trade_season'], right_on=['Team','Season'], how='inner',suffixes=('_1','_2'))

In [808]:
all_pre_season_stats.shape

(384, 126)

In [809]:
# Remove all duplicate columns from previous merges that end in _y
'''all_pre_season_stats.drop(columns=['positionGeneral_y','Player_y', 'EH_ID_y', 'API ID_y',
       'Season_y', 'Position_y', 'Shoots_y', 'Birthday_y', 'Age_y',
       'Draft Yr_y', 'Draft Rd_y', 'Draft Ov_y', 'GP_y', 'TOI_y'], inplace=True)'''

"all_pre_season_stats.drop(columns=['positionGeneral_y','Player_y', 'EH_ID_y', 'API ID_y',\n       'Season_y', 'Position_y', 'Shoots_y', 'Birthday_y', 'Age_y',\n       'Draft Yr_y', 'Draft Rd_y', 'Draft Ov_y', 'GP_y', 'TOI_y'], inplace=True)"

In [810]:
# Get rid of all _x suffixes from the columns
# all_pre_season_stats.columns = [col.replace('_x','') for col in all_pre_season_stats.columns]

In [811]:
all_pre_season_stats.shape

(384, 126)

In [812]:
# Read in after the season after the trades gar stats
after_trade_gar_stats = pd.read_csv(r'../Evolving-Hockey/NHL_08_24_GAR_Stats.csv')

In [813]:
after_trade_gar_stats.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI_All',
       'TOI_EV', 'TOI_PP', 'TOI_SH', 'EVO_GAR/60', 'EVD_GAR/60', 'PPO_GAR/60',
       'SHD_GAR/60', 'Take_GAR/60', 'Draw_GAR/60', 'Off_GAR/60', 'Def_GAR/60',
       'Pens_GAR/60', 'GAR/60', 'WAR/60', 'SPAR/60'],
      dtype='object')

In [814]:
# merge the pre season stats with the after trade gar stats
final_data = pd.merge(all_pre_season_stats, after_trade_gar_stats, left_on=['nhlPlayerId','traded_season','EH_ID'], right_on=['API ID','Season','EH_ID'], how='inner')

In [815]:
final_data.shape

(335, 154)

In [816]:
final_data[['nhlPlayerId','tradeId','traded_season']].value_counts()

nhlPlayerId  tradeId  traded_season
8475283      867      13-14            4
8477406      235      21-22            4
8475233      172      21-22            4
8467925      749      13-14            3
8474688      322      21-22            3
                                      ..
8474001      1931     13-14            1
8474037      1246     12-13            1
8474038      870      11-12            1
8474056      423      19-20            1
8482125      2340     24-25            1
Name: count, Length: 290, dtype: int64

In [817]:
final_data.drop_duplicates(inplace=True)

In [818]:
final_data[(final_data['nhlPlayerId'] == 8475283) & (final_data['tradeId'] == 867)]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,PPO_GAR/60,SHD_GAR/60,Take_GAR/60,Draw_GAR/60,Off_GAR/60,Def_GAR/60,Pens_GAR/60,GAR/60,WAR/60,SPAR/60
314,634157,LW,F,nhl,professional,867,9,2,0,19,...,0.0,0.0,0.149,-0.138,0.105,-0.23,0.011,-0.113,-0.021,-0.04
315,634157,LW,F,nhl,professional,867,9,2,0,19,...,-0.21,0.0,0.149,-0.138,0.706,0.605,0.011,1.321,0.25,0.468
316,634157,LW,F,nhl,professional,867,9,2,0,19,...,0.0,0.0,0.149,-0.138,0.105,-0.23,0.011,-0.113,-0.021,-0.04
317,634157,LW,F,nhl,professional,867,9,2,0,19,...,-0.21,0.0,0.149,-0.138,0.706,0.605,0.011,1.321,0.25,0.468


In [819]:
final_data[(final_data['nhlPlayerId'] == 8467925) & (final_data['tradeId'] == 749)]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,PPO_GAR/60,SHD_GAR/60,Take_GAR/60,Draw_GAR/60,Off_GAR/60,Def_GAR/60,Pens_GAR/60,GAR/60,WAR/60,SPAR/60
235,740013,RW,F,nhl,professional,749,37,0,4,23,...,-0.114,-0.271,0.012,-0.044,-0.109,0.004,-0.032,-0.104,-0.02,-0.037
236,740013,RW,F,nhl,professional,749,37,0,4,23,...,-0.114,-0.271,0.012,-0.044,-0.109,0.004,-0.032,-0.104,-0.02,-0.037
237,740013,RW,F,nhl,professional,749,37,0,4,23,...,-0.114,-0.271,0.012,-0.044,-0.109,0.004,-0.032,-0.104,-0.02,-0.037


In [820]:
final_data[(final_data['nhlPlayerId'] == 8477986) & (final_data['tradeId'] == 271)]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,PPO_GAR/60,SHD_GAR/60,Take_GAR/60,Draw_GAR/60,Off_GAR/60,Def_GAR/60,Pens_GAR/60,GAR/60,WAR/60,SPAR/60
260,126984,RD,D,nhl,professional,271,50,7,11,40,...,1.133,0.167,-0.03,0.017,0.307,-0.079,-0.014,0.208,0.037,0.069
261,126984,RD,D,nhl,professional,271,50,7,11,40,...,1.133,0.167,-0.03,0.017,0.307,-0.079,-0.014,0.208,0.037,0.069


In [821]:
third_merge[(third_merge['nhlPlayerId'] == 8477986) & (third_merge['tradeId'] == 271)]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,trade_date,positionGeneral_y,height_cm,weight_kg,handedness,dateOfBirth,birthCountry,draftYear,draftRound,draftOverallPick
1109,126984,RD,D,nhl,professional,271,50,7,11,40,...,2021-04-10,D,182.88,90.0,R,1994-04-11,Canada,2014.0,2.0,55.0
