In season: 10-01 through 03-31
Off season: 04-01 through 09-30

In [291]:
import pandas as pd
import numpy as np

In [292]:
# Load the table that maps each player to their NHL player id
nhl_id_mapping = pd.read_csv(filepath_or_buffer=r'../Data/map_player_nhl.csv')

In [293]:
# Load the trade dates table
trade_dates = pd.read_csv(filepath_or_buffer=r'../Data/trade_dates.csv')

In [294]:
# Load the traded players' stats from the season before they were traded
traded_players_pre_season = pd.read_csv(
    filepath_or_buffer=r'../Data/traded_skater_platform.csv'
)

In [295]:
# Load the general player info table
gen_info = pd.read_csv(filepath_or_buffer=r'../Data/traded_player_general_info.csv')

In [296]:
# Attach the NHL id mapping to the traded players' pre-trade stats (inner join on playerId)
first_merge = traded_players_pre_season.merge(
    nhl_id_mapping,
    how='inner',
    on='playerId',
)

In [297]:
# Add the trade dates, keeping only rows whose tradeId appears in both tables
second_merge = first_merge.merge(
    trade_dates,
    how='inner',
    on='tradeId',
)

In [298]:
# Add the general player info, matching on both the player id and the position
third_merge = second_merge.merge(
    gen_info,
    how='inner',
    on=['playerId', 'position'],
)

Now, let's format some of the data and merge in all of the Evolving-Hockey data for the pre-trade stats.

In [299]:
# Parse the trade_date strings (YYYY-MM-DD) into datetime values
third_merge = third_merge.assign(
    trade_date=pd.to_datetime(third_merge['trade_date'], format='%Y-%m-%d')
)

In [300]:
# Create a trade season column instead of just converting the seasons in the skater data
# This is because of a year mix up in the data pulled from the platform
# This column will encapsulate the season in which either a player was traded during or
# the season after a player was traded during the off season
def trade_date_to_season(trade_date, first_start_year=2008, last_start_year=2024):
    """Return the 'YY-YY' season label for a trade date, or None if out of range.

    A season window opens on April 1 of its start year and closes on March 31 of
    the following year. An off-season trade (April-September) is therefore
    labelled with the upcoming season, and an in-season trade (October-March)
    with the season in progress.

    Parameters
    ----------
    trade_date : datetime-like scalar (e.g. pandas Timestamp)
        Date of the trade. A missing value (NaT/NaN/None) yields None.
    first_start_year, last_start_year : int
        Start years of the earliest and latest seasons that get a label.
        The defaults cover '08-09' through '24-25'.

    Returns
    -------
    str or None
        The season label, or None when the date is missing or falls outside
        the covered seasons.
    """
    if pd.isna(trade_date):
        return None
    # January-March still belongs to the window that opened the previous April.
    # Working from year/month (not timestamp comparisons) also keeps a trade
    # stamped with a time of day on March 31 inside its season.
    start_year = trade_date.year if trade_date.month >= 4 else trade_date.year - 1
    if not first_start_year <= start_year <= last_start_year:
        return None
    return f'{start_year % 100:02d}-{(start_year + 1) % 100:02d}'


third_merge['traded_season'] = third_merge['trade_date'].apply(trade_date_to_season)

In [301]:
# Keep just the month and day of each trade date as an 'MM-DD' string
third_merge['trade_month_day'] = (
    third_merge['trade_date']
    .pipe(pd.to_datetime)
    .dt.strftime('%m-%d')
)

In [302]:
# Flag off-season trades: 1 when the trade fell from 04-01 through 09-30, otherwise 0.
# Comparing the zero-padded 'MM-DD' strings orders them the same way as the dates.
third_merge['off-season_trade'] = np.where(
    third_merge['trade_month_day'].between(left='04-01', right='09-30'),
    1,
    0,
)

In [303]:
# Sanity check: count how many rows are in-season (0) versus off-season (1) trades
third_merge['off-season_trade'].value_counts()

off-season_trade
0    1058
1     430
Name: count, dtype: int64

In [304]:
# Assign pre-trade year based on the trade year
# Lookup table: each traded season from '09-10' to '24-25' -> the season before it.
# '08-09' has no entry, so it (like any other unlisted value) maps to None.
PREVIOUS_SEASON = {
    f'{year % 100:02d}-{(year + 1) % 100:02d}': f'{(year - 1) % 100:02d}-{year % 100:02d}'
    for year in range(2009, 2025)
}
third_merge['pre_trade_season'] = third_merge['traded_season'].apply(
    lambda season: PREVIOUS_SEASON.get(season)
)

In [305]:
# Cast the off-season flag to int64 (it can come out as int32) so the dtype is consistent
third_merge['off-season_trade'] = third_merge['off-season_trade'].astype(np.int64)

In [306]:
# Keep only the players who were traded during the off-season
off_season_traded_players = third_merge.loc[third_merge['off-season_trade'].eq(1)]

In [307]:
# Confirm the size of the off-season subset (rows, columns)
off_season_traded_players.shape

(430, 36)

In [308]:
# Load the skater rate stats covering the 08-09 through 24-25 seasons
skater_data = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv'
)

In [309]:
# Attach each player's skater stats from the season before the trade
# (inner join: NHL player id + pre-trade season against API ID + Season)
pre_season_stats_merged = off_season_traded_players.merge(
    skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_season'],
    right_on=['API ID', 'Season'],
)

In [310]:
# Load the team standings, used for the season before the off-season in which the player was traded
team_standings = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Team_Standings.csv'
)

In [311]:
# Join the standings of the player's old team for the pre-trade season
# NOTE(review): the suffixes have no separator, so overlapping column names get
# 'indv' / 'team' appended directly to the name - confirm this is intended.
all_pre_season_stats = pre_season_stats_merged.merge(
    team_standings,
    how='inner',
    left_on=['Team', 'pre_trade_season'],
    right_on=['Team', 'Season'],
    suffixes=('indv', 'team'),
)

In [312]:
# Load the team-level +-G data used for the pre-trade season
PMG_team_data = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_P_M_G_Teams.csv'
)

In [313]:
# Join the team-level +-G data of the player's old team for the pre-trade season.
# This cell rebinds all_pre_season_stats, so running it twice merges the data in twice.
all_pre_season_stats = all_pre_season_stats.merge(
    PMG_team_data,
    how='inner',
    left_on=['Team', 'pre_trade_season'],
    right_on=['Team', 'Season'],
    suffixes=('_1', '_2'),
)

In [314]:
# Check the frame size after the pre-trade merges.
# NOTE(review): the recorded output has more rows (451) than the off-season subset (430).
# Inner joins can only add rows when a key matches more than once, so at least one of the
# merges above fans out some players - confirm whether that is intended.
all_pre_season_stats.shape

(451, 106)

In [315]:
# Remove all duplicate columns from previous merges that end in _y
# NOTE: this drop is currently disabled - it is wrapped in a string literal, so the cell
# only echoes the text and leaves all_pre_season_stats unchanged.
'''all_pre_season_stats.drop(columns=['positionGeneral_y','Player_y', 'EH_ID_y', 'API ID_y',
       'Season_y', 'Position_y', 'Shoots_y', 'Birthday_y', 'Age_y',
       'Draft Yr_y', 'Draft Rd_y', 'Draft Ov_y', 'GP_y', 'TOI_y'], inplace=True)'''

"all_pre_season_stats.drop(columns=['positionGeneral_y','Player_y', 'EH_ID_y', 'API ID_y',\n       'Season_y', 'Position_y', 'Shoots_y', 'Birthday_y', 'Age_y',\n       'Draft Yr_y', 'Draft Rd_y', 'Draft Ov_y', 'GP_y', 'TOI_y'], inplace=True)"

In [316]:
# Get rid of all _x suffixes from the columns (currently disabled: the line below is commented out)
# all_pre_season_stats.columns = [col.replace('_x','') for col in all_pre_season_stats.columns]

In [317]:
# Re-check the frame size; the two column clean-up cells above are disabled, so it is unchanged
all_pre_season_stats.shape

(451, 106)

In [318]:
# Load the GAR stats that supply the numbers for the season after each trade
after_trade_gar_stats = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_GAR_Stats.csv'
)

In [319]:
# List the columns available in the GAR stats table
after_trade_gar_stats.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI_All',
       'TOI_EV', 'TOI_PP', 'TOI_SH', 'EVO_GAR/60', 'EVD_GAR/60', 'PPO_GAR/60',
       'SHD_GAR/60', 'Take_GAR/60', 'Draw_GAR/60', 'Off_GAR/60', 'Def_GAR/60',
       'Pens_GAR/60', 'GAR/60', 'WAR/60', 'SPAR/60'],
      dtype='object')

In [320]:
# Join the pre-trade stats with the GAR stats for the traded season
# (inner join: NHL player id + traded season against API ID + Season)
final_data = all_pre_season_stats.merge(
    after_trade_gar_stats,
    how='inner',
    left_on=['nhlPlayerId', 'traded_season'],
    right_on=['API ID', 'Season'],
)

In [321]:
# Check how many rows remain after the inner join with the GAR stats
final_data.shape

(390, 135)

In [324]:
# Count rows per player / trade / season key; a count above 1 means that key has
# more than one row in final_data.
# NOTE(review): presumably these come from players with several rows for one season in the
# skater, team or GAR tables - confirm, and decide how such rows should be combined.
final_data[['nhlPlayerId','tradeId','traded_season']].value_counts()

nhlPlayerId  tradeId  traded_season
8475283      867      13-14            4
8474688      322      21-22            3
8467925      749      13-14            3
8475690      4511     24-25            2
8476994      210      21-22            2
                                      ..
8473571      1075     13-14            1
8473548      613      19-20            1
8473534      354      18-19            1
8473533      724      12-13            1
8483512      3229     24-25            1
Name: count, Length: 341, dtype: int64

In [326]:
# Drop rows that are exact duplicates across every column.
# Rows that share a player/trade key but differ in any column are kept.
final_data = final_data.drop_duplicates()

In [327]:
# Inspect one player/trade pair that has several rows, to see which columns differ between them
final_data[(final_data['nhlPlayerId'] == 8475283) & (final_data['tradeId'] == 867)]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,PPO_GAR/60,SHD_GAR/60,Take_GAR/60,Draw_GAR/60,Off_GAR/60,Def_GAR/60,Pens_GAR/60,GAR/60,WAR/60,SPAR/60
366,634157,LW,F,nhl,professional,867,9,2,0,19,...,0.0,0.0,0.149,-0.138,0.105,-0.23,0.011,-0.113,-0.021,-0.04
367,634157,LW,F,nhl,professional,867,9,2,0,19,...,-0.21,0.0,0.149,-0.138,0.706,0.605,0.011,1.321,0.25,0.468
368,634157,LW,F,nhl,professional,867,9,2,0,19,...,0.0,0.0,0.149,-0.138,0.105,-0.23,0.011,-0.113,-0.021,-0.04
369,634157,LW,F,nhl,professional,867,9,2,0,19,...,-0.21,0.0,0.149,-0.138,0.706,0.605,0.011,1.321,0.25,0.468
