In-season: 10-01 through 03-31 (MM-DD)
Off-season: 04-01 through 09-30 (MM-DD)

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the table that maps players to their NHL player ids.
nhl_id_mapping = pd.read_csv(
    filepath_or_buffer=r'../Data/map_player_nhl.csv',
)

In [None]:
# Load the trade dates table.
trade_dates = pd.read_csv(
    filepath_or_buffer=r'../Data/trade_dates.csv',
)

In [None]:
# Load the traded players' stats from the season before they were traded.
traded_players_pre_season = pd.read_csv(
    filepath_or_buffer=r'../Data/traded_skater_platform.csv',
)

In [None]:
# Load the general player information table.
gen_info = pd.read_csv(
    filepath_or_buffer=r'../Data/traded_player_general_info.csv',
)

In [None]:
# Attach the NHL id mapping to the traded players' stats, matching rows on
# playerId. This is an inner join (the pandas default), so players missing
# from either table are dropped.
first_merge = traded_players_pre_season.merge(
    nhl_id_mapping,
    how='inner',
    on='playerId',
)

In [None]:
# Attach the trade dates to the previous result, matching rows on tradeId
# (inner join: only trades present in both tables are kept).
second_merge = first_merge.merge(
    trade_dates,
    how='inner',
    on='tradeId',
)

In [None]:
# Attach the general player info to the previous result. Rows must agree on
# both playerId and position to be kept (inner join).
third_merge = second_merge.merge(
    gen_info,
    how='inner',
    on=['playerId', 'position'],
)

In [None]:
# Store the calendar year of each trade date in its own column.
third_merge['trade_year'] = pd.to_datetime(
    third_merge.loc[:, 'trade_date']
).dt.year

In [None]:
# Parse the trade date column into datetimes, using the YYYY-MM-DD format.
third_merge['trade_date'] = third_merge['trade_date'].pipe(
    pd.to_datetime,
    format='%Y-%m-%d',
)

In [None]:
# Create a trade season column instead of just converting the seasons in the skater data.
# This is because of a year mix up in the data pulled from the platform.
# The column holds the season a player was traded during, or, for an
# off-season trade, the season that follows the trade.
def _traded_season_label(trade_date, first_start_year=2008, last_start_year=2024):
    """Return the 'YY-YY' season label that a trade date belongs to.

    A season window runs from April 1 of its starting year through March 31
    of the following year, so an off-season trade (April-September) is
    assigned to the upcoming season.

    Parameters:
        trade_date: a pandas Timestamp (or NaT / missing value).
        first_start_year: starting year of the earliest labelled season
            (2008 -> '08-09').
        last_start_year: starting year of the latest labelled season
            (2024 -> '24-25').

    Returns:
        The season label as a string, or None when the date is missing or
        falls outside the labelled range.
    """
    if pd.isna(trade_date):
        return None
    # January-March trades belong to the season that started the previous year.
    if trade_date.month >= 4:
        start_year = trade_date.year
    else:
        start_year = trade_date.year - 1
    if not first_start_year <= start_year <= last_start_year:
        return None
    return '{:02d}-{:02d}'.format(start_year % 100, (start_year + 1) % 100)


third_merge['traded_season'] = third_merge['trade_date'].apply(_traded_season_label)

In [None]:
# Keep just the month and day of each trade date as a zero-padded 'MM-DD'
# string.
third_merge['trade_month_day'] = (
    third_merge['trade_date']
    .pipe(pd.to_datetime)
    .dt.strftime('%m-%d')
)

In [None]:
# Flag off-season trades: 1 when the trade's 'MM-DD' falls between 04-01 and
# 09-30 (inclusive), 0 otherwise. The zero-padded strings compare correctly
# in lexicographic order.
third_merge['off-season_trade'] = np.where(
    third_merge['trade_month_day'].between(left='04-01', right='09-30'),
    1,
    0,
)

In [None]:
# Show how many rows are off-season trades (1) versus in-season trades (0).
third_merge.loc[:, 'off-season_trade'].value_counts()

In [None]:
# Load the skater rate stats covering the 08-09 through 24-25 seasons.
skater_data = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv',
)

In [None]:
# Assign the pre-trade season: the season immediately before the traded
# season. Traded seasons without an earlier season in the table (including
# '08-09' and missing values) map to None.
_previous_season = {
    '24-25': '23-24',
    '23-24': '22-23',
    '22-23': '21-22',
    '21-22': '20-21',
    '20-21': '19-20',
    '19-20': '18-19',
    '18-19': '17-18',
    '17-18': '16-17',
    '16-17': '15-16',
    '15-16': '14-15',
    '14-15': '13-14',
    '13-14': '12-13',
    '12-13': '11-12',
    '11-12': '10-11',
    '10-11': '09-10',
    '09-10': '08-09',
}
third_merge['pre_trade_season'] = third_merge['traded_season'].apply(
    lambda season: _previous_season.get(season)
)

In [None]:
# Store the off-season flag as int64 (it can come out as int32) so the
# integer dtypes are consistent.
third_merge['off-season_trade'] = third_merge.loc[:, 'off-season_trade'].astype(np.int64)

In [None]:
# Attach each player's skater stats for the season before the trade: the NHL
# player id and pre-trade season are matched against the skater data's
# 'API ID' and 'Season'. Inner join, so rows without a match are dropped.
pre_season_stats_merged = third_merge.merge(
    skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_season'],
    right_on=['API ID', 'Season'],
)

In [None]:
# Load the skater RAPM rates dataset.
RAPM_skater_data = pd.read_csv(
    filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_RAPM_Rates.csv',
)

In [None]:
# Attach the RAPM stats for the pre-trade season, matching on NHL player id,
# pre-trade season and team (inner join). Columns present in both inputs
# receive the pandas default '_x' / '_y' suffixes.
all_pre_season_stats = pre_season_stats_merged.merge(
    RAPM_skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_season', 'Team'],
    right_on=['API ID', 'Season', 'Team'],
)

In [None]:
# Drop, in place, the '_y' copies of columns that the previous merges
# duplicated.
all_pre_season_stats.drop(
    labels=[
        'positionGeneral_y',
        'Player_y',
        'EH_ID_y',
        'API ID_y',
        'Season_y',
        'Position_y',
        'Shoots_y',
        'Birthday_y',
        'Age_y',
        'Draft Yr_y',
        'Draft Rd_y',
        'Draft Ov_y',
        'GP_y',
        'TOI_y',
    ],
    axis='columns',
    inplace=True,
)

In [None]:
# Strip the trailing '_x' that the merges added to the left-hand copies of
# duplicated columns. Only a true suffix is removed, so a column that merely
# contains '_x' elsewhere in its name keeps its name intact.
all_pre_season_stats.columns = [
    col[:-len('_x')] if col.endswith('_x') else col
    for col in all_pre_season_stats.columns
]