In [53]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

In [54]:
# Import the map to nhl player id data
nhl_id_mapping = pd.read_csv(r'../Data/map_player_nhl.csv')

In [55]:
# Import the trade dates data
trade_dates = pd.read_csv(r'../Data/trade_dates.csv')

In [56]:
# Import the traded players stats from the season before being traded.
traded_players_pre_season = pd.read_csv(r'../Data/traded_skater_platform.csv')

In [57]:
# Import the gen info data
gen_info = pd.read_csv(r'../Data/traded_player_general_info.csv')

In [58]:
nhl_id_mapping.columns

Index(['playerId', 'nhlPlayerId'], dtype='object')

In [59]:
# Merge trade dates and players data
first_merge = pd.merge(traded_players_pre_season, nhl_id_mapping, on='playerId')

In [60]:
# Merge last result with trade dates data
second_merge = pd.merge(first_merge, trade_dates, on='tradeId', how='inner')

In [61]:
# Merge last result with the general player info data
third_merge = pd.merge(second_merge, gen_info, on=['playerId','position'], how='inner')

In [62]:
third_merge.shape

(1488, 32)

In [63]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick'],
      dtype='object')

In [64]:
# Pull the year out of the trade date
third_merge['trade_year'] = pd.to_datetime(third_merge['trade_date']).dt.year

In [65]:
third_merge['trade_year'].value_counts()

trade_year
2023    144
2024    118
2011    102
2025     99
2013     95
2017     93
2018     93
2019     92
2022     92
2010     92
2021     91
2016     85
2020     80
2015     78
2014     66
2012     56
2009     12
Name: count, dtype: int64

In [66]:
# Make a separate column with just the season month and day combination
third_merge['trade_month_day'] = pd.to_datetime(third_merge['trade_date']).dt.strftime('%m-%d')

In [67]:
# Create a boolean variable for if the player was traded in the off-season
third_merge['off-season_trade'] = np.where(third_merge['trade_month_day'].between('04-01', '10-06'), 1, 0)

In [68]:
third_merge['off-season_trade'].value_counts()

off-season_trade
0    1038
1     450
Name: count, dtype: int64

In [69]:
# Import skater data from 08-09 season to 24-25 season
skater_data = pd.read_csv(r'../Evolving-Hockey/skater_data_08_24.csv')

In [70]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'trade_month_day',
       'off-season_trade'],
      dtype='object')

In [71]:
# Transform years; For example 22-23 -> 2022
skater_data['Season'] = skater_data['Season'].apply(lambda x: 2024 if x == 24-25 else (2023 if x == '23-24' else (2022 if x == '22-23' else (2021 if x == '21-22' else (2020 if x == '20-21' else (2019 if x == 19-20 else (2018 if x == '18-19' else (2017 if x == 17-18 else (2016 if x == '16-17' else (2015 if x == '15-16' else (2014 if x == '14-15' else (2013 if x == '13-14' else (2012 if x == '12-13' else (2011 if x == '11-12' else (2010 if x == 10-11 else (2009 if x == '09-10' else ('2008' if x == '08-09' else (None))))))))))))))))))

In [72]:
skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI', 'G',
       'A1', 'A2', 'Points', 'iSF', 'iFF', 'iCF', 'ixG', 'Sh%', 'FSh%',
       'xFSh%', 'iBLK', 'GIVE', 'TAKE', 'iHF', 'iHA', 'iPENT2', 'iPEND2',
       'iPENT5', 'iPEND5', 'iPEN±', 'FOW', 'FOL', 'FO±'],
      dtype='object')

In [73]:
third_merge.shape

(1488, 35)

In [74]:
# Assign pre-trade year based on the trade year
third_merge['pre_trade_year'] = third_merge['trade_year'] - 1

In [76]:
third_merge.shape

(1488, 36)

In [77]:
# Assign pre-trade stats based on the trade year
pre_season_stats_merged = pd.merge(third_merge, skater_data, left_on=['nhlPlayerId','pre_trade_year'], right_on=['API ID','Season'], how='inner')

In [78]:
pre_season_stats_merged.shape

(1412, 74)

In [None]:
# Assign pre-trade stats based on the trade year
pre_season_stats_merged = pd.merge(third_merge, skater_data, left_on=['nhlPlayerId','trade_year'], right_on=['API ID','Season'], how='inner')