In [1072]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

In [1073]:
# Load the lookup table that links playerId to nhlPlayerId
nhl_id_mapping = pd.read_csv(filepath_or_buffer=r'../Data/map_player_nhl.csv')

In [1074]:
# Load the trade dates table (joined to the player stats on tradeId further down)
trade_dates = pd.read_csv(filepath_or_buffer=r'../Data/trade_dates.csv')

In [1075]:
# Load each traded player's stats from the season before the trade
traded_players_pre_season = pd.read_csv(filepath_or_buffer=r'../Data/traded_skater_platform.csv')

In [1076]:
# Load the general player information table
gen_info = pd.read_csv(filepath_or_buffer=r'../Data/traded_player_general_info.csv')

In [1077]:
# Show the mapping table's column names ('playerId' is the join key used in the next merge).
nhl_id_mapping.columns

Index(['playerId', 'nhlPlayerId'], dtype='object')

In [1078]:
# Attach each player's NHL id to the pre-trade season stats (default inner join on playerId)
first_merge = traded_players_pre_season.merge(nhl_id_mapping, on='playerId')

In [1079]:
# Add the trade dates; rows whose tradeId has no match on either side are dropped
second_merge = first_merge.merge(trade_dates, how='inner', on='tradeId')

In [1080]:
# Add the general player info, matching on both playerId and position (inner join)
third_merge = second_merge.merge(gen_info, how='inner', on=['playerId', 'position'])

In [1081]:
# Row/column count after the three merges above.
third_merge.shape

(1488, 32)

In [1082]:
# Column names after the merges; an _x/_y suffix marks a name that was present on both sides of a merge.
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick'],
      dtype='object')

In [1083]:
# Parse trade_date as a datetime and keep only its calendar year
third_merge['trade_year'] = third_merge['trade_date'].pipe(pd.to_datetime).dt.year

In [1084]:
# Number of merged rows per trade year.
third_merge['trade_year'].value_counts()

trade_year
2023    144
2024    118
2011    102
2025     99
2013     95
2017     93
2018     93
2019     92
2022     92
2010     92
2021     91
2016     85
2020     80
2015     78
2014     66
2012     56
2009     12
Name: count, dtype: int64

In [1085]:
# Zero-padded 'MM-DD' string of the trade date, used for the season-window comparison
third_merge['trade_month_day'] = third_merge['trade_date'].pipe(pd.to_datetime).dt.strftime('%m-%d')

In [1086]:
# Flag (1/0) trades whose 'MM-DD' falls between 04-01 and 10-06 inclusive.
# The comparison is on strings, which orders correctly because both parts are zero-padded.
third_merge['off-season_trade'] = third_merge['trade_month_day'].between('04-01', '10-06').astype(int)

In [1087]:
# Count of rows flagged as off-season (1) versus in-season (0) trades.
third_merge['off-season_trade'].value_counts()

off-season_trade
0    1038
1     450
Name: count, dtype: int64

In [1088]:
# Load the Evolving-Hockey skater rate stats covering the 08-09 through 24-25 seasons
skater_data = pd.read_csv(filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv')

In [1089]:
# Re-list the columns now that trade_year, trade_month_day and off-season_trade have been added.
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'trade_month_day',
       'off-season_trade'],
      dtype='object')

In [1090]:
# Convert season labels such as '22-23' to the integer year the season starts in (2022).
# The lookup is generated for 08-09 .. 24-25 instead of spelling out a 17-level nested
# conditional; labels outside that range become NaN so the isna() check below catches them.
season_start_year = {f'{year % 100:02d}-{(year + 1) % 100:02d}': year for year in range(2008, 2025)}
# Only convert while the column still holds labels: re-running the cell on the
# already-converted integers would otherwise turn every value into NaN.
if not pd.api.types.is_numeric_dtype(skater_data['Season']):
    skater_data['Season'] = skater_data['Season'].map(season_start_year)

In [1091]:
# A non-zero count here means some season label was not covered by the conversion above.
skater_data['Season'].isna().sum()

0

In [1092]:
# Column names of the skater rates data ('API ID' and 'Season' are the merge keys used later).
skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FSh%', 'xFSh%', 'iBLK/60', 'GIVE/60', 'TAKE/60',
       'iHF/60', 'iHA/60', 'iPENT2/60', 'iPEND2/60', 'iPENT5/60', 'iPEND5/60',
       'iPEN±/60', 'FOW/60', 'FOL/60', 'FO±/60'],
      dtype='object')

In [1093]:
# Shape before adding pre_trade_year.
third_merge.shape

(1488, 35)

In [1094]:
# pre_trade_year is the calendar year before the trade; it is matched against the
# skater data's Season (season start year) in a later merge.
# NOTE(review): Season uses the start year ('22-23' -> 2022), so for a trade early in a
# calendar year trade_year - 1 is the season the trade happens in — confirm this is intended.
third_merge['pre_trade_year'] = third_merge['trade_year'].sub(1)

In [1095]:
# Shape after adding pre_trade_year: expect one more column and the same row count.
third_merge.shape

(1488, 36)

In [1096]:
# Inspect column dtypes before merging on nhlPlayerId / pre_trade_year.
third_merge.dtypes

playerId                      int64
position                     object
positionGeneral_x            object
league                       object
leagueLevel                  object
tradeId                       int64
regularGamesPlayed            int64
regularGoals                  int64
regularAssists                int64
regularPenaltyMinutes         int64
postseasonGamesPlayed       float64
postseasonGoals             float64
postseasonAssists           float64
postseasonPenaltyMinutes    float64
regular_hits                float64
regular_blockedShots        float64
regular_powerplayGoals      float64
regular_shots               float64
regular_atoi                float64
regular_pp_atoi             float64
regular_pk_atoi             float64
nhlPlayerId                   int64
trade_date                   object
positionGeneral_y            object
height_cm                   float64
weight_kg                   float64
handedness                   object
dateOfBirth                 

In [1097]:
# Cast the three derived columns to int64 in a single call
third_merge = third_merge.astype({
    'trade_year': 'int64',
    'off-season_trade': 'int64',
    'pre_trade_year': 'int64',
})

In [1098]:
# Re-check the dtypes after the int64 casts.
third_merge.dtypes

playerId                      int64
position                     object
positionGeneral_x            object
league                       object
leagueLevel                  object
tradeId                       int64
regularGamesPlayed            int64
regularGoals                  int64
regularAssists                int64
regularPenaltyMinutes         int64
postseasonGamesPlayed       float64
postseasonGoals             float64
postseasonAssists           float64
postseasonPenaltyMinutes    float64
regular_hits                float64
regular_blockedShots        float64
regular_powerplayGoals      float64
regular_shots               float64
regular_atoi                float64
regular_pp_atoi             float64
regular_pk_atoi             float64
nhlPlayerId                   int64
trade_date                   object
positionGeneral_y            object
height_cm                   float64
weight_kg                   float64
handedness                   object
dateOfBirth                 

In [1099]:
# dtypes of the skater data; compare the merge keys 'API ID' and 'Season' with the left-hand key dtypes.
skater_data.dtypes

Player        object
EH_ID         object
API ID         int64
Season         int64
Team          object
Position      object
Shoots        object
Birthday      object
Age            int64
Draft Yr       int64
Draft Rd     float64
Draft Ov     float64
GP             int64
TOI          float64
G/60         float64
A1/60        float64
A2/60        float64
Points/60    float64
iSF/60       float64
iFF/60       float64
iCF/60       float64
ixG/60       float64
Sh%          float64
FSh%         float64
xFSh%        float64
iBLK/60      float64
GIVE/60      float64
TAKE/60      float64
iHF/60       float64
iHA/60       float64
iPENT2/60    float64
iPEND2/60    float64
iPENT5/60    float64
iPEND5/60    float64
iPEN±/60     float64
FOW/60       float64
FOL/60       float64
FO±/60       float64
dtype: object

In [1124]:
# Count rows per (pre_trade_year, nhlPlayerId) pair; a count above 1 means the merge keys
# are not unique in third_merge.
# NOTE(review): this cell's execution count is out of sequence with its neighbours, so it was
# run after later cells — re-run the notebook top to bottom to confirm the output.
third_merge[['pre_trade_year','nhlPlayerId']].value_counts()

pre_trade_year  nhlPlayerId
2012            8473908        2
2017            8473914        2
2023            8475690        2
2018            8478839        2
2024            8474568        2
                              ..
2015            8469531        1
                8469490        1
                8468639        1
                8467514        1
2024            8484221        1
Name: count, Length: 1422, dtype: int64

In [1100]:
# Attach the skater rate stats for each player's pre_trade_year (inner join).
# The row count grows from 1488 to 1939 in this merge (see the shape cells).
# NOTE(review): presumably skater_data has several rows for some player-seasons
# (e.g. one per Team) — confirm that the extra rows are wanted.
pre_season_stats_merged = third_merge.merge(
    skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_year'],
    right_on=['API ID', 'Season'],
)

In [1101]:
# Shape after attaching the skater rates.
pre_season_stats_merged.shape

(1939, 74)

In [1102]:
# Load the Evolving-Hockey skater RAPM rate stats
RAPM_skater_data = pd.read_csv(filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_RAPM_Rates.csv')

In [1103]:
# Missing season labels in the RAPM data before conversion.
RAPM_skater_data['Season'].isna().sum()

0

In [1104]:
# Distinct season labels present in the RAPM data.
RAPM_skater_data['Season'].unique()

array(['16-17', '17-18', '18-19', '20-21', '21-22', '22-23', '23-24',
       '24-25', '08-09', '14-15', '15-16', '19-20', '09-10', '10-11',
       '11-12', '12-13', '13-14'], dtype=object)

In [1105]:
# Column names of the RAPM data.
RAPM_skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G±/60', 'xG±/60', 'C±/60', 'GF/60', 'GA/60', 'xGF/60', 'xGA/60',
       'CF/60', 'CA/60'],
      dtype='object')

In [1106]:
# Convert season labels such as '22-23' to the integer year the season starts in (2022).
# The lookup is generated for 08-09 .. 24-25 instead of spelling out a 17-level nested
# conditional; labels outside that range become NaN so the isna() check below catches them.
season_start_year = {f'{year % 100:02d}-{(year + 1) % 100:02d}': year for year in range(2008, 2025)}
# Only convert while the column still holds labels: re-running the cell on the
# already-converted integers would otherwise turn every value into NaN.
if not pd.api.types.is_numeric_dtype(RAPM_skater_data['Season']):
    RAPM_skater_data['Season'] = RAPM_skater_data['Season'].map(season_start_year)

In [1107]:
# A non-zero count here means some season label was not covered by the conversion above.
RAPM_skater_data['Season'].isna().sum()

0

In [1108]:
# Attach the RAPM rates for the same player, pre-trade season and team (inner join)
all_pre_season_stats = pre_season_stats_merged.merge(
    RAPM_skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_year', 'Team'],
    right_on=['API ID', 'Season', 'Team'],
)

In [1109]:
# Shape after attaching the RAPM rates.
all_pre_season_stats.shape

(1939, 96)

### Now, let's clean up some of the duplicate information

In [1110]:
# Column names after both stat merges, including the _x/_y duplicates.
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'trade_month_day', 'off-season_trade',
       'pre_trade_year', 'Player_x', 'EH_ID_x', 'API ID_x', 'Season_x', 'Team',
       'Position_x', 'Shoots_x', 'Birthday_x', 'Age_x', 'Draft Yr_x',
       'Draft Rd_x', 'Draft Ov_x', 'GP_x', 'TOI_x', 'G/60', 'A1/60', 'A2/60',
       'Points/60', 'iSF/60', 'iFF/60', 'iCF/60', 'ixG/60', 'Sh%', 'FSh%',
       'xFSh%', 'iBLK

In [1111]:
# Drop the right-hand ('_y') copies of columns duplicated by the earlier merges.
# Selecting by suffix and re-binding (instead of an in-place drop of a hard-coded list)
# keeps the cell safe to re-run: a second run simply finds nothing left to drop.
duplicate_columns = [col for col in all_pre_season_stats.columns if col.endswith('_y')]
all_pre_season_stats = all_pre_season_stats.drop(columns=duplicate_columns)

In [1112]:
# Column names after dropping the _y duplicates.
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'height_cm', 'weight_kg', 'handedness', 'dateOfBirth',
       'birthCountry', 'draftYear', 'draftRound', 'draftOverallPick',
       'trade_year', 'trade_month_day', 'off-season_trade', 'pre_trade_year',
       'Player_x', 'EH_ID_x', 'API ID_x', 'Season_x', 'Team', 'Position_x',
       'Shoots_x', 'Birthday_x', 'Age_x', 'Draft Yr_x', 'Draft Rd_x',
       'Draft Ov_x', 'GP_x', 'TOI_x', 'G/60', 'A1/60', 'A2/60', 'Points/60',
       'iSF/60', 'iFF/60', 'iCF/60', 'ixG/60', 'Sh%', 'FSh%', 'xFSh%',
       'iBLK/60', 'GIVE/60', 'TAK

In [1113]:
# Strip the '_x' merge suffix from column names.
# Only a trailing '_x' is removed; a plain str.replace would also delete '_x'
# occurring in the middle of a name and silently rename an unrelated column.
all_pre_season_stats.columns = [
    col[:-len('_x')] if col.endswith('_x') else col
    for col in all_pre_season_stats.columns
]

In [1114]:
# Column names after stripping the _x suffixes.
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'height_cm', 'weight_kg', 'handedness', 'dateOfBirth',
       'birthCountry', 'draftYear', 'draftRound', 'draftOverallPick',
       'trade_year', 'trade_month_day', 'off-season_trade', 'pre_trade_year',
       'Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FSh%', 'xFSh%', 'iBLK/60', 'GIVE/60', 'TAKE/60',
       'iHF/60', 'iHA/60', '

## All Pre-trade season stats have now been assigned. Now, let's isolate the players traded in the offseason and assign them their post-trade RAPM stats. 

In [1115]:
# Split the merged stats by the off-season flag. .copy() gives each subset its own data,
# so adding or changing columns on it later does not raise pandas' SettingWithCopyWarning.

# Players traded in the off-season
off_season_traded_players = all_pre_season_stats.loc[all_pre_season_stats['off-season_trade'] == 1].copy()

# Players traded during the season
in_season_traded_players = all_pre_season_stats.loc[all_pre_season_stats['off-season_trade'] == 0].copy()

In [1116]:
# Check the split: the two row counts should add up to the rows in all_pre_season_stats.
off_season_traded_players.shape, in_season_traded_players.shape

((468, 82), (1471, 82))

Bad because I am trying to merge a transaction in one year. (TODO: this sentence is unfinished — complete the explanation of what goes wrong.)

In [1117]:
# Preview the first rows of the off-season subset.
off_season_traded_players.head()

Unnamed: 0,playerId,position,positionGeneral,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,FO±/60,G±/60,xG±/60,C±/60,GF/60,GA/60,xGF/60,xGA/60,CF/60,CA/60
6,287958,LW,F,nhl,professional,380,65,7,13,40,...,-0.1,0.106,-0.013,-0.31,0.013,-0.094,-0.116,-0.103,-0.8,-0.49
14,391986,RD,D,nhl,professional,1446,36,1,7,20,...,0.0,-0.291,-0.253,-1.61,-0.115,0.176,-0.157,0.096,-1.64,-0.03
15,58520,LW,F,nhl,professional,1681,78,20,32,103,...,-0.47,0.187,0.081,5.73,0.087,-0.1,0.127,0.045,5.17,-0.56
17,691859,F,F,nhl,professional,335,50,10,15,21,...,-0.69,0.24,0.213,4.7,0.142,-0.098,0.226,0.013,5.15,0.45
18,55106,LW,F,nhl,professional,20,35,1,3,37,...,0.0,0.153,0.025,1.12,-0.076,-0.229,0.083,0.058,0.96,-0.16


In [1118]:
# Re-check that the RAPM Season column has no missing values.
RAPM_skater_data['Season'].isna().sum()

0

In [1119]:
# Merge the next seasons stats for the off-season traded players
# NOTE(review): this merge is disabled. It matches trade_year against Season and would
# overwrite off_season_traded_players — TODO decide whether to restore or delete it.
#off_season_traded_players = pd.merge(off_season_traded_players, RAPM_skater_data, left_on=['nhlPlayerId','trade_year'], right_on=['API ID','Season'], how='inner')

In [1120]:
# Shape is unchanged from the split above because the merge in the previous cell is commented out.
off_season_traded_players.shape

(468, 82)