In season: 10-01 through 03-31
Off season: 04-01 through 09-30

In [730]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt

In [731]:
# Load the lookup table that pairs each playerId with its nhlPlayerId
nhl_id_mapping = pd.read_csv(filepath_or_buffer=r'../Data/map_player_nhl.csv')

In [732]:
# Load the trade dates table (joined on tradeId further down)
trade_dates = pd.read_csv(filepath_or_buffer=r'../Data/trade_dates.csv')

In [733]:
# Load each traded player's stats from the season before the trade
traded_players_pre_season = pd.read_csv(filepath_or_buffer=r'../Data/traded_skater_platform.csv')

In [734]:
# Load the general player information table (joined on playerId and position further down)
gen_info = pd.read_csv(filepath_or_buffer=r'../Data/traded_player_general_info.csv')

In [735]:
nhl_id_mapping.columns

Index(['playerId', 'nhlPlayerId'], dtype='object')

In [736]:
# Attach the NHL player id to every traded player's pre-trade stats row
# (inner join on playerId, so players missing from the mapping are dropped)
first_merge = traded_players_pre_season.merge(nhl_id_mapping, how='inner', on='playerId')

In [737]:
# Bring in the trade date for each row by joining on tradeId (inner join)
second_merge = first_merge.merge(trade_dates, how='inner', on='tradeId')

In [738]:
# Add the general player info, matching on both playerId and position (inner join).
# Columns present on both sides but not used as keys come back with _x / _y suffixes.
player_info_keys = ['playerId', 'position']
third_merge = second_merge.merge(gen_info, how='inner', on=player_info_keys)

In [739]:
third_merge.shape

(1488, 32)

In [740]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick'],
      dtype='object')

In [741]:
# Store the calendar year of each trade in its own column
parsed_trade_dates = pd.to_datetime(third_merge['trade_date'])
third_merge['trade_year'] = parsed_trade_dates.dt.year

In [742]:
# Parse the trade_date column with the explicit '%Y-%m-%d' format so it holds datetimes
raw_trade_dates = third_merge['trade_date']
third_merge['trade_date'] = pd.to_datetime(raw_trade_dates, format='%Y-%m-%d')

In [743]:
# Create a trade season column instead of just converting the seasons in the skater data
# This is because of a year mix up in the data pulled from the platform
# This column will encapsulate the season in which either a player was traded during or
# the season after a player was traded during the off season
FIRST_SEASON_START_YEAR = 2008  # earliest label produced is '08-09'
LAST_SEASON_START_YEAR = 2024   # latest label produced is '24-25'
SEASON_ROLLOVER_MONTH = 4       # a season label runs from April 1 through March 31


def trade_date_to_season(trade_date):
    """Return the 'YY-YY' season label that a trade date falls into.

    A label such as '23-24' covers 2023-04-01 through 2024-03-31, so a trade made
    in the off-season (April-September) is attributed to the upcoming season.

    Parameters
    ----------
    trade_date : pandas.Timestamp
        Date of the trade; may be NaT.

    Returns
    -------
    str or None
        The season label, or None when the date is missing or lies outside
        2008-04-01 .. 2025-03-31.
    """
    if pd.isna(trade_date):
        return None
    # January-March trades belong to the season that started the previous calendar year
    if trade_date.month >= SEASON_ROLLOVER_MONTH:
        start_year = trade_date.year
    else:
        start_year = trade_date.year - 1
    if not FIRST_SEASON_START_YEAR <= start_year <= LAST_SEASON_START_YEAR:
        return None
    return f'{start_year % 100:02d}-{(start_year + 1) % 100:02d}'


third_merge['traded_season'] = third_merge['trade_date'].apply(trade_date_to_season)

In [744]:
third_merge['traded_season'].isna().sum()

0

In [745]:
# Keep only the zero-padded 'MM-DD' part of the trade date in a separate column
trade_timestamps = pd.to_datetime(third_merge['trade_date'])
third_merge['trade_month_day'] = trade_timestamps.dt.strftime('%m-%d')

In [746]:
# Flag trades made in the off-season: 1 when the 'MM-DD' string lies between
# '04-01' and '09-30' (both ends included), otherwise 0
month_day = third_merge['trade_month_day']
is_off_season = (month_day >= '04-01') & (month_day <= '09-30')
third_merge['off-season_trade'] = np.where(is_off_season, 1, 0)

In [747]:
third_merge['off-season_trade'].value_counts()

off-season_trade
0    1058
1     430
Name: count, dtype: int64

In [748]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'traded_season', 'trade_month_day',
       'off-season_trade'],
      dtype='object')

In [749]:
third_merge['league'].value_counts()

league
nhl    1488
Name: count, dtype: int64

In [750]:
third_merge['leagueLevel'].value_counts()

leagueLevel
professional    1488
Name: count, dtype: int64

In [751]:
# Load the Evolving-Hockey skater rate stats covering the 08-09 through 24-25 seasons
skater_data = pd.read_csv(filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_Rates.csv')

In [752]:
third_merge.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'traded_season', 'trade_month_day',
       'off-season_trade'],
      dtype='object')

In [753]:
# Transform years; For example 22-23 -> 2022
# NOTE(review): this conversion is disabled. The stats join further down matches `Season`
# against the 'YY-YY' strings in `pre_trade_season`, so `Season` has to stay a string;
# re-enabling this line would leave that join without matching keys.
#skater_data['Season'] = skater_data['Season'].apply(lambda x: 2024 if x == '24-25' else (2023 if x == '23-24' else (2022 if x == '22-23' else (2021 if x == '21-22' else (2020 if x == '20-21' else (2019 if x == '19-20' else (2018 if x == '18-19' else (2017 if x == '17-18' else (2016 if x == '16-17' else (2015 if x == '15-16' else (2014 if x == '14-15' else (2013 if x == '13-14' else (2012 if x == '12-13' else (2011 if x == '11-12' else (2010 if x == '10-11' else (2009 if x == '09-10' else (2008 if x == '08-09' else (None))))))))))))))))))

In [754]:
skater_data['Season'].isna().sum()

0

In [755]:
skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FSh%', 'xFSh%', 'iBLK/60', 'GIVE/60', 'TAKE/60',
       'iHF/60', 'iHA/60', 'iPENT2/60', 'iPEND2/60', 'iPENT5/60', 'iPEND5/60',
       'iPEN±/60', 'FOW/60', 'FOL/60', 'FO±/60'],
      dtype='object')

In [756]:
third_merge.shape

(1488, 36)

In [757]:
# Look up the season that immediately precedes each traded season.
# '08-09' has no earlier season listed, so it (like any unrecognised label) yields None.
season_order = [
    '08-09', '09-10', '10-11', '11-12', '12-13', '13-14', '14-15', '15-16', '16-17',
    '17-18', '18-19', '19-20', '20-21', '21-22', '22-23', '23-24', '24-25',
]
previous_season_of = dict(zip(season_order[1:], season_order[:-1]))
third_merge['pre_trade_season'] = third_merge['traded_season'].apply(
    lambda season: previous_season_of.get(season)
)

In [758]:
third_merge['pre_trade_season'].isna().sum()

0

In [759]:
third_merge.shape

(1488, 37)

In [760]:
third_merge.dtypes

playerId                             int64
position                            object
positionGeneral_x                   object
league                              object
leagueLevel                         object
tradeId                              int64
regularGamesPlayed                   int64
regularGoals                         int64
regularAssists                       int64
regularPenaltyMinutes                int64
postseasonGamesPlayed              float64
postseasonGoals                    float64
postseasonAssists                  float64
postseasonPenaltyMinutes           float64
regular_hits                       float64
regular_blockedShots               float64
regular_powerplayGoals             float64
regular_shots                      float64
regular_atoi                       float64
regular_pp_atoi                    float64
regular_pk_atoi                    float64
nhlPlayerId                          int64
trade_date                  datetime64[ns]
positionGen

In [761]:
# Cast the off-season flag to int64 so integer columns share one dtype.
# The two casts left commented out are not applied; note that 'pre_trade_year'
# is not a column created anywhere in the cells above.
#third_merge['trade_year'] = third_merge['trade_year'].astype('int64')

third_merge = third_merge.astype({'off-season_trade': 'int64'})

#third_merge['pre_trade_year'] = third_merge['pre_trade_year'].astype('int64')

In [762]:
third_merge.dtypes

playerId                             int64
position                            object
positionGeneral_x                   object
league                              object
leagueLevel                         object
tradeId                              int64
regularGamesPlayed                   int64
regularGoals                         int64
regularAssists                       int64
regularPenaltyMinutes                int64
postseasonGamesPlayed              float64
postseasonGoals                    float64
postseasonAssists                  float64
postseasonPenaltyMinutes           float64
regular_hits                       float64
regular_blockedShots               float64
regular_powerplayGoals             float64
regular_shots                      float64
regular_atoi                       float64
regular_pp_atoi                    float64
regular_pk_atoi                    float64
nhlPlayerId                          int64
trade_date                  datetime64[ns]
positionGen

In [763]:
skater_data.dtypes

Player        object
EH_ID         object
API ID         int64
Season        object
Team          object
Position      object
Shoots        object
Birthday      object
Age            int64
Draft Yr       int64
Draft Rd     float64
Draft Ov     float64
GP             int64
TOI          float64
G/60         float64
A1/60        float64
A2/60        float64
Points/60    float64
iSF/60       float64
iFF/60       float64
iCF/60       float64
ixG/60       float64
Sh%          float64
FSh%         float64
xFSh%        float64
iBLK/60      float64
GIVE/60      float64
TAKE/60      float64
iHF/60       float64
iHA/60       float64
iPENT2/60    float64
iPEND2/60    float64
iPENT5/60    float64
iPEND5/60    float64
iPEN±/60     float64
FOW/60       float64
FOL/60       float64
FO±/60       float64
dtype: object

In [764]:
third_merge[['pre_trade_season','nhlPlayerId']].value_counts()

pre_trade_season  nhlPlayerId
14-15             8475214        3
18-19             8479447        2
17-18             8477427        2
                  8477449        2
21-22             8481849        2
                                ..
14-15             8471693        1
                  8471476        1
                  8471426        1
                  8471392        1
23-24             8484259        1
Name: count, Length: 1432, dtype: int64

In [765]:
third_merge[['pre_trade_season','nhlPlayerId','tradeId']].value_counts()

pre_trade_season  nhlPlayerId  tradeId
08-09             8451224      4373       1
20-21             8475209      7          1
                  8475752      229        1
                  8475750      297        1
                  8475728      117        1
                                         ..
14-15             8470877      856        1
                  8470740      849        1
                  8470714      1183       1
                  8470599      581        1
23-24             8484259      2138       1
Name: count, Length: 1488, dtype: int64

In [766]:
third_merge[(third_merge['nhlPlayerId'] == 8475214) & (third_merge['pre_trade_season'] == '14-15')]

Unnamed: 0,playerId,position,positionGeneral_x,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,dateOfBirth,birthCountry,draftYear,draftRound,draftOverallPick,trade_year,traded_season,trade_month_day,off-season_trade,pre_trade_season
102,271153,LW,F,nhl,professional,1073,43,2,4,28,...,1991-04-16,United States,2009.0,2.0,45.0,2016,15-16,01-03,0,14-15
342,271153,LW,F,nhl,professional,916,43,2,4,28,...,1991-04-16,United States,2009.0,2.0,45.0,2015,15-16,06-30,1,14-15
1336,271153,LW,F,nhl,professional,834,43,2,4,28,...,1991-04-16,United States,2009.0,2.0,45.0,2016,15-16,02-27,0,14-15


In [767]:
# Remove the first row of third_merge (index label 0 of the merge result).
# Dropping by label with errors='ignore' keeps this cell idempotent: the positional
# form, drop(third_merge.index[0]), removes one more row every time the cell is re-run.
# NOTE(review): the reason this row is dropped is not recorded; the duplicate rows
# displayed just above carry labels 102, 342 and 1336, not 0 — confirm this is the intended row.
third_merge = third_merge.drop(index=0, errors='ignore')

In [768]:
third_merge[['pre_trade_season','nhlPlayerId','tradeId']].value_counts()

pre_trade_season  nhlPlayerId  tradeId
08-09             8451224      4373       1
20-21             8475186      56         1
                  8475750      297        1
                  8475728      117        1
                  8475714      128        1
                                         ..
14-15             8470877      856        1
                  8470740      849        1
                  8470714      1183       1
                  8470599      581        1
23-24             8484259      2138       1
Name: count, Length: 1487, dtype: int64

In [769]:
# Attach each player's skater rate stats from the season before the trade
# (inner join: NHL player id + pre-trade season on the left, API ID + Season on the right).
# NOTE(review): the off-season flag counted 430 rows before the stats joins but the
# off-season subset has 451 rows after them, so some key matches more than one stats row
# (presumably one row per Team for players who changed teams that season) — confirm.
pre_season_stats_merged = third_merge.merge(
    skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_season'],
    right_on=['API ID', 'Season'],
)

In [770]:
pre_season_stats_merged.shape

(1439, 75)

In [771]:
# Load the Evolving-Hockey skater RAPM rate stats
RAPM_skater_data = pd.read_csv(filepath_or_buffer=r'../Evolving-Hockey/NHL_08_24_Skater_RAPM_Rates.csv')

In [772]:
RAPM_skater_data['Season'].isna().sum()

0

In [773]:
RAPM_skater_data['Season'].unique()

array(['16-17', '17-18', '18-19', '20-21', '21-22', '22-23', '23-24',
       '24-25', '08-09', '14-15', '15-16', '19-20', '09-10', '10-11',
       '11-12', '12-13', '13-14'], dtype=object)

In [774]:
RAPM_skater_data.columns

Index(['Player', 'EH_ID', 'API ID', 'Season', 'Team', 'Position', 'Shoots',
       'Birthday', 'Age', 'Draft Yr', 'Draft Rd', 'Draft Ov', 'GP', 'TOI',
       'G±/60', 'xG±/60', 'C±/60', 'GF/60', 'GA/60', 'xGF/60', 'xGA/60',
       'CF/60', 'CA/60'],
      dtype='object')

In [775]:
# Disabled conversion of the 'YY-YY' season label to its starting year (e.g. '22-23' -> 2022).
# NOTE(review): the RAPM join below matches `Season` against the 'YY-YY' strings in
# `pre_trade_season`, so `Season` has to stay a string for that join to find matches.
#RAPM_skater_data['Season'] = RAPM_skater_data['Season'].apply(lambda x: 2024 if x == '24-25' else (2023 if x == '23-24' else (2022 if x == '22-23' else (2021 if x == '21-22' else (2020 if x == '20-21' else (2019 if x == '19-20' else (2018 if x == '18-19' else (2017 if x == '17-18' else (2016 if x == '16-17' else (2015 if x == '15-16' else (2014 if x == '14-15' else (2013 if x == '13-14' else (2012 if x == '12-13' else (2011 if x == '11-12' else (2010 if x == '10-11' else (2009 if x == '09-10' else (2008 if x == '08-09' else (None))))))))))))))))))

In [776]:
RAPM_skater_data['Season'].isna().sum()

0

In [777]:
# Add the pre-trade season RAPM stats, matching on player, season and Team (inner join).
# Non-key columns shared by both frames come back with _x (left) and _y (right) suffixes.
all_pre_season_stats = pre_season_stats_merged.merge(
    RAPM_skater_data,
    how='inner',
    left_on=['nhlPlayerId', 'pre_trade_season', 'Team'],
    right_on=['API ID', 'Season', 'Team'],
)

In [778]:
all_pre_season_stats.shape

(1439, 97)

### Now, let's clean up some of the duplicate information

In [779]:
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'positionGeneral_y', 'height_cm', 'weight_kg',
       'handedness', 'dateOfBirth', 'birthCountry', 'draftYear', 'draftRound',
       'draftOverallPick', 'trade_year', 'traded_season', 'trade_month_day',
       'off-season_trade', 'pre_trade_season', 'Player_x', 'EH_ID_x',
       'API ID_x', 'Season_x', 'Team', 'Position_x', 'Shoots_x', 'Birthday_x',
       'Age_x', 'Draft Yr_x', 'Draft Rd_x', 'Draft Ov_x', 'GP_x', 'TOI_x',
       'G/60', 'A1/60', 'A2/60', 'Points/60', 'iSF/60', 'iFF/60', 'iCF/60',
       'ixG/60', 'Sh%', 'FS

In [780]:
# Discard the right-hand (_y) copies of columns that the earlier merges duplicated;
# the matching _x copies are kept
duplicate_columns = [
    'positionGeneral_y', 'Player_y', 'EH_ID_y', 'API ID_y', 'Season_y', 'Position_y',
    'Shoots_y', 'Birthday_y', 'Age_y', 'Draft Yr_y', 'Draft Rd_y', 'Draft Ov_y',
    'GP_y', 'TOI_y',
]
all_pre_season_stats = all_pre_season_stats.drop(columns=duplicate_columns)

In [781]:
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral_x', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'height_cm', 'weight_kg', 'handedness', 'dateOfBirth',
       'birthCountry', 'draftYear', 'draftRound', 'draftOverallPick',
       'trade_year', 'traded_season', 'trade_month_day', 'off-season_trade',
       'pre_trade_season', 'Player_x', 'EH_ID_x', 'API ID_x', 'Season_x',
       'Team', 'Position_x', 'Shoots_x', 'Birthday_x', 'Age_x', 'Draft Yr_x',
       'Draft Rd_x', 'Draft Ov_x', 'GP_x', 'TOI_x', 'G/60', 'A1/60', 'A2/60',
       'Points/60', 'iSF/60', 'iFF/60', 'iCF/60', 'ixG/60', 'Sh%', 'FSh%',
       'xFSh%', 'iBLK/6

In [782]:
# Get rid of the _x merge suffix on the remaining columns.
# Only a trailing '_x' is removed: a blanket str.replace('_x', '') would also delete
# '_x' wherever it occurs inside a column name, silently renaming unrelated columns.
merge_suffix = '_x'
all_pre_season_stats.columns = [
    name[:-len(merge_suffix)] if name.endswith(merge_suffix) else name
    for name in all_pre_season_stats.columns
]

In [783]:
all_pre_season_stats.columns

Index(['playerId', 'position', 'positionGeneral', 'league', 'leagueLevel',
       'tradeId', 'regularGamesPlayed', 'regularGoals', 'regularAssists',
       'regularPenaltyMinutes', 'postseasonGamesPlayed', 'postseasonGoals',
       'postseasonAssists', 'postseasonPenaltyMinutes', 'regular_hits',
       'regular_blockedShots', 'regular_powerplayGoals', 'regular_shots',
       'regular_atoi', 'regular_pp_atoi', 'regular_pk_atoi', 'nhlPlayerId',
       'trade_date', 'height_cm', 'weight_kg', 'handedness', 'dateOfBirth',
       'birthCountry', 'draftYear', 'draftRound', 'draftOverallPick',
       'trade_year', 'traded_season', 'trade_month_day', 'off-season_trade',
       'pre_trade_season', 'Player', 'EH_ID', 'API ID', 'Season', 'Team',
       'Position', 'Shoots', 'Birthday', 'Age', 'Draft Yr', 'Draft Rd',
       'Draft Ov', 'GP', 'TOI', 'G/60', 'A1/60', 'A2/60', 'Points/60',
       'iSF/60', 'iFF/60', 'iCF/60', 'ixG/60', 'Sh%', 'FSh%', 'xFSh%',
       'iBLK/60', 'GIVE/60', 'TAKE/60', 'i

## All pre-trade season stats have now been assigned. Now, let's isolate the players traded in the off-season and assign them their post-trade RAPM stats.

In [784]:
# Partition the players by the 0/1 off-season flag
trade_flag = all_pre_season_stats['off-season_trade']

# Flag 1: players traded in the off-season
off_season_traded_players = all_pre_season_stats.loc[trade_flag == 1]

# Flag 0: players traded during the season
in_season_traded_players = all_pre_season_stats.loc[trade_flag == 0]

In [785]:
# Check that they split correctly
off_season_traded_players.shape, in_season_traded_players.shape

((451, 83), (988, 83))

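**TODO:** The merge attempted below is bad because I am trying to merge a transaction in one year [sentence left unfinished — presumably "against stats keyed by a two-year season label such as '24-25'"; confirm and complete].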

In [786]:
off_season_traded_players.head()

Unnamed: 0,playerId,position,positionGeneral,league,leagueLevel,tradeId,regularGamesPlayed,regularGoals,regularAssists,regularPenaltyMinutes,...,FO±/60,G±/60,xG±/60,C±/60,GF/60,GA/60,xGF/60,xGA/60,CF/60,CA/60
2,287958,LW,F,nhl,professional,380,65,7,13,40,...,-0.1,0.106,-0.013,-0.31,0.013,-0.094,-0.116,-0.103,-0.8,-0.49
7,391986,RD,D,nhl,professional,1446,36,1,7,20,...,0.0,-0.291,-0.253,-1.61,-0.115,0.176,-0.157,0.096,-1.64,-0.03
8,58520,LW,F,nhl,professional,1681,78,20,32,103,...,-0.47,0.187,0.081,5.73,0.087,-0.1,0.127,0.045,5.17,-0.56
12,691859,F,F,nhl,professional,335,50,10,15,21,...,-0.69,0.24,0.213,4.7,0.142,-0.098,0.226,0.013,5.15,0.45
13,55106,LW,F,nhl,professional,20,35,1,3,37,...,0.0,0.153,0.025,1.12,-0.076,-0.229,0.083,0.058,0.96,-0.16


In [787]:
RAPM_skater_data['Season'].isna().sum()

0

In [788]:
# Merge the next seasons stats for the off-season traded players
# NOTE(review): left disabled. As written, the join pairs the integer `trade_year`
# (taken from the trade date's calendar year) with RAPM `Season`, which holds 'YY-YY'
# strings such as '24-25', so the keys cannot match. `traded_season` looks like the
# intended left key — confirm before re-enabling.
#off_season_traded_players = pd.merge(off_season_traded_players, RAPM_skater_data, left_on=['nhlPlayerId','trade_year'], right_on=['API ID','Season'], how='inner')

In [789]:
off_season_traded_players.shape

(451, 83)