# Data

All data can be pulled from https://nhl.com/stats. The goal is to predict daily fantasy output based on counting stats and more specific split stats, days of rest, opponent, month, etc.

## Feature Engineering and Stuff

In [1]:
# Standard library.
import os
import importlib
# NOTE(review): wildcard import. The `datetime` and `timedelta` names used when
# computing `yesterday` further down are only in scope because of this line.
from datetime import *
# NOTE(review): wildcard import; no name from dateutil is referenced in the
# cells visible here — confirm it is still needed.
from dateutil import *
# Third-party.
import pandas as pd
import numpy as np
# Lift the row limit on displayed frames/series (None = show every row).
pd.set_option('display.max_rows',None)
# Project-local helper modules (reloaded in a later cell while iterating on them).
import data_grab, data_proc, data_prep, data_explor

In [2]:
## most recent update: 11-01-2022

In [46]:
# Re-import the project helper modules so that edits made to their source files
# are picked up without restarting the kernel.
importlib.reload(data_grab)
importlib.reload(data_prep)
importlib.reload(data_explor)
# Last expression of the cell, so the reloaded data_proc module is what is displayed.
importlib.reload(data_proc)

<module 'data_proc' from '/Users/nickdimmitt/dfs/hockey/data_proc.py'>

In [42]:
# Fixed date string handed to data_grab.main as its second argument.
end_date = "2022-11-01"

# The calendar day before "now" (local clock), rendered as YYYY-MM-DD; handed to
# data_grab.main as its first argument.
# NOTE(review): once the notebook is run after 2022-11-02 this value is later
# than end_date — confirm which argument data_grab.main treats as the start.
yesterday = format(datetime.today() - timedelta(days=1), "%Y-%m-%d")

In [43]:
# Pull the three skater frames with data_grab.main. Every call shares the same
# date arguments, the same trailing positional arguments (0, 10000, 100) and
# saveData=False; only the third argument and the CSV path differ.
# The calls run in the same order as before: 'skater', 'misc', 'shots'.
df_skaters, df_skate_misc, df_skate_shot = (
    data_grab.main(yesterday, end_date, kind, csv_path, 0, 10000, 100, saveData=False)
    for kind, csv_path in (
        ('skater', "~/dfs/hockey/data/df_skaters.csv"),
        ('misc', "~/dfs/hockey/data/df_skaters_misc.csv"),
        ('shots', "~/dfs/hockey/data/df_skaters_shot.csv"),
    )
)

In [47]:
# Combine the three skater frames through data_prep.main.
# Positional arguments after the frames, in order:
#   1. ['gameId', 'playerId']  — presumably the join keys; verify in data_prep
#   2. a list of '*_y' column names — presumably duplicate columns produced by
#      the merge that should be discarded; verify in data_prep
#   3. a list of further column names — presumably also discarded; verify
df_merged = data_prep.main(
    df_skaters,
    df_skate_misc,
    df_skate_shot,
    ['gameId', 'playerId'],
    (
        'Unnamed: 0_y, goals_y, shots_y, gameDate_y, shootingPct_y, '
        'gamesPlayed_y, homeRoad_y, lastName_y, opponentTeamAbbrev_y, '
        'otGoals_y, positionCode_y, shootsCatches_y, skaterFullName_y, '
        'teamAbbrev_y, timeOnIcePerGame_y'
    ).split(", "),
    [
        'Unnamed: 0',
        'emptyNetAssists',
        'emptyNetPoints',
        'faceoffWinPct',
        'shootingPctBackhand',
        'shootingPctDeflected',
        'shootingPctSlap',
        'shootingPctSnap',
        'shootingPctTipIn',
        'shootingPctWrapAround',
        'shootingPctWrist',
    ],
    saveData=False,
)

In [None]:
# Run the project's data_review helper on the merged frame. What it reports is
# defined in data_prep and is not visible here. This cell carries no execution
# count, i.e. it was not run in the saved session.
data_prep.data_review(df_merged)

### Add Calculated Columns

In [48]:
# Derived columns, all computed by data_explor helpers and stored on df_merged.
# The order matters: each helper is given the name of a column created by an
# earlier line ('fanPoints' -> 'overPerform' -> the dummies / 'homeRoadPerf').
df_merged['fanPoints'] = data_explor.fan_points(df_merged)
# Helper receives the 'fanPoints' column name and 'playerId'
# (presumably a per-player comparison — verify in data_explor).
df_merged['overPerform'] = data_explor.overperform(df_merged, 'fanPoints', 'playerId')
# Three indicator columns, each derived from 'overPerform'.
df_merged['overPerformDummy'] = data_explor.over_perf_dummy(df_merged, 'overPerform')
df_merged['underPerformDummy'] = data_explor.under_perf_dummy(df_merged, 'overPerform')
df_merged['samePerfDummy'] = data_explor.same_perf_dummy(df_merged, 'overPerform')
# Helper receives 'overPerform' plus the pair ['playerId', 'homeRoad']
# (presumably grouping keys — verify in data_explor).
df_merged['homeRoadPerf'] = data_explor.home_away_perf(df_merged, 'overPerform', ['playerId', 'homeRoad'])

In [49]:
# Player ids that have at least one home ('H') / road ('R') row with a positive
# homeRoadPerf value.
#
# The previous np.where(..., playerId, None) form emitted a None for every row
# that did not match, so each list also carried a stray None entry and the ids
# were held in an object array. Selecting with a boolean mask keeps only real
# player ids; dropna() guards against a missing id slipping in, and unique()
# de-duplicates (ids appear in first-seen order rather than arbitrary set order).
positive_split = df_merged['homeRoadPerf'] > 0

better_home_skater = (
    df_merged.loc[(df_merged['homeRoad'] == 'H') & positive_split, 'playerId']
    .dropna()
    .unique()
    .tolist()
)
better_away_skater = (
    df_merged.loc[(df_merged['homeRoad'] == 'R') & positive_split, 'playerId']
    .dropna()
    .unique()
    .tolist()
)

In [50]:
# 0/1 flags marking whether a skater's id appears in the better-at-home /
# better-on-the-road lists built in the previous cell.
# NOTE(review): these flags are written to df_skaters, while the other
# engineered columns live on df_merged (which was built from df_skaters in an
# earlier cell). Confirm they are meant to stay off the exported frame.
in_home_list = df_skaters['playerId'].isin(better_home_skater)
in_road_list = df_skaters['playerId'].isin(better_away_skater)

df_skaters['OpHomeDummy'] = in_home_list.astype(int)
df_skaters['OpRoadDummy'] = in_road_list.astype(int)
# 1 only when both of the flags above are 0.
df_skaters['OpNowhereDummy'] = (
    df_skaters['OpHomeDummy'].eq(0) & df_skaters['OpRoadDummy'].eq(0)
).astype(int)

In [51]:
# Columns for which the next cell builds moving-average and last-game features.
feature_list = [
    'assists',
    'goals',
    'plusMinus',
    'points',
    'fanPoints',
    'shootingPct',
    'shots',
]

In [52]:
# Lagged features for every entry of feature_list. Columns are created in the
# same order as before (all *Ma7, then *Ma3, *LastGame, *Ma10, *Ma14) so the
# resulting column layout is unchanged.
#
# Each feature is lagged by one row so that a game's row only carries values
# from earlier rows. The previous version called .shift(1) on the whole column,
# which pushes the last value of one player into the first row of the next
# player in the frame. The lag is now applied within each 'playerId' group, so
# a player's first row is NaN instead (and is filled by the imputation cells
# below).
# Assumes the rows of each player are already in chronological order — TODO
# confirm against data_prep.main.
lag_plan = [('Ma7', 7), ('Ma3', 3), ('LastGame', None), ('Ma10', 10), ('Ma14', 14)]

for suffix, window in lag_plan:
    for feature in feature_list:
        lagged_col = f'{feature}{suffix}'
        if window is None:
            # "LastGame": lag the raw feature itself.
            source_col = feature
        else:
            # Store the moving average first, then lag that stored column.
            df_merged[lagged_col] = data_proc.moving_average(df_merged, feature, 'playerId', window)
            source_col = lagged_col
        df_merged[lagged_col] = df_merged.groupby('playerId')[source_col].shift(1)

### Drop Columns

In [67]:
# Columns to discard: the five lagged shootingPct features plus the leftover
# 'Unnamed: 0' column.
drop_cols = 'shootingPctMa7 shootingPctMa3 shootingPctLastGame shootingPctMa10 shootingPctMa14'.split() + ['Unnamed: 0']

# remove_columns returns the frame that replaces df_merged.
df_merged = data_prep.remove_columns(df_merged, drop_cols)

### Mean Imputation

In [77]:
# Columns passed to handle_missing together with 'playerId' (first pass).
impute_by_player = (
    'hitsPer60 blockedShotsPer60 giveawaysPer60 takeawaysPer60 '
    'assistsMa3 assistsMa7 assistsMa10 assistsMa14 assistsLastGame '
    'goalsMa3 goalsMa7 goalsMa10 goalsMa14 goalsLastGame '
    'shotsMa3 shotsMa7 shotsMa10 shotsMa14 shotsLastGame '
    'plusMinusMa3 plusMinusMa7 plusMinusMa10 plusMinusMa14 plusMinusLastGame '
    'pointsMa3 pointsMa7 pointsMa10 pointsMa14 pointsLastGame '
    'fanPointsMa3 fanPointsMa7 fanPointsMa10 fanPointsMa14 fanPointsLastGame'
).split()

# Columns passed to handle_missing together with 'overPerformDummy' (second
# pass): shot-detail columns followed by every column of the first pass again.
impute_by_perf = (
    'missedShotCrossbar missedShotGoalpost missedShotOverNet missedShotWideOfNet '
    'goalsBackhand goalsDeflected goalsSlap goalsSnap goalsTipIn goalsWrapAround goalsWrist '
    'shootingPct '
    'shotsOnNetBackhand shotsOnNetDeflected shotsOnNetSlap shotsOnNetSnap '
    'shotsOnNetTipIn shotsOnNetWrapAround shotsOnNetWrist '
    'hitsPer60 blockedShotsPer60 giveawaysPer60 takeawaysPer60 '
    'assistsMa3 assistsMa7 assistsMa10 assistsMa14 assistsLastGame '
    'goalsMa3 goalsMa7 goalsMa10 goalsMa14 goalsLastGame '
    'shotsMa3 shotsMa7 shotsMa10 shotsMa14 shotsLastGame '
    'plusMinusMa3 plusMinusMa7 plusMinusMa10 plusMinusMa14 plusMinusLastGame '
    'pointsMa3 pointsMa7 pointsMa10 pointsMa14 pointsLastGame '
    'fanPointsMa3 fanPointsMa7 fanPointsMa10 fanPointsMa14 fanPointsLastGame'
).split()

In [78]:
# First imputation pass: one handle_missing call per column, keyed on 'playerId'.
# The return value is discarded, so this only has an effect if handle_missing
# modifies df_merged in place — TODO confirm in data_prep.
for col in impute_by_player:
    data_prep.handle_missing(df_merged, 'playerId', col)

In [79]:
# Second imputation pass: one handle_missing call per column, keyed on
# 'overPerformDummy'. The pass stays best-effort (a failing column does not stop
# the loop), but failures are no longer swallowed silently: the bare `except`
# also trapped KeyboardInterrupt/SystemExit and hid the reason a column was
# skipped. Failures are now collected and reported once at the end.
skipped_cols = {}
for col in impute_by_perf:
    try:
        data_prep.handle_missing(df_merged, 'overPerformDummy', col)
    except Exception as err:
        skipped_cols[col] = repr(err)

if skipped_cols:
    print(f"handle_missing skipped {len(skipped_cols)} column(s): {skipped_cols}")

## Final Scrub and Rinse

In [81]:
# NOTE(review): the same five shootingPct lag columns are already handed to
# remove_columns in the "Drop Columns" section above, so this cell looks
# redundant — confirm whether remove_columns tolerates columns that are
# already gone, or whether this cell can be deleted.
drop_cols = 'shootingPctMa7 shootingPctMa3 shootingPctLastGame shootingPctMa10 shootingPctMa14'.split()

df_merged = data_prep.remove_columns(df_merged, drop_cols)

In [82]:
# Sanity check after imputation: number of missing values per column.
# Every column is listed because display.max_rows was set to None in the
# setup cell.
df_merged.isna().sum()

gameId                  0
playerId                0
gameDate                0
gamesPlayed             0
goals                   0
goalsBackhand           0
goalsDeflected          0
goalsSlap               0
goalsSnap               0
goalsTipIn              0
goalsWrapAround         0
goalsWrist              0
homeRoad                0
lastName                0
opponentTeamAbbrev      0
shootingPct             0
shots                   0
shotsOnNetBackhand      0
shotsOnNetDeflected     0
shotsOnNetSlap          0
shotsOnNetSnap          0
shotsOnNetTipIn         0
shotsOnNetWrapAround    0
shotsOnNetWrist         0
skaterFullName          0
teamAbbrev              0
assists                 0
evGoals                 0
evPoints                0
gameWinningGoals        0
otGoals                 0
penaltyMinutes          0
plusMinus               0
points                  0
pointsPerGame           0
positionCode            0
ppGoals                 0
ppPoints                0
shGoals     

In [83]:
# Write the cleaned frame to 'clean_df_merged.csv' in the current working directory.
# NOTE(review): to_csv writes the index as an extra first column by default;
# a frame reloaded from this file would then carry an 'Unnamed: 0' column like
# the ones dropped earlier in this notebook — confirm whether index=False is wanted.
df_merged.to_csv('clean_df_merged.csv')