# Training Notebook


This notebook is used to experiment with various models and features to optimize the current predictive model, which is off by ~5 fantasy points on average but appears to rank players well and to differentiate between high-scoring and low-scoring players.

## Packages

In [1]:
import pandas as pd
import numpy as np
import sys
import os
import json
import requests
import importlib
from lxml import etree, html
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
from datetime import *
# HTML parser instance; it is not referenced again in the cells shown in this notebook.
htmlparser = etree.HTMLParser()

# Disable pandas' chained-assignment (SettingWithCopy) check: later cells
# assign columns on filtered slices of df_merged.
pd.options.mode.chained_assignment = None

In [2]:
# Make the project's helper modules importable. These are absolute,
# machine-specific paths, so the notebook only runs as-is on that machine.
sys.path.append("/Users/nickdimmitt/hockey/scripts/")
import data_proc, data_explor, data_prep
# data_grab lives in a separate directory from the other helpers.
sys.path.append("/Users/nickdimmitt/hockey/data_grab/")
import data_grab

In [3]:
# Re-import data_grab so edits made to data_grab.py since the first import take effect.
importlib.reload(data_grab)

<module 'data_grab' from '/Users/nickdimmitt/hockey/data_grab/data_grab.py'>

### Daily Lineups

In [4]:
# Download the lineups page and parse it into an lxml tree that the XPath
# queries in the following cells run against.
daily_url = "https://www.rotowire.com/hockey/nhl-lineups.php"
# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
daily_results = requests.get(daily_url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page, which
# would silently yield empty team and goalie lists downstream.
daily_results.raise_for_status()
daily_results_tree = html.fromstring(daily_results.content)

In [5]:
# Text nodes under the first <a> of each matched container; per the variable
# name these are the away-team labels.
away_teams_xpath = (
    '/html/body/div[1]/div/main/div[3]'
    '//div//div//div//div//a[1]//div//text()'
)
away_teams = daily_results_tree.xpath(away_teams_xpath)

In [6]:
# Text nodes under the second <a> of each matched container; per the variable
# name these are the home-team labels.
home_teams_xpath = (
    '/html/body/div[1]/div/main/div[3]'
    '//div//div//div//div//a[2]//div//text()'
)
home_teams = daily_results_tree.xpath(home_teams_xpath)

In [7]:
# Team abbreviations accepted when filtering the scraped text nodes. Both the
# site-style codes (e.g. 'LA', 'NJ') and league-style codes (e.g. 'LAK') are listed.
nhl_teams = [
    'ANA', 'ANH', 'ARI', 'BOS', 'BUF', 'CAR', 'CGY', 'CHI', 'CLS', 'CBJ',
    'COL', 'DAL', 'DET', 'EDM', 'FLA', 'LA', 'LAK', 'MIN', 'MON', 'MTL',
    'NJ', 'NSH', 'NYI', 'NYR', 'OTT', 'PHI', 'PIT', 'SEA', 'SJ', 'SJS',
    'STL', 'TB', 'TBL', 'TOR', 'VAN', 'VGK', 'WAS', 'WPG',
]

In [8]:
# Maps the abbreviations scraped from the lineups page to the abbreviations
# used in the stats tables (teamAbbrevMerge / opponentTeamAbbrev).
team_map = dict(
    ANH='ANA', ARI='ARI', BOS='BOS', BUF='BUF',
    CAR='CAR', CGY='CGY', CHI='CHI', CLS='CBJ',
    COL='COL', DAL='DAL', DET='DET', EDM='EDM',
    FLA='FLA', LA='LAK', MIN='MIN', MON='MTL',
    NJ='NJD', NSH='NSH', NYI='NYI', NYR='NYR',
    OTT='OTT', PHI='PHI', PIT='PIT', SEA='SEA',
    SJ='SJS', STL='STL', TB='TBL', TOR='TOR',
    VAN='VAN', VGK='VGK', WAS='WSH', WPG='WPG',
)

In [9]:
# Keep only the scraped text nodes that are team abbreviations and convert
# them to the stats-table abbreviations.
# nhl_teams also lists codes that are not keys of team_map (e.g. 'ANA', 'CBJ',
# 'LAK', 'MTL', 'SJS', 'TBL'), so a plain team_map[x] lookup raises KeyError
# for them. Each of those codes is already one of team_map's target values,
# so .get(x, x) passes it through unchanged.
away_teams = [team_map.get(x, x) for x in away_teams if x in nhl_teams]
home_teams = [team_map.get(x, x) for x in home_teams if x in nhl_teams]

In [10]:
# Pair the i-th away team with the i-th home team, then build lookups in both
# directions: team -> tonight's opponent.
games_away = list(zip(away_teams, home_teams))
games_home = [(home, away) for away, home in games_away]

games_dict_away = {team: opponent for team, opponent in games_away}
games_dict_home = {team: opponent for team, opponent in games_home}

In [11]:
# Link text of the first list item in the second <ul> of each matched
# container; per the variable name these are the home goalies.
home_goalie_xpath = (
    '/html/body/div[1]/div/main/div[3]'
    '//div//div//div//ul[2]//li[1]//div[1]/a[1]/text()'
)
home_goalies = daily_results_tree.xpath(home_goalie_xpath)

In [12]:
# Link text of the first list item in the first <ul> of each matched
# container; per the variable name these are the away goalies.
away_goalie_xpath = (
    '/html/body/div[1]/div/main/div[3]'
    '//div//div//div//ul[1]//li[1]//div[1]/a[1]/text()'
)
away_goalies = daily_results_tree.xpath(away_goalie_xpath)

In [13]:
# Map each team to the goalie it FACES tonight: the away team is keyed to the
# home goalie and the home team to the away goalie.
goalie_dict = {}

# The four scraped lists are paired by position. If their lengths differ, the
# pairing is unreliable, so say so instead of failing with an IndexError part-way.
if not (len(away_teams) == len(home_teams) == len(home_goalies) == len(away_goalies)):
    print(
        "Warning: scraped lineup lists have different lengths "
        f"(away_teams={len(away_teams)}, home_teams={len(home_teams)}, "
        f"home_goalies={len(home_goalies)}, away_goalies={len(away_goalies)}); "
        "team/goalie pairing may be misaligned."
    )

# zip stops at the shortest list, so no index can run past the end of another list.
for away_team, home_team, home_goalie, away_goalie in zip(away_teams, home_teams, home_goalies, away_goalies):
    goalie_dict[away_team] = home_goalie
    goalie_dict[home_team] = away_goalie

## Data

In [14]:
# Date strings (YYYY-MM-DD) used for the data pull and for tagging today's rows.
end_date = "2022-11-30"
yesterday = f"{datetime.today() - timedelta(days=1):%Y-%m-%d}"
today = f"{datetime.today():%Y-%m-%d}"

In [15]:
def _grab_table(kind, csv_path):
    """Call data_grab.main for one table kind with the settings shared by every table."""
    return data_grab.main(yesterday, end_date, kind, csv_path, 0, 10000, 100, update=True, saveData=False)


# One frame per stats table; the CSV path is passed through to data_grab.main unchanged.
df_skater = _grab_table('skater', "~/hockey/data/df_skater.csv")
df_misc = _grab_table('misc', "~/hockey/data/df_misc.csv")
df_shot = _grab_table('shots', "~/hockey/data/df_shots.csv")
df_toi = _grab_table('toi', "~/hockey/data/df_toi.csv")
df_goalie = _grab_table('goalie', "/Users/nickdimmitt/hockey/data/df_goalies.csv")
df_team = _grab_table('team', "~/hockey/data/df_teams.csv")

### Remove Unnecessary Columns

In [16]:
# Goalie-table columns that are not used further in this notebook.
goalie_unused = [
    'Unnamed: 0', 'assists', 'gamesStarted', 'goals', 'goalsAgainstAverage',
    'lastName', 'points', 'saves', 'ties', 'timeOnIce', 'wins',
]
df_goalie = df_goalie.drop(columns=goalie_unused)

In [17]:
# Misc-table columns that are not used further in this notebook.
misc_unused = [
    'Unnamed: 0.1', 'blockedShotsPer60', 'emptyNetAssists', 'homeRoad',
    'emptyNetGoals', 'emptyNetPoints', 'firstGoals', 'gamesPlayed',
    'giveaways', 'giveawaysPer60', 'hits', 'hitsPer60',
    'missedShotCrossbar', 'missedShotGoalpost', 'missedShotOverNet',
    'missedShotWideOfNet', 'missedShots', 'opponentTeamAbbrev', 'otGoals',
    'takeaways', 'takeawaysPer60', 'Unnamed: 0',
]
df_misc = df_misc.drop(columns=misc_unused)

In [18]:
# Shot-table columns that are not used further in this notebook.
shot_unused = [
    'Unnamed: 0.1', 'gamesPlayed', 'goals', 'homeRoad', 'lastName',
    'opponentTeamAbbrev', 'teamAbbrev', 'skaterFullName', 'Unnamed: 0',
]
df_shot = df_shot.drop(columns=shot_unused)

In [19]:
# Skater-table columns that are not used further in this notebook.
skater_unused = [
    'Unnamed: 0.1', 'evGoals', 'evPoints', 'faceoffWinPct',
    'gameWinningGoals', 'gamesPlayed', 'lastName', 'otGoals',
    'pointsPerGame', 'timeOnIcePerGame', 'Unnamed: 0',
]
df_skater = df_skater.drop(columns=skater_unused)

In [20]:
# Time-on-ice-table columns that are not used further in this notebook.
toi_unused = [
    'Unnamed: 0.1', 'evTimeOnIce', 'evTimeOnIcePerGame', 'gameDate',
    'gamesPlayed', 'homeRoad', 'lastName', 'opponentTeamAbbrev',
    'otTimeOnIce', 'otTimeOnIcePerOtGame', 'positionCode', 'shootsCatches',
    'skaterFullName', 'teamAbbrev', 'timeOnIcePerGame', 'Unnamed: 0',
]
df_toi = df_toi.drop(columns=toi_unused)

In [21]:
# Team-table columns that are not used further in this notebook.
team_unused = [
    'Unnamed: 0', 'faceoffWinPct', 'gamesPlayed', 'goalsAgainstPerGame',
    'goalsForPerGame', 'losses', 'otLosses', 'penaltyKillNetPct', 'pointPct',
    'powerPlayNetPct', 'powerPlayPct', 'regulationAndOtWins', 'ties', 'wins',
    'winsInRegulation', 'winsInShootout',
]
df_team = df_team.drop(columns=team_unused)

#### Merging

In [22]:
# Add merge keys. Goalie and team rows are keyed by their OPPONENT's
# abbreviation, so they attach to the skaters who played against them.
df_goalie = df_goalie.assign(
    goalieId=df_goalie['playerId'],
    teamAbbrevMerge=df_goalie['opponentTeamAbbrev'],
)
df_team = df_team.assign(teamAbbrevMerge=df_team['opponentTeamAbbrev'])

In [23]:
# Skaters are keyed by their own team abbreviation.
df_skater = df_skater.assign(teamAbbrevMerge=df_skater['teamAbbrev'])

In [24]:
# Keep only the merge keys and the columns merged onto the skater rows.
goalie_keep = ['gameId', 'goalieId', 'goalieFullName', 'teamAbbrevMerge', 'savePct']
team_keep = ['gameId', 'teamId', 'teamAbbrevMerge', 'goalsAgainst', 'shotsAgainstPerGame']
df_goalie = df_goalie.loc[:, goalie_keep]
df_team = df_team.loc[:, team_keep]

In [100]:
# Start the combined frame: skater stats joined to misc stats per player-game.
df_merged = df_skater.merge(df_misc, on=['gameId', 'playerId'])

In [101]:
# Add shot stats per player-game.
df_merged = df_merged.merge(df_shot, on=['gameId', 'playerId'])

In [102]:
# Add time-on-ice stats per player-game.
df_merged = df_merged.merge(df_toi, on=['gameId', 'playerId'])

  df_merged = pd.merge(df_merged, df_toi, on=['gameId', 'playerId'])


In [103]:
# Attach the goalie row whose teamAbbrevMerge (the goalie's opponent) matches the skater's team.
df_merged = df_merged.merge(df_goalie, on=['gameId', 'teamAbbrevMerge'])

In [104]:
# Attach the team row whose teamAbbrevMerge (that team's opponent) matches the skater's team.
df_merged = df_merged.merge(df_team, on=['gameId', 'teamAbbrevMerge'])

In [105]:
# Remove fully identical rows produced by the merges.
df_merged = df_merged.drop_duplicates()

In [106]:
# Keep identifiers, raw per-game stats and opponent goalie/team stats.
# The '_x' names are the left-hand copies of columns duplicated by the merges.
merged_keep = [
    'gameId', 'gameDate', 'playerId', 'opponentTeamAbbrev', 'teamAbbrevMerge',
    'homeRoad', 'goalieId', 'goalieFullName', 'goals', 'assists', 'plusMinus',
    'points', 'positionCode_x', 'ppGoals', 'ppPoints', 'shGoals',
    'shPoints', 'shootingPct_x', 'shootsCatches_x', 'shots_x',
    'skaterFullName_x', 'blockedShots',
    'ppTimeOnIce', 'shTimeOnIce', 'shifts', 'timeOnIce',
    'timeOnIcePerShift', 'savePct', 'goalsAgainst', 'shotsAgainstPerGame',
]
df_merged = df_merged.loc[:, merged_keep]

In [107]:
# Drop the '_x' merge suffix from column names.
# str.rstrip('_x') strips any trailing run of the CHARACTERS '_' and 'x', so
# it would also mangle a column whose real name ends in 'x' or '_'. Remove
# the exact suffix instead.
df_merged.columns = [
    name[:-len('_x')] if name.endswith('_x') else name
    for name in df_merged.columns
]

## Add Today's Rows

In [108]:
# Per-game stats that are unknown for a game not yet played; they are zeroed
# on the placeholder rows.
today_stat_columns = [
    'goals', 'assists',
    'plusMinus', 'points', 'ppGoals', 'ppPoints', 'shGoals',
    'shPoints', 'shootingPct', 'shots', 'blockedShots', 'ppTimeOnIce', 'shTimeOnIce', 'shifts', 'timeOnIce',
    'timeOnIcePerShift', 'savePct', 'goalsAgainst', 'shotsAgainstPerGame',
]


def _todays_rows(teams, home_road, opponent_by_team):
    """Build placeholder rows for today's games for the given teams.

    Takes the rows of df_merged dated after 2022-11-01 whose team is in
    ``teams``, then overwrites the game-specific fields: gameDate becomes
    ``today``, homeRoad becomes ``home_road``, the stat columns become 0, and
    the opponent and opposing goalie come from the scraped lineup dicts.
    """
    is_recent = df_merged['gameDate'] > "2022-11-01"
    on_slate = df_merged['teamAbbrevMerge'].isin(teams)
    # Explicit copy: the assignments below must not depend on the globally
    # silenced SettingWithCopy check.
    rows = df_merged[is_recent & on_slate].copy()
    rows['gameDate'] = today
    rows['homeRoad'] = home_road
    rows[today_stat_columns] = 0
    rows['opponentTeamAbbrev'] = rows['teamAbbrevMerge'].map(opponent_by_team)
    rows['goalieFullName'] = rows['teamAbbrevMerge'].map(goalie_dict)
    return rows


today_home_df = _todays_rows(home_teams, 'H', games_dict_home)
today_away_df = _todays_rows(away_teams, 'R', games_dict_away)

In [113]:
# One placeholder row per player: stack home and away rows and keep the first
# occurrence of each playerId.
today_df = pd.concat([today_home_df, today_away_df]).drop_duplicates(subset='playerId')

In [114]:
# Lookup from goalie full name to goalieId. When a name occurs more than
# once, the last occurrence in df_goalie wins.
goalies = df_goalie['goalieFullName'].tolist()
goalieId = df_goalie['goalieId'].tolist()

goalie_map = dict(zip(goalies, goalieId))

In [115]:
# Replace the goalieId on today's rows with the id of tonight's scraped goalie,
# looked up by full name in goalie_map.
today_df['goalieId'] = today_df['goalieFullName'].map(goalie_map)

In [116]:
# Append today's placeholder rows after the historical rows. ignore_index is
# not passed, so today's rows keep the index labels of the rows they came from.
df_merged = pd.concat([df_merged, today_df])

### Create Calculated Features

In [117]:
# Lagged opposing-goalie form: previous save percentage and trailing means,
# each lagged by one row so a row never sees its own savePct.
#
# The original applied .shift(1) AFTER the groupby/transform, which shifts the
# whole frame by one position. Each row then received the rolling mean of
# whatever row preceded it, usually a different goalie. The shift now happens
# inside each goalie's group.
# NOTE: any saved model fitted on the old (mis-shifted) features should be
# retrained on features built the same way.
# NOTE(review): df_merged appears to hold one row per skater per game, so a
# one-row lag within a goalie may still be the same game - confirm.
df_merged['savePercLastGame'] = df_merged.groupby('goalieId')['savePct'].shift(1)
for window in (3, 7, 16):
    df_merged[f'savePercMa{window}'] = df_merged.groupby('goalieId')['savePct'].transform(
        lambda s, w=window: s.rolling(w).mean().shift(1)
    )

In [118]:
# Lagged opponent-team form: previous value and trailing means of goals
# against and shots against, each lagged by one row within the opponent.
#
# The original called .shift() AFTER the groupby/transform for the rolling
# columns, which shifts the whole frame by one position and leaks the
# previous row's (often a different team's) value into each row. The shift
# now happens inside each opponent's group, matching the LastGame columns.
# NOTE: any saved model fitted on the old (mis-shifted) features should be
# retrained on features built the same way.
team_form_sources = (
    ('goalsPerGame', 'goalsAgainst'),
    ('shotsPerGame', 'shotsAgainstPerGame'),
)

for prefix, source in team_form_sources:
    df_merged[f'{prefix}LastGame'] = df_merged.groupby('opponentTeamAbbrev')[source].shift()

for window in (3, 7, 16):
    for prefix, source in team_form_sources:
        df_merged[f'{prefix}Ma{window}'] = df_merged.groupby('opponentTeamAbbrev')[source].transform(
            lambda s, w=window: s.rolling(w).mean().shift()
        )

#### Rolling Averages

In [119]:
# Target and performance features from the project helper module data_explor.
# Each call reads the column created by the previous line, so order matters.
# presumably fan_points computes fantasy points per row - verify in data_explor
df_merged['fanPoints'] = data_explor.fan_points(df_merged)
# presumably a per-player over/under-performance measure of fanPoints - verify in data_explor
df_merged['overPerform'] = data_explor.overperform(df_merged, 'fanPoints', 'playerId')
# presumably overPerform aggregated per (playerId, homeRoad) - verify in data_explor
df_merged['homeRoadPerf'] = data_explor.home_away_perf(df_merged, 'overPerform', ['playerId', 'homeRoad'])

In [120]:
# Unique playerIds with a positive homeRoadPerf on home rows / on road rows.
# The original used np.where(..., playerId, None), which put a spurious None
# sentinel into both lists and forced an object array. A boolean mask selects
# the same ids without the sentinel.
has_positive_perf = df_merged['homeRoadPerf'] > 0
better_home_skater = df_merged.loc[has_positive_perf & (df_merged['homeRoad'] == 'H'), 'playerId'].unique().tolist()
better_away_skater = df_merged.loc[has_positive_perf & (df_merged['homeRoad'] == 'R'), 'playerId'].unique().tolist()

In [121]:
# 0/1 flags: player is in the better-at-home list, in the better-on-road
# list, or in neither.
in_home_list = df_merged['playerId'].isin(better_home_skater)
in_road_list = df_merged['playerId'].isin(better_away_skater)

df_merged['OpHomeDummy'] = np.where(in_home_list, 1, 0)
df_merged['OpRoadDummy'] = np.where(in_road_list, 1, 0)
df_merged['OpNowhereDummy'] = np.where(~in_home_list & ~in_road_list, 1, 0)

In [122]:
# Per-game player stats that get LastGame / Ma3 / Ma7 / Ma16 lag features.
feature_list = [
    'assists',
    'goals',
    'plusMinus',
    'points',
    'ppPoints',
    'fanPoints',
    'blockedShots',
    'shootingPct',
    'shots',
    'timeOnIce',
    'ppTimeOnIce',
    'shifts',
    'timeOnIcePerShift',
]

In [123]:
# Lagged per-player features: previous-game value and 3/7/16-game moving
# averages of every stat in feature_list, each lagged one row so a row never
# sees its own game.
#
# The original lagged with a plain Series.shift(1), which shifts the whole
# frame by one position. The first row of every player then received the
# previous player's value, and rows were mixed wherever players interleave.
# The lag is now taken within each playerId, consistent with the grouped
# shift used for savePercLastGame.
# NOTE: any saved model fitted on the old (mis-shifted) features should be
# retrained on features built the same way.
# Columns are created in the original order: Ma7, Ma3, LastGame, Ma16.
lag_specs = (('Ma7', 7), ('Ma3', 3), ('LastGame', None), ('Ma16', 16))

for suffix, window in lag_specs:
    for feature in feature_list:
        column = f'{feature}{suffix}'
        if window is None:
            # Previous game's raw value for the same player.
            df_merged[column] = df_merged.groupby('playerId')[feature].shift(1)
        else:
            df_merged[column] = data_proc.moving_average(df_merged, feature, 'playerId', window)
            df_merged[column] = df_merged.groupby('playerId')[column].shift(1)

### All Time Average

In [124]:
# Each player's mean fanPoints over all of their rows, broadcast back onto every row.
fan_points_by_player = df_merged.groupby('playerId')['fanPoints']
df_merged['avgFanPoints'] = fan_points_by_player.transform('mean')

## Cleaning

In [125]:
# Display the column index to check which engineered columns now exist.
df_merged.columns

Index(['gameId', 'gameDate', 'playerId', 'opponentTeamAbbrev',
       'teamAbbrevMerge', 'homeRoad', 'goalieId', 'goalieFullName', 'goals',
       'assists',
       ...
       'ppPointsMa16', 'fanPointsMa16', 'blockedShotsMa16', 'shootingPctMa16',
       'shotsMa16', 'timeOnIceMa16', 'ppTimeOnIceMa16', 'shiftsMa16',
       'timeOnIcePerShiftMa16', 'avgFanPoints'],
      dtype='object', length=101)

In [126]:
# Column groups for imputation, generated from their name parts in the same
# order as the hand-written lists they replace.
_player_stat_names = [
    'assists', 'goals', 'plusMinus', 'points', 'ppPoints', 'fanPoints',
    'blockedShots', 'shootingPct', 'shots', 'timeOnIce', 'ppTimeOnIce',
    'shifts', 'timeOnIcePerShift',
]

# All Ma7 columns, then Ma3, LastGame and Ma16.
impute_by_player = [
    f'{stat}{suffix}'
    for suffix in ('Ma7', 'Ma3', 'LastGame', 'Ma16')
    for stat in _player_stat_names
]

impute_by_goalie = [f'savePerc{suffix}' for suffix in ('LastGame', 'Ma3', 'Ma7', 'Ma16')]

# Goals columns first, then shots columns.
impute_by_team = [
    f'{metric}PerGame{suffix}'
    for metric in ('goals', 'shots')
    for suffix in ('LastGame', 'Ma3', 'Ma7', 'Ma16')
]

In [127]:
# Fill missing lag features with the mean of the group each feature belongs to.
#
# The original grouped all three lists by 'playerId', so the goalie and team
# features were imputed from the skater's own rows rather than from the
# goalie's or the team's. Each list now uses the key its features were built
# on: playerId, goalieId and opponentTeamAbbrev.
impute_groups = (
    ('playerId', impute_by_player),
    ('goalieId', impute_by_goalie),
    ('opponentTeamAbbrev', impute_by_team),
)

for group_key, columns in impute_groups:
    for col in columns:
        group_mean = df_merged.groupby(group_key)[col].transform('mean')
        # Positional fill: only NaN cells are replaced, and rows whose group
        # key is missing keep whatever value they already had.
        df_merged[col] = np.where(df_merged[col].isna(), group_mean, df_merged[col])

In [128]:
# The raw shootingPct column is no longer needed once its lag features exist.
df_merged = df_merged.drop(columns='shootingPct')

In [129]:
# Every lag feature (player, goalie and team), generated from name parts in
# the same order as the hand-written list it replaces.
_perf_stat_names = [
    'assists', 'goals', 'plusMinus', 'points', 'ppPoints', 'fanPoints',
    'blockedShots', 'shootingPct', 'shots', 'timeOnIce', 'ppTimeOnIce',
    'shifts', 'timeOnIcePerShift',
]
_perf_lags = ('LastGame', 'Ma3', 'Ma7', 'Ma16')

impute_by_perf = (
    [f'{stat}{suffix}' for suffix in ('Ma7', 'Ma3', 'LastGame', 'Ma16') for stat in _perf_stat_names]
    + [f'savePerc{suffix}' for suffix in _perf_lags]
    + [f'{metric}PerGame{suffix}' for metric in ('goals', 'shots') for suffix in _perf_lags]
)

In [130]:
def _fill_with_own_mean(values):
    """Replace missing entries of one player's series with that series' mean."""
    return values.fillna(values.mean())


# Final pass: any lag feature still missing is filled with the player's own mean.
for col in impute_by_perf:
    per_player = df_merged.groupby('playerId')[col]
    df_merged[col] = per_player.transform(_fill_with_own_mean)

In [131]:
# Drop every row that still has a missing value in any column.
df_merged = df_merged.dropna(axis=0)

In [132]:
# Duplicate the categorical columns so the originals survive one-hot encoding
# and stay available for the position and home/road splits.
df_merged = df_merged.assign(
    positionCodeCopy=df_merged['positionCode'],
    homeRoadCopy=df_merged['homeRoad'],
)

In [133]:
# One-hot encode the copied categoricals and shootsCatches. get_dummies
# replaces these columns with '<name>_<value>' indicator columns.
one_hot_columns = ['homeRoadCopy', 'positionCodeCopy', 'shootsCatches']
df_merged = pd.get_dummies(df_merged, columns=one_hot_columns)

## Splitting

In [134]:
# Rows to score: the placeholder rows dated today.
is_today = df_merged['gameDate'] == today
predictable_df = df_merged[is_today]

In [135]:
# home_df: positive homeRoadPerf on a home row, or non-positive on a road row.
# away_df: the mirror image. The two concat pieces keep the original order.
perf_positive = predictable_df['homeRoadPerf'] > 0
perf_not_positive = predictable_df['homeRoadPerf'] <= 0
row_is_home = predictable_df['homeRoad'] == 'H'
row_is_road = predictable_df['homeRoad'] == 'R'

home_df = pd.concat([
    predictable_df[perf_positive & row_is_home],
    predictable_df[perf_not_positive & row_is_road],
])
away_df = pd.concat([
    predictable_df[perf_positive & row_is_road],
    predictable_df[perf_not_positive & row_is_home],
])

In [137]:
def _split_by_position(frame):
    """Return the (defense, center, winger) subsets of ``frame`` by positionCode.

    Wingers are the rows whose positionCode is 'L' or 'R'.
    """
    position = frame['positionCode']
    return frame[position == 'D'], frame[position == 'C'], frame[position.isin(['L', 'R'])]


home_df_d, home_df_c, home_df_w = _split_by_position(home_df)
away_df_d, away_df_c, away_df_w = _split_by_position(away_df)

In [139]:
def _split_at_mean_fan_points(frame):
    """Split ``frame`` at its own mean avgFanPoints.

    Returns (good, bad): rows at or above the mean, and rows below it.
    """
    cutoff = frame['avgFanPoints'].mean()
    return frame[frame['avgFanPoints'] >= cutoff], frame[frame['avgFanPoints'] < cutoff]


home_df_d_good, home_df_d_bad = _split_at_mean_fan_points(home_df_d)
home_df_c_good, home_df_c_bad = _split_at_mean_fan_points(home_df_c)
home_df_w_good, home_df_w_bad = _split_at_mean_fan_points(home_df_w)

away_df_d_good, away_df_d_bad = _split_at_mean_fan_points(away_df_d)
away_df_c_good, away_df_c_bad = _split_at_mean_fan_points(away_df_c)
away_df_w_good, away_df_w_bad = _split_at_mean_fan_points(away_df_w)

## Prediction (pre-trained models)

In [141]:
# Model input columns, in the exact order used to build X. The order is
# generated to match the hand-written list this replaces.
_model_stat_names = [
    'assists', 'goals', 'plusMinus', 'points', 'ppPoints', 'fanPoints',
    'blockedShots', 'shootingPct', 'shots', 'timeOnIce', 'ppTimeOnIce',
    'shifts', 'timeOnIcePerShift',
]
_model_lags = ('LastGame', 'Ma3', 'Ma7', 'Ma16')

features = (
    # Player lag features: all Ma7 columns, then Ma3, LastGame and Ma16.
    [f'{stat}{suffix}' for suffix in ('Ma7', 'Ma3', 'LastGame', 'Ma16') for stat in _model_stat_names]
    # Opposing goalie form.
    + [f'savePerc{suffix}' for suffix in _model_lags]
    # Opponent team form: goals columns, then shots columns.
    + [f'{metric}PerGame{suffix}' for metric in ('goals', 'shots') for suffix in _model_lags]
    # Home-game indicator created by the one-hot encoding.
    + ['homeRoadCopy_H']
)
target = 'fanPoints'

In [142]:
# Score every segment with its own saved model.
#
# This replaces twelve copy-pasted load/predict/assign cell groups with one
# loop. Each model file is named after its segment variable, so the file
# paths are unchanged. A segment with no players today is skipped, because
# calling predict on zero rows raises an error.
segments = {
    'home_df_d_good': home_df_d_good,
    'home_df_d_bad': home_df_d_bad,
    'home_df_c_good': home_df_c_good,
    'home_df_c_bad': home_df_c_bad,
    'home_df_w_good': home_df_w_good,
    'home_df_w_bad': home_df_w_bad,
    'away_df_d_good': away_df_d_good,
    'away_df_d_bad': away_df_d_bad,
    'away_df_c_good': away_df_c_good,
    'away_df_c_bad': away_df_c_bad,
    'away_df_w_good': away_df_w_good,
    'away_df_w_bad': away_df_w_bad,
}

for segment_name, segment_df in segments.items():
    if segment_df.empty:
        # Still add the column so the later concat and column selection work.
        segment_df['prediction'] = np.nan
        continue
    X = segment_df[features].values
    # joblib.load unpickles the file and can run arbitrary code; only load
    # model files you created yourself.
    model = joblib.load(f'/Users/nickdimmitt/hockey/models/{segment_name}.pkl')
    predictions = model.predict(X)
    # Column assignment mutates the frame object that the segment's own
    # variable (e.g. home_df_d_good) also refers to.
    segment_df['prediction'] = predictions

In [192]:
# Stack the scored segments back into one frame: home segments first, then away.
scored_segments = [
    home_df_d_good, home_df_d_bad,
    home_df_c_good, home_df_c_bad,
    home_df_w_good, home_df_w_bad,
    away_df_d_good, away_df_d_bad,
    away_df_c_good, away_df_c_bad,
    away_df_w_good, away_df_w_bad,
]
all_pred_df = pd.concat(scored_segments)

In [194]:
# Final table: one row per player, highest predicted fantasy points first.
output_columns = ['skaterFullName', 'positionCode', 'teamAbbrevMerge', 'prediction']
final_pred = all_pred_df.loc[:, output_columns].sort_values('prediction', ascending=False)

In [197]:
# Write the ranked predictions to a CSV named after today's date
# (absolute, machine-specific output directory).
final_pred.to_csv(f'/Users/nickdimmitt/hockey/predictions/predictions_{today}.csv')