In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import numpy as np
import statsmodels.api as sm
import hockey_scraper
import pickle
import time
import random
import datetime

pd.set_option('display.max_columns', None)

In [120]:
#set date variables
yesterday = (datetime.date.today()- datetime.timedelta(days=1)).strftime('%Y-%m-%d')
today = datetime.date.today().strftime('%Y-%m-%d')
year = 20202021

In [121]:
today

'2021-06-24'

In [None]:
def master_function():
    # bring in goalie game logs 
    infile = open("data/goalie_game_log_df.pickle",'rb')
    goalie_game_log = pickle.load(infile)
    infile.close()
    
    #get goalie stats from prior days games
    get_goalie_data(get_goalie_ids())
    

In [50]:
#game boxscore url
url = 'https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(game_id)

In [None]:
#roster url for future uses 
'https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster&season=20142015'

In [2]:
#schedule url, date format: 2018-01-09

def get_yesterdays_game_ids():
    yesterday = datetime.date.today()- datetime.timedelta(days=1)
    url = 'https://statsapi.web.nhl.com/api/v1/schedule?date={}'.format(yesterday.strftime('%Y-%m-%d'))
    r = requests.get(url)
    y_sched = r.json()

    game_id = []
    for game in y_sched['dates'][0]['games']:
        game_id.append(game['gamePk'])
    
    return game_id

In [3]:
get_yesterdays_game_ids()

[2020030323]

In [73]:
#get goalie ids from yesterdays games
def get_goalie_ids():
    goalie_id = {}
    for game_id in get_yesterdays_game_ids():
        url = 'https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(game_id)
        r = requests.get(url)
        box_score = r.json()
        for e in ['home', 'away']:
            temp_id = []
            temp_id.extend(box_score['teams'][e]['goalies'])
            for pid in temp_id:
                name = box_score['teams'][e]['players']['ID'+str(pid)]['person']['fullName']
                goalie_id[name] = pid
    return goalie_id
    

        
    
    

In [74]:
get_goalie_ids()

{'Semyon Varlamov': 8473575, 'Andrei Vasilevskiy': 8476883}

In [99]:
def get_goalie_data(goalie_ids, start_year =year, end_year = year):
    counter = 0
    for name, gid in goalie_ids.items():

        sequence = [x/10 for x in range(60, 120)]
        time.sleep(random.choice(sequence))
        url = 'https://www.naturalstattrick.com/playerreport.php?fromseason={}&thruseason={}&playerid={}&sit=all&stype=2&stdoi=oi&rate=n&v=g'.format(start_year, end_year, gid)
        #due to number of http requests, NST may ban your IP before the loop finishes. I needed to use a VPN to get around this. If IP gets banned, this function will still return the current DF and you can call the function again and pass in an updated goalie dictionary to get the rest
        try:
            individual_df = pd.read_html(url)[0]
            individual_df['Name'] = name
            individual_df['ID'] = gid

        except:
            print(f'Ended before {name}')
            return all_goalies

        if counter == 0:
            all_goalies = individual_df

        elif counter != 0:
            all_goalies = pd.concat([all_goalies, individual_df])



        counter +=1
    
    return all_goalies

In [76]:
def goalie_features(df, rolling_games = 40):
    rolling_games = rolling_games
    min_games = 10
    df['Date'] = df['Game'].apply(lambda x: pd.to_datetime(x[0:10]))
    df['Team_Key'] = df['Team'].astype(str)+'_'+df['Date'].astype(str)
    
    df['Rolling_TOI'] = df.groupby('ID')['TOI'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_FA'] = df.groupby('ID')['FA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_SA'] = df.groupby('ID')['SA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_GA'] = df.groupby('ID')['GA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_xGA'] = df.groupby('ID')['xGA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_HDCA'] = df.groupby('ID')['HDCA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    df['Rolling_HDGA'] = df.groupby('ID')['HDGA'].transform(lambda x: x.rolling(rolling_games, min_games).sum().shift())
    
    df['Goalie_FenwickSV%'] =  (df['Rolling_FA'] - df['Rolling_GA']) /  df['Rolling_FA']
    df['Goalie_GSAx'] = df['Rolling_xGA'] - df['Rolling_GA']
    df['Goalie_GSAx/60'] =  df['Goalie_GSAx']*60 /  df['Rolling_TOI']
    df['Goalie_HDCSV%'] = (df['Rolling_HDCA'] - df['Rolling_HDGA'] ) / df['Rolling_HDCA'] 
    return df

In [79]:
infile = open("data/goalie_game_log_df.pickle",'rb')
goalie_game_log = pickle.load(infile)
infile.close()

In [102]:
def update_goalie_logs():
    # bring in goalie game logs 
    infile = open("data/goalie_game_log_df.pickle",'rb')
    goalie_game_log = pickle.load(infile)
    infile.close()
    
    #get goalie stats for goalies that played yesterday
    yesterdays_goalies_df = get_goalie_data(get_goalie_ids())
    yesterdays_goalies_df['Date'] = yesterdays_goalies_df['Game'].apply(lambda x: pd.to_datetime(x[0:10]))
    
    #concact yesterdays goalies stats for just yesterday's games 
    goalie_game_log = pd.concat([goalie_game_log, yesterdays_goalies_df[yesterdays_goalies_df['Date'] == yesterday]])
    #recalculate stats for features
    goalie_game_log = goalie_features(goalie_game_log)
    
    return goalie_game_log

In [103]:
def get_and_format_nst_team_stats(season, sit, rate):
    #dict to convert team names from NST to team abbreviations from NHL API
    nst_to_sched = {'Anaheim Ducks': 'ANA',
                     'Arizona Coyotes': 'ARI',
                     'Boston Bruins': 'BOS',
                     'Buffalo Sabres': 'BUF',
                     'Calgary Flames': 'CGY',
                     'Carolina Hurricanes': 'CAR',
                     'Chicago Blackhawks': 'CHI',
                     'Colorado Avalanche': 'COL',
                     'Columbus Blue Jackets': 'CBJ',
                     'Dallas Stars': 'DAL',
                     'Detroit Red Wings': 'DET',
                     'Edmonton Oilers': 'EDM',
                     'Florida Panthers': 'FLA',
                     'Los Angeles Kings': 'L.A',
                     'Minnesota Wild': 'MIN',
                     'Montreal Canadiens': 'MTL',
                     'Nashville Predators': 'NSH',
                     'New Jersey Devils': 'N.J',
                     'New York Islanders': 'NYI',
                     'New York Rangers': 'NYR',
                     'Ottawa Senators': 'OTT',
                     'Philadelphia Flyers': 'PHI',
                     'Pittsburgh Penguins': 'PIT',
                     'San Jose Sharks': 'S.J',
                     'St Louis Blues': 'STL',
                     'Tampa Bay Lightning': 'T.B',
                     'Toronto Maple Leafs': 'TOR',
                     'Vancouver Canucks': 'VAN',
                     'Vegas Golden Knights': 'VGK',
                     'Washington Capitals': 'WSH',
                     'Winnipeg Jets': 'WPG'}
    #dyanmic URL
    url = 'https://www.naturalstattrick.com/games.php?fromseason={}&thruseason={}&stype=2&sit={}&loc=B&team=All&rate={}'.format(
        season,
        season,
        sit,
        rate)
    #scrape html table from webpage
    df = pd.read_html(url, header=0, index_col = 0, na_values=["-"])[0]
    #reset index
    df.reset_index(inplace = True)
    #format date
    df['Date'] = df['Game'].apply(lambda x: pd.to_datetime(x[0:10]))
    #add team game number
    df['Game_Number'] = df.groupby('Team').cumcount() + 1
    #replcate Team name with team abbreviation
    df = df.replace({'Team': nst_to_sched})
    #add team key to merge with game results df
    df['Team_Key'] = df['Team'].astype(str)+'_'+df['Date'].astype(str)
    return df

In [104]:
#merge 5v5, PP, and PK team game logs from NST
def merge_team_stats(primary_df, pp_df, pk_df):
    primary_df = primary_df.merge(pk_df[['Team_Key', 'TOI', 'xGA', 'GA']], on = 'Team_Key', how = 'left', suffixes = ('','_pk') )
    primary_df = primary_df.merge(pp_df[['Team_Key', 'TOI', 'xGF', 'GF']], on = 'Team_Key', how = 'left', suffixes = ('','_pp') )
    return primary_df

In [105]:
#calculate team features. Number of rolling games used is dynamic to test if different numbers of game work better.
def calculate_team_features(df, rolling_games = 40):

    df[f'sum_rolling{rolling_games}_TOI_5v5'] = df.groupby('Team')['TOI'].transform(lambda x: x.rolling(rolling_games, rolling_games).sum().shift())
    df[f'sum_rolling{rolling_games}_FF_5v5'] = df.groupby('Team')['FF'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_FA_5v5'] = df.groupby('Team')['FA'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_GF_5v5'] = df.groupby('Team')['GF'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_GA_5v5'] = df.groupby('Team')['GA'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_xGF_5v5'] = df.groupby('Team')['xGF'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_xGA_5v5'] = df.groupby('Team')['xGA'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_SF_5v5'] = df.groupby('Team')['SF'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'last_{rolling_games}_FF%_5v5'] = df[f'sum_rolling{rolling_games}_FF_5v5']*100/ (df[f'sum_rolling{rolling_games}_FF_5v5']+df[f'sum_rolling{rolling_games}_FA_5v5'])
    df[f'last_{rolling_games}_GF%_5v5'] = df[f'sum_rolling{rolling_games}_GF_5v5']*100/ (df[f'sum_rolling{rolling_games}_GF_5v5']+df[f'sum_rolling{rolling_games}_GA_5v5'])
    df[f'last_{rolling_games}_xGF%_5v5'] = df[f'sum_rolling{rolling_games}_xGF_5v5']*100/ (df[f'sum_rolling{rolling_games}_xGF_5v5']+df[f'sum_rolling{rolling_games}_xGA_5v5'])
    df[f'last_{rolling_games}_SH%'] = df[f'sum_rolling{rolling_games}_GF_5v5']*100 / df[f'sum_rolling{rolling_games}_SF_5v5']
    
    
    #fix NaNs in pp and pk features. If team wasnt on PP or PK in a game that game is missing from dataframe.
    df['TOI_pp'] = np.where(df['TOI_pp'].isna(), 0, df['TOI_pp'])
    df['TOI_pk'] = np.where(df['TOI_pk'].isna(), 0, df['TOI_pk'])
    df['xGF_pp'] = np.where(df['xGF_pp'].isna(), 0, df['xGF_pp'])
    df['GF_pp']  = np.where(df['GF_pp'].isna(), 0, df['GF_pp'])
    df['xGA_pk'] = np.where(df['xGA_pk'].isna(), 0, df['xGA_pk'])
    df['GA_pk']  = np.where(df['GA_pk'].isna(), 0, df['GA_pk'])
    
    #pp features
    df[f'sum_rolling{rolling_games}_TOI_pp'] = df.groupby('Team')['TOI_pp'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_xGF_pp'] = df.groupby('Team')['xGF_pp'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_GF_pp'] = df.groupby('Team')['GF_pp'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'last{rolling_games}_pp_TOI_per_game'] = df.groupby('Team')['TOI_pp'].transform(lambda x: x.rolling(rolling_games, rolling_games ).mean().shift())
    df[f'last{rolling_games}_xGF_per_min_pp'] = df[f'sum_rolling{rolling_games}_xGF_pp'] / df[f'sum_rolling{rolling_games}_TOI_pp'] 
    df[f'last{rolling_games}_GF_per_min_pp'] = df[f'sum_rolling{rolling_games}_GF_pp'] / df[f'sum_rolling{rolling_games}_TOI_pp'] 
    
    
    #pk features
    df[f'sum_rolling{rolling_games}_TOI_pk'] = df.groupby('Team')['TOI_pk'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_xGA_pk'] = df.groupby('Team')['xGA_pk'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'sum_rolling{rolling_games}_GA_pk'] = df.groupby('Team')['GA_pk'].transform(lambda x: x.rolling(rolling_games, rolling_games ).sum().shift())
    df[f'last{rolling_games}_pk_TOI_per_game'] = df.groupby('Team')['TOI_pk'].transform(lambda x: x.rolling(rolling_games, rolling_games ).mean().shift())
    df[f'last{rolling_games}_xGA_per_min_pk'] = df[f'sum_rolling{rolling_games}_xGA_pk'] / df[f'sum_rolling{rolling_games}_TOI_pk'] 
    df[f'last{rolling_games}_GA_per_min_pk'] = df[f'sum_rolling{rolling_games}_GA_pk'] / df[f'sum_rolling{rolling_games}_TOI_pk'] 
    
    #to get back to back category
    df['Last_Game_Date'] = df.groupby('Team')['Date'].shift()
    df['Days_Since_Last_Game'] = df['Date'] - df['Last_Game_Date']
    df['B2B'] = np.where(df['Days_Since_Last_Game'] == '1 days', 1, 0)
    
    

    
    return df

In [111]:
def get_curent_season_team_stats():
    #scraping team stats from NST
    sequence = [x/10 for x in range(60, 120)]
    time.sleep(random.choice(sequence))

    primarysva = get_and_format_nst_team_stats(year,'sva', 'n')

    time.sleep(random.choice(sequence))

    pp = get_and_format_nst_team_stats(year,'pp', 'n')

    time.sleep(random.choice(sequence))

    pk = get_and_format_nst_team_stats(year,'pk', 'n')
    
    current_df = merge_team_stats(primarysva, pp, pk)
    
    return current_df
    

In [127]:
def get_todays_sched(today = today):
    sched_df = hockey_scraper.scrape_schedule(today,today)
    sched_df['Home_Team_Won'] = np.where(sched_df['home_score'] > sched_df['away_score'], 1, 0)
    #create keys for home and away team in order to import their features
    sched_df['Home_Team_Key'] = sched_df['home_team'].astype(str)+'_'+sched_df['date'].astype(str)
    sched_df['Away_Team_Key'] = sched_df['away_team'].astype(str)+'_'+sched_df['date'].astype(str)
    return sched_df

In [129]:
get_todays_sched()

Scraping the schedule between 2021-06-24 and 2021-06-24


Unnamed: 0,game_id,date,venue,home_team,away_team,start_time,home_score,away_score,status,Home_Team_Won,Home_Team_Key,Away_Team_Key
0,2020030316,2021-06-24,Centre Bell,MTL,VGK,2021-06-25,0,0,Preview,0,MTL_2021-06-24,VGK_2021-06-24


In [113]:
team_stats = pd.read_csv('data/team_stats_all_seasons')

In [131]:
team_stats.head()

Unnamed: 0.1,Unnamed: 0,Game,Team,Unnamed: 2,TOI,CF,CA,CF%,FF,FA,FF%,SF,SA,SF%,GF,GA,GF%,xGF,xGA,xGF%,SCF,SCA,SCF%,HDCF,HDCA,HDCF%,HDSF,HDSA,HDSF%,HDGF,HDGA,HDGF%,HDSH%,HDSV%,MDCF,MDCA,MDCF%,MDSF,MDSA,MDSF%,MDGF,MDGA,MDGF%,MDSH%,MDSV%,LDCF,LDCA,LDCF%,LDSF,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO,Attendance,Date,Game_Number,Team_Key,TOI_pk,xGA_pk,GA_pk,TOI_pp,xGF_pp,GF_pp,sum_rolling40_TOI_5v5,sum_rolling40_FF_5v5,sum_rolling40_FA_5v5,sum_rolling40_GF_5v5,sum_rolling40_GA_5v5,sum_rolling40_xGF_5v5,sum_rolling40_xGA_5v5,sum_rolling40_SF_5v5,last_40_FF%_5v5,last_40_GF%_5v5,last_40_xGF%_5v5,last_40_SH%,sum_rolling40_TOI_pp,sum_rolling40_xGF_pp,sum_rolling40_GF_pp,last40_pp_TOI_per_game,last40_xGF_per_min_pp,last40_GF_per_min_pp,sum_rolling40_TOI_pk,sum_rolling40_xGA_pk,sum_rolling40_GA_pk,last40_pk_TOI_per_game,last40_xGA_per_min_pk,last40_GA_per_min_pk,Last_Game_Date,Days_Since_Last_Game,B2B
0,0,"2016-10-12 - Maple Leafs 4, Senators 5",OTT,Limited ReportFull Report,46.0,33.76,72.36,31.81,29.13,47.78,37.88,20.74,32.65,38.85,3.71,4.18,47.05,1.24,2.21,35.92,13.99,26.84,34.27,5.5,10.76,33.85,3.67,5.33,40.75,0.92,2.12,30.25,25.05,60.28,8.44,16.3,34.12,4.77,4.24,52.93,1.92,1.05,64.62,40.27,75.21,16.81,34.8,32.57,11.31,21.9,34.06,0.95,0.94,50.21,8.37,95.72,17.91,87.19,1.051,17618,2016-10-12,1,OTT_2016-10-12,8.0,0.58,0.0,4.0,0.46,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0
1,1,"2016-10-12 - Maple Leafs 4, Senators 5",TOR,Limited ReportFull Report,46.0,72.36,33.76,68.19,47.78,29.13,62.12,32.65,20.74,61.15,4.18,3.71,52.95,2.21,1.24,64.08,26.84,13.99,65.73,10.76,5.5,66.15,5.33,3.67,59.25,2.12,0.92,69.75,39.72,74.95,16.3,8.44,65.88,4.24,4.77,47.07,1.05,1.92,35.38,24.79,59.73,34.8,16.81,67.43,21.9,11.31,65.94,0.94,0.95,49.79,4.28,91.63,12.81,82.09,0.949,17618,2016-10-12,1,TOR_2016-10-12,4.0,0.46,0.0,8.0,0.58,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0
2,2,"2016-10-12 - Blues 5, Blackhawks 2",CHI,Limited ReportFull Report,43.9,26.55,34.34,43.61,14.59,27.2,34.91,10.56,19.1,35.6,0.94,0.0,100.0,0.8,1.95,29.12,10.62,20.67,33.94,4.75,8.38,36.17,3.79,7.31,34.13,0.94,0.0,100.0,24.86,100.0,5.88,12.23,32.48,1.93,7.12,21.28,0.0,0.0,,0.0,100.0,11.65,10.05,53.68,4.76,4.79,49.82,0.0,0.0,,0.0,100.0,8.94,100.0,1.089,21729,2016-10-12,1,CHI_2016-10-12,6.866667,1.24,3.0,7.116667,0.75,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0
3,3,"2016-10-12 - Blues 5, Blackhawks 2",STL,Limited ReportFull Report,43.9,34.34,26.55,56.39,27.2,14.59,65.09,19.1,10.56,64.4,0.0,0.94,0.0,1.95,0.8,70.88,20.67,10.62,66.06,8.38,4.75,63.83,7.31,3.79,65.87,0.0,0.94,0.0,0.0,75.14,12.23,5.88,67.52,7.12,1.93,78.72,0.0,0.0,,0.0,100.0,10.05,11.65,46.32,4.79,4.76,50.18,0.0,0.0,,0.0,100.0,0.0,91.06,0.911,21729,2016-10-12,1,STL_2016-10-12,7.116667,0.75,1.0,6.866667,1.24,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0
4,4,"2016-10-12 - Flames 4, Oilers 7",CGY,Limited ReportFull Report,40.85,39.9,36.2,52.44,32.94,29.38,52.86,24.54,23.09,51.53,1.01,3.82,20.94,1.46,1.77,45.27,11.7,18.23,39.08,3.98,10.93,26.71,3.93,11.07,26.21,1.01,2.87,26.01,25.69,74.05,7.5,7.31,50.63,6.88,3.02,69.47,0.0,0.95,0.0,0.0,68.64,26.53,15.17,63.61,12.47,7.84,61.4,0.0,0.0,,0.0,100.0,4.12,83.46,0.876,18347,2016-10-12,1,CGY_2016-10-12,11.183333,0.51,1.0,5.25,1.17,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0


In [132]:
def concat_current_to_master_team_features(master_df):
    current_df = get_curent_season_team_stats()
    return pd.concat([master_df, current_df[current_df['Date'] == Yesterday]])

In [None]:
def merge_stats_with_schedule_df(sched_df, goalies_df, team_stats_df, elo_df, feature_columns, goalie_feature_columns):
    goalies_df = goalies_df[goalies_df['TOI'] >=28.5]
    df = game_results_df.merge(goalies_df[goalie_feature_columns].add_prefix('home_'), left_on = 'Home_Team_Key', right_on = 'home_Team_Key', how = 'left').rename(columns ={'home_Name':'home_goalie'}).drop(columns = 'home_Team_Key')
    df = df.merge(goalies_df[goalie_feature_columns].add_prefix('away_'), left_on = 'Away_Team_Key', right_on = 'away_Team_Key', how = 'left').rename(columns ={'away_Name':'away_goalie'}).drop(columns = 'away_Team_Key')
    df = df.merge(features_df[feature_columns].add_prefix('home_'), left_on = 'Home_Team_Key', right_on = 'home_Team_Key', how = 'left')
    df = df.merge(features_df[feature_columns].add_prefix('away_'), left_on = 'Away_Team_Key', right_on = 'away_Team_Key', how = 'left')
    df = df.merge(elo[['elo_Team_Key', 'Rating.A.Pre']].add_prefix('home_'), left_on='Home_Team_Key', right_on='home_elo_Team_Key', how = 'left').drop(columns = 'home_elo_Team_Key')
    df = df.merge(elo[['elo_Team_Key', 'Rating.A.Pre']].add_prefix('away_'), left_on='Away_Team_Key', right_on='away_elo_Team_Key', how= 'left').drop(columns = 'away_elo_Team_Key')
    
    #categorize B2B
    conditions = [((df['home_B2B'] == 0) & (df['away_B2B'] == 0)),
                  ((df['home_B2B'] == 1) & (df['away_B2B'] == 0)),
                  ((df['home_B2B'] == 0) & (df['away_B2B'] == 1)),
                  ((df['home_B2B'] == 1) & (df['away_B2B'] == 1))
                 ]
    
    choices = ['Neither',
               'Home_only',
               'Away_only',
               'Both']

    df['B2B_Status'] = np.select(conditions, choices)
    
    
    #season
    conditions = [((df['date'] >= '2017-10-04') & (df['date'] <= '2018-04-08')),
                  ((df['date'] >= '2018-10-03') & (df['date'] <= '2019-04-06')),
                  ((df['date'] >= '2019-10-02') & (df['date'] <= '2020-03-12')),
                  ((df['date'] >= '2021-01-13') & (df['date'] <= '2021-06-29'))
                 ]
    
    choices = ['2017-2018',
               '2018-2019',
               '2019-2020',
               '2020-2021']

    df['Season'] = np.select(conditions, choices)

    return df)

In [112]:
get_curent_season_team_stats()

Unnamed: 0,Game,Team,Unnamed: 2,TOI,CF,CA,CF%,FF,FA,FF%,SF,SA,SF%,GF,GA,GF%,xGF,xGA,xGF%,SCF,SCA,SCF%,HDCF,HDCA,HDCF%,HDSF,HDSA,HDSF%,HDGF,HDGA,HDGF%,HDSH%,HDSV%,MDCF,MDCA,MDCF%,MDSF,MDSA,MDSF%,MDGF,MDGA,MDGF%,MDSH%,MDSV%,LDCF,LDCA,LDCF%,LDSF,LDSA,LDSF%,LDGF,LDGA,LDGF%,LDSH%,LDSV%,SH%,SV%,PDO,Attendance,Date,Game_Number,Team_Key,TOI_pk,xGA_pk,GA_pk,TOI_pp,xGF_pp,GF_pp
0,"2021-01-13 - Penguins 3, Flyers 6",PHI,Limited ReportFull Report,51.933333,37.46,47.95,43.86,29.37,37.07,44.21,21.57,29.99,41.84,3.86,2.07,65.12,1.88,1.60,54.15,17.59,19.09,47.95,9.56,7.15,57.21,7.71,5.95,56.44,3.84,1.06,78.38,49.75,82.22,8.03,11.80,40.50,1.94,6.91,21.95,0.00,1.06,0.00,0.00,84.68,15.50,23.31,39.94,9.84,15.21,39.28,0.00,0.00,,0.0,100.00,17.90,93.10,1.110,0,2021-01-13,1,PHI_2021-01-13,5.783333,0.28,1.0,2.133333,0.28,2.0
1,"2021-01-13 - Penguins 3, Flyers 6",PIT,Limited ReportFull Report,51.933333,47.95,37.46,56.14,37.07,29.37,55.79,29.99,21.57,58.16,2.07,3.86,34.88,1.60,1.88,45.85,19.09,17.59,52.05,7.15,9.56,42.79,5.95,7.71,43.56,1.06,3.84,21.62,17.78,50.25,11.80,8.03,59.50,6.91,1.94,78.05,1.06,0.00,100.00,15.32,100.00,23.31,15.50,60.06,15.21,9.84,60.72,0.00,0.00,,0.0,100.00,6.90,82.10,0.890,0,2021-01-13,1,PIT_2021-01-13,2.133333,0.28,2.0,5.783333,0.28,1.0
2,"2021-01-13 - Blackhawks 1, Lightning 5",CHI,Limited ReportFull Report,45.583333,30.32,37.17,44.92,21.50,29.03,42.55,17.82,23.51,43.11,0.00,2.97,0.00,1.03,1.88,35.29,18.48,25.28,42.22,1.93,7.20,21.16,1.90,7.30,20.67,0.00,1.97,0.00,0.00,73.06,16.09,18.55,46.44,11.69,9.45,55.31,0.00,0.98,0.00,0.00,89.66,6.95,9.51,42.23,4.29,6.36,40.31,0.00,0.00,,0.0,100.00,0.00,87.37,0.874,0,2021-01-13,1,CHI_2021-01-13,7.350000,0.93,2.0,6.300000,0.49,1.0
3,"2021-01-13 - Blackhawks 1, Lightning 5",T.B,Limited ReportFull Report,45.583333,37.17,30.32,55.08,29.03,21.50,57.45,23.51,17.82,56.89,2.97,0.00,100.00,1.88,1.03,64.71,25.28,18.48,57.78,7.20,1.93,78.84,7.30,1.90,79.33,1.97,0.00,100.00,26.94,100.00,18.55,16.09,53.56,9.45,11.69,44.69,0.98,0.00,100.00,10.34,100.00,9.51,6.95,57.77,6.36,4.29,59.69,0.00,0.00,,0.0,100.00,12.63,100.00,1.126,0,2021-01-13,1,T.B_2021-01-13,6.300000,0.49,1.0,7.350000,0.93,2.0
4,"2021-01-13 - Canadiens 4, Maple Leafs 5",MTL,Limited ReportFull Report,48.600000,50.47,47.93,51.29,35.13,30.63,53.42,23.29,18.73,55.42,2.11,1.85,53.35,1.92,1.62,54.35,27.02,27.68,49.39,9.91,5.32,65.07,5.47,3.51,60.90,1.06,0.92,53.56,19.36,73.84,17.21,22.39,43.46,7.36,8.61,46.06,1.05,0.00,100.00,14.30,100.00,19.23,18.34,51.18,8.42,6.64,55.92,0.00,0.89,0.0,0.0,86.61,9.07,90.14,0.992,0,2021-01-13,1,MTL_2021-01-13,5.983333,1.64,2.0,5.416667,0.88,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1731,"2021-05-08 - Blues 1, Golden Knights 4",VGK,Limited ReportFull Report,44.350000,49.24,31.11,61.28,36.23,21.07,63.23,22.61,11.20,66.87,0.94,0.00,100.00,2.22,1.19,65.12,21.07,18.80,52.84,10.83,3.97,73.18,7.95,1.95,80.31,0.94,0.00,100.00,11.84,100.00,10.31,14.47,41.62,3.99,3.90,50.61,0.00,0.00,,0.00,100.00,20.09,11.21,64.18,7.49,5.26,58.73,0.00,0.00,,0.0,100.00,4.17,100.00,1.042,7567,2021-05-08,56,VGK_2021-05-08,4.416667,0.63,0.0,4.600000,0.64,1.0
1732,"2021-05-08 - Avalanche 3, Kings 2",COL,Limited ReportFull Report,48.750000,41.84,31.85,56.78,30.58,21.48,58.73,21.59,13.52,61.49,2.13,1.90,52.85,1.83,1.06,63.22,18.62,14.25,56.65,7.77,4.39,63.92,6.63,4.40,60.15,0.00,0.93,0.00,0.00,78.86,10.97,9.66,53.15,5.27,4.62,53.32,2.09,1.03,67.05,39.67,77.73,15.30,13.42,53.28,7.50,3.63,67.36,0.00,0.00,,0.0,100.00,9.88,85.93,0.958,2482,2021-05-08,56,COL_2021-05-08,4.000000,0.50,0.0,5.183333,0.81,1.0
1733,"2021-05-08 - Avalanche 3, Kings 2",L.A,Limited ReportFull Report,48.750000,31.85,41.84,43.22,21.48,30.58,41.27,13.52,21.59,38.51,1.90,2.13,47.15,1.06,1.83,36.78,14.25,18.62,43.35,4.39,7.77,36.08,4.40,6.63,39.85,0.93,0.00,100.00,21.14,100.00,9.66,10.97,46.85,4.62,5.27,46.68,1.03,2.09,32.95,22.27,60.33,13.42,15.30,46.72,3.63,7.50,32.64,0.00,0.00,,0.0,100.00,14.07,90.12,1.042,2482,2021-05-08,56,L.A_2021-05-08,5.183333,0.81,1.0,4.000000,0.50,0.0
1734,"2021-05-08 - Coyotes 5, Sharks 4",ARI,Limited ReportFull Report,50.783333,39.08,48.55,44.60,33.02,42.89,43.50,26.60,36.29,42.30,2.07,2.79,42.56,2.82,2.67,51.32,23.01,29.94,43.46,11.72,12.88,47.64,9.49,11.11,46.05,1.06,1.86,36.28,11.16,83.27,11.32,16.96,40.03,8.20,12.41,39.79,0.00,0.96,0.00,0.00,92.26,15.35,16.51,48.18,8.02,10.85,42.51,0.94,0.00,100.0,11.7,100.00,7.77,92.31,1.001,1249,2021-05-08,56,ARI_2021-05-08,4.000000,0.60,0.0,2.116667,0.13,1.0


In [136]:
def get_elo():
    elo = pd.read_csv('https://raw.githubusercontent.com/NeilPaine538/NHL-Player-And-Team-Ratings/master/nhl_elo_historical.csv')
    elo_conversion = {'VEG' : 'VGK', 'NJD': 'N.J',  'SJS': 'S.J', 'TBL' : 'T.B',  'LAK' : 'L.A'}
    elo = elo.replace({'Franch.A': elo_conversion})
    elo['elo_Team_Key'] = elo['Franch.A'].astype(str)+'_'+elo['Date'].astype(str)
    
    return elo

In [133]:
elo = pd.read_csv('https://raw.githubusercontent.com/NeilPaine538/NHL-Player-And-Team-Ratings/master/nhl_elo_historical.csv')

In [135]:
elo.tail()

Unnamed: 0,Game.ID,Date,Year,Team.A,Franch.A,Rating.A.Pre,Rating.A.Post,Goals.A,Team.B,Franch.B,Rating.B.Pre,Rating.B.Post,Goals.B,OT,Playoff,Neutral,Home,Win,Margin,Elo_diff,HomeIce,Forecast,Delta,Multiplier,Shift
126625,202100625TBL,2021-06-25,2021,New York Islanders,NYI,1568.43,1565.74,0.0,Tampa Bay Lightning,TBL,1609.62,1612.31,1.0,,1,0,0,0.0,-1,-91.19,-1,37.2%,-0.372,0.8048,-2.6923
126626,202100628TBL,2021-06-28,2021,Tampa Bay Lightning,TBL,1612.31,1617.08,5.0,Montreal Canadiens,MTL,1520.12,1515.35,1.0,,1,0,1,1.0,4,142.19,1,69.4%,0.306,1.7317,4.7704
126627,202100628TBL,2021-06-28,2021,Montreal Canadiens,MTL,1520.12,1515.35,1.0,Tampa Bay Lightning,TBL,1612.31,1617.08,5.0,,1,0,0,0.0,-4,-142.19,-1,30.6%,-0.306,1.7317,-4.7704
126628,202100630TBL,2021-06-30,2021,Tampa Bay Lightning,TBL,1617.08,1620.44,3.0,Montreal Canadiens,MTL,1515.35,1511.99,1.0,,1,0,1,1.0,2,151.73,1,70.5%,0.295,1.2682,3.362
126629,202100630TBL,2021-06-30,2021,Montreal Canadiens,MTL,1515.35,1511.99,1.0,Tampa Bay Lightning,TBL,1617.08,1620.44,3.0,,1,0,0,0.0,-2,-151.73,-1,29.5%,-0.295,1.2682,-3.362


In [251]:
def scrape_expected_starters():
    starters = []
    #expected starts courtesy of goaliepost.com
    url = 'https://goaliepost.com/'
    req = requests.get(url)
    soup = BeautifulSoup(req.content)
    body = list(soup.children)[1]
    for i in range(len(body.find_all('span',{'class':'starterName'}))):
        starters.append(body.find_all('span',{'class':'starterName'})[i].text)
    return starters
        

In [252]:
scrape_expected_starters()

['Andrei Vasilevskiy', 'Carey Price']

In [339]:
def active_goalie_and_team_dict():
    url = 'http://statsapi.web.nhl.com/api/v1/teams?expand=team.roster'
    r = requests.get(url)
    players = r.json()
    goalie_to_team = {}
    for team in players['teams']:
        for player in team['roster']['roster']:
            if player['position']['code'] == 'G':
                goalie_to_team[player['person']['fullName']] = team['abbreviation']

    return goalie_to_team

In [340]:
active_goalie_and_team_dict()

KeyError: 'roster'

In [342]:
url = 'http://statsapi.web.nhl.com/api/v1/teams?expand=team.roster'
r = requests.get(url)
players = r.json()



In [270]:
if  players['teams'][0]['roster']['roster'][0]['position']['code'] == 'G'
    dict[player['person']['fullName']]

'D'

In [284]:
players['teams'][0]

{'id': 1,
 'name': 'New Jersey Devils',
 'link': '/api/v1/teams/1',
 'venue': {'name': 'Prudential Center',
  'link': '/api/v1/venues/null',
  'city': 'Newark',
  'timeZone': {'id': 'America/New_York', 'offset': -4, 'tz': 'EDT'}},
 'abbreviation': 'NJD',
 'teamName': 'Devils',
 'locationName': 'New Jersey',
 'firstYearOfPlay': '1982',
 'division': {'id': 25,
  'name': 'MassMutual East',
  'link': '/api/v1/divisions/25'},
 'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
 'franchise': {'franchiseId': 23,
  'teamName': 'Devils',
  'link': '/api/v1/franchises/23'},
 'roster': {'roster': [{'person': {'id': 8477972,
     'fullName': 'Josh Jacobs',
     'link': '/api/v1/people/8477972'},
    'jerseyNumber': '40',
    'position': {'code': 'D',
     'name': 'Defenseman',
     'type': 'Defenseman',
     'abbreviation': 'D'}},
   {'person': {'id': 8478421,
     'fullName': 'A.J. Greer',
     'link': '/api/v1/people/8478421'},
    'jerseyNumber': '42',
    'position': 

In [273]:
players['teams'][0]['abbreviation']

'NJD'

In [346]:
players['teams'][2]['roster']['roster'][5]['position']['code']

'C'

In [345]:
players['teams'][2]['roster']['roster'][5]['person']['fullName']

'Ryan Strome'

In [347]:
goalie_to_team = {}
for team in players['teams']:
    for player in team['roster']['roster']:
        if player['position']['code'] == 'G':

            goalie_to_team[player['person']['fullName']] = team['abbreviation']
            
        
        

KeyError: 'roster'

In [334]:
goalie_to_team

{'Scott Wedgewood': 'NJD',
 'Aaron Dell': 'NJD',
 'Mackenzie Blackwood': 'NJD',
 'Cory Schneider': 'NYI',
 'Semyon Varlamov': 'NYI',
 'Ilya Sorokin': 'NYI',
 'Ken Appleby': 'NYI',
 'Keith Kinkaid': 'NYR',
 'Igor Shesterkin': 'NYR',
 'Alexandar Georgiev': 'NYR',
 'Brian Elliott': 'PHI',
 'Alex Lyon': 'PHI',
 'Carter Hart': 'PHI',
 'Maxime Lagace': 'PIT',
 'Tristan Jarry': 'PIT',
 'Casey DeSmith': 'PIT',
 "Alex D'Orio": 'PIT',
 'Jaroslav Halak': 'BOS',
 'Tuukka Rask': 'BOS',
 'Dan Vladar': 'BOS',
 'Callum Booth': 'BOS',
 'Jeremy Swayman': 'BOS',
 'Dustin Tokarski': 'BUF',
 'Carter Hutton': 'BUF',
 'Linus Ullmark': 'BUF',
 'Ukko-Pekka Luukkonen': 'BUF',
 'Stefanos Lekkas': 'BUF',
 'Carey Price': 'MTL',
 'Jake Allen': 'MTL',
 'Charlie Lindgren': 'MTL',
 'Joey Daccord': 'OTT',
 'Anton Forsberg': 'OTT',
 'Matt Murray': 'OTT',
 'Marcus Hogberg': 'OTT',
 'Michael Hutchinson': 'TOR',
 'Jack Campbell': 'TOR',
 'Frederik Andersen': 'TOR',
 'Joseph Woll': 'TOR',
 'David Rittich': 'TOR',
 'Veini Ve