# PART 4: Current Data Retreival/ETL

## Get Data for last 5 days

In [89]:
# imports
import pandas as pd
import requests

In [90]:
# simulate login request to fantasydata
frames = []
session_requests = requests.session()

headers = {
    'authority': 'fantasydata.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'max-age=0',
    'origin': 'https://fantasydata.com',
    'referer': 'https://fantasydata.com/user/login',
}

data = {
    'Email': null,
    'Password': null,
    'RedirectUri': '',
    'secondary_email': '',
}

response = session_requests.post('https://fantasydata.com/user/login', headers=headers, data=data)


url = 'https://fantasydata.com/NHL_FantasyStats/FantasyStats_Read'
headers = {
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'accept-encoding': 'gzip, deflate'
    }

# get data
positions = {'5' : 'G', '1' : 'C', '6' : 'D', '2' : 'LW', '3' : 'RW', '4' : 'C_W'}
seasons = list(range(2022, 2023, 1))

from datetime import date, timedelta

# get data
frames = []
for season in seasons:
    sdate = date.today()-timedelta(5)   # start date
    edate = date.today()   # end date
    dates = pd.date_range(sdate,edate-timedelta(days=1),freq='d').tolist()
    for i in range(len(positions)):
        for dateobj in dates:
            month = str(dateobj.month)
            day = str(dateobj.day)
            year = str(dateobj.year)
            if len(month) == 1:
                month = '0' + month
            if len(day) == 1:
                day = '0' + day
            myTuple = month, day, year
            datestr = ('-').join(myTuple)
            formdata = {
                        'sort': 'FantasyPoints-desc',
                        'pageSize': '300',
                        'filters.position': list(positions.keys())[i],
                        'filters.season': str(season),
                        'filters.seasontype': '1',
                        'filters.scope': '2',
                        'filters.seasontype' : '1',
                        'filters.date' : datestr,
                        'filters.conference': 1
                        }
            resp = session_requests.get(url, headers = headers, data = formdata) #Mimic Web Browser Request To Gather Data
            
            data = resp.json() #Grab Data In JSON format
            df = pd.json_normalize(data, record_path=['Data']) #Transform Json to dataframe
            if len(df) > 0:
                df['Date'] = dateobj
                frames.append(df)
                print('Got data for', list(positions.values())[i], 'on', datestr)
df_all = pd.concat(frames, axis=0)
df_all.reset_index(drop=True, inplace=True)
df_all.to_csv('player_stats_recent.csv')

Got data for G on 01-23-2023
Got data for G on 01-24-2023
Got data for G on 01-25-2023
Got data for G on 01-26-2023
Got data for G on 01-27-2023
Got data for C on 01-23-2023
Got data for C on 01-24-2023
Got data for C on 01-25-2023
Got data for C on 01-26-2023
Got data for C on 01-27-2023
Got data for D on 01-23-2023
Got data for D on 01-24-2023
Got data for D on 01-25-2023
Got data for D on 01-26-2023
Got data for D on 01-27-2023
Got data for LW on 01-23-2023
Got data for LW on 01-24-2023
Got data for LW on 01-25-2023
Got data for LW on 01-26-2023
Got data for LW on 01-27-2023
Got data for RW on 01-23-2023
Got data for RW on 01-24-2023
Got data for RW on 01-25-2023
Got data for RW on 01-26-2023
Got data for RW on 01-27-2023
Got data for C_W on 01-23-2023
Got data for C_W on 01-24-2023
Got data for C_W on 01-25-2023
Got data for C_W on 01-26-2023
Got data for C_W on 01-27-2023


## Merge recent data with data for whole season

In [91]:
# read in dataframes with year to date stats and stats retrieved in previous step
YTD_df = pd.read_csv('player_stats_YTD.csv', index_col = 0)
recent_df = pd.read_csv('player_stats_recent.csv', index_col = 0)

#combine YTD and recent dataframes
frames = [YTD_df, recent_df]
df_all = pd.concat(frames, axis=0)

# drop overlapping entries
df_all = df_all.drop_duplicates(subset = ['Name', 'Date'])

In [92]:
# write to csv
df_all.to_csv('player_stats_YTD.csv')
df_all.to_csv('player_stats_current.csv')

## Current Data ETL

In [93]:
#Load Our Modules & read in data set
import os 
import pandas as pd
import numpy as np
import datetime as dt

working_directory = 'D:/machine_learning/DFS/NHL'
os.chdir(working_directory)
data_dir = 'CurrentData/' #Where is your data located?
etl_dir = 'CurrentData/ETL/' #Where is your output data going?
player_stats = pd.read_csv('player_stats_current.csv', index_col = 0) #Read In Our Main Dataset

# convert date from object dtype to datetime
player_stats['Date'] = pd.to_datetime(player_stats['Date'])

# get month and year from datetime type date
player_stats['Month'], player_stats['Year'] = player_stats['Date'].dt.month, player_stats['Date'].dt.year

# convert date from datetime to string
player_stats['Date'] = player_stats['Date'].dt.strftime('%Y%m%d')

# create a new column and use np.select to assign values to it using our lists as arguments
player_stats['Season'] = 2022

# display updated DataFrame
display(player_stats)

Unnamed: 0,StatID,TeamID,PlayerID,Name,Team,Position,Games,Started,Season,Goals,...,Game.Quarter,Game.Status,Game.IsOver,Game.TimeRemainingMinutes,Game.TimeRemainingSeconds,Game.DateTime,Game.TimeRemaining,Game.QuarterDisplay,Month,Year
0,2650647,16,30002616,Elvis Merzlikins,CBJ,G,1,0,2022,0.0,...,,Final,True,,,/Date(1664665200000)/,,F,10,2022
1,2650135,2,30003673,Ukko-Pekka Luukkonen,BUF,G,1,0,2022,0.0,...,,Final,True,,,/Date(1664643600000)/,,F,10,2022
2,2650225,8,30004494,Spencer Knight,FLA,G,1,0,2022,0.0,...,,Final,True,,,/Date(1664643600000)/,,F,10,2022
3,2650725,25,30003729,Stuart Skinner,EDM,G,1,0,2022,0.0,...,,F/SO,True,,,/Date(1664668800000)/,,F/SO,10,2022
4,2650080,14,30000392,Tristan Jarry,PIT,G,1,0,2022,0.0,...,,Final,True,,,/Date(1664643600000)/,,F,10,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1665,2760430,24,30004943,Walker Duehr,CGY,RW,0,0,2022,0.0,...,,Final,True,,,/Date(1674874800000)/,,F,1,2023
1666,2760374,16,30003267,Mathieu Olivier,CBJ,RW,1,0,2022,0.0,...,,Final,True,,,/Date(1674874800000)/,,F,1,2023
1667,2760104,3,30004111,Jonatan Berggren,DET,RW,1,0,2022,0.0,...,,Final,True,,,/Date(1674864000000)/,,F,1,2023
1668,2760206,9,30000331,Jesper Fast,CAR,RW,1,0,2022,0.0,...,,F/OT,True,,,/Date(1674864000000)/,,F/OT,1,2023


In [94]:
""" C ETL """
c_yr_dfs = []
for yr in range(2022, 2023): 
    print(yr)
    # filter to current year and C position
    yr_c = player_stats[(player_stats['Season']==yr) & (player_stats['Position']=='C')].copy().reset_index(drop=True)
    yr_c = yr_c.drop_duplicates(subset = ['Name', 'Date'])
    yr_c.drop(['GoaltendingWins', 'GoaltendingLosses', 'GoaltendingOvertimeLosses', 'GoaltendingShotsAgainst', 'GoaltendingGoalsAgainst', 'GoaltendingSaves', 'GoaltendingShutouts',  'GoaltendingGoalsAgainstAverage', 'GoaltendingSavePercentage', 'GoaltendingMinutes'], axis = 1, inplace = True)

    # sort by players & week
    yr_c.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

    # aggregate stats for last 3 games (sums and means)
    # games
    yr_c['GM3'] = yr_c.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['GM_pg3'] = yr_c.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goals
    yr_c['G3'] = yr_c.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['G_pg3'] = yr_c.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_c['A3'] = yr_c.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['A_pg3'] = yr_c.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_c['PTS3'] = yr_c.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['PTS_pg3'] = yr_c.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_c['plusminus3'] = yr_c.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['plusminus_pg3'] = yr_c.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_c['HT3'] = yr_c.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['HT_pg3'] = yr_c.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_c['PIM3'] = yr_c.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['PIM_pg3'] = yr_c.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_c['PPG3'] = yr_c.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['PPG_pg3'] = yr_c.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_c['SHG3'] = yr_c.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['SHG_pg3'] = yr_c.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_c['SOG3'] = yr_c.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['SOG_pg3'] = yr_c.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_c['BLK3'] = yr_c.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['BLK_pg3'] = yr_c.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_c['FP3'] = yr_c.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['FP_pg3'] = yr_c.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # games
    yr_c['GM'] = yr_c.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    # goals
    yr_c['G'] = yr_c.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['G_pg'] = yr_c.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_c['A'] = yr_c.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['A_pg'] = yr_c.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_c['PTS'] = yr_c.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['PTS_pg'] = yr_c.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_c['PM'] = yr_c.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['PM_pg'] = yr_c.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_c['HT'] = yr_c.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['HT_pg'] = yr_c.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_c['PIM'] = yr_c.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['PIM_pg'] = yr_c.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_c['PPG'] = yr_c.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['PPG_pg'] = yr_c.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_c['SHG3'] = yr_c.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['SHG_pg'] = yr_c.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_c['SOG'] = yr_c.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['SOG_pg'] = yr_c.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_c['BLK'] = yr_c.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['BLK_pg'] = yr_c.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_c['FP'] = yr_c.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['FP_pg'] = yr_c.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_c = yr_c.fillna(0)
    
    # DEFENSE
    # aggregate stats for last 3 games (sums and means)
    # goals
    yr_c['def_G3'] = yr_c.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_G_pg3'] = yr_c.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_c['def_A3'] = yr_c.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_A_pg3'] = yr_c.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_c['def_PTS3'] = yr_c.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_PTS_pg3'] = yr_c.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_c['def_plusminus3'] = yr_c.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_plusminus_pg3'] = yr_c.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_c['def_HT3'] = yr_c.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_HT_pg3'] = yr_c.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_c['def_PIM3'] = yr_c.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_PIM_pg3'] = yr_c.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_c['def_PPG3'] = yr_c.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_PPG_pg3'] = yr_c.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_c['def_SHG3'] = yr_c.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_SHG_pg3'] = yr_c.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_c['def_SOG3'] = yr_c.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_SOG_pg3'] = yr_c.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_c['def_BLK3'] = yr_c.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_BLK_pg3'] = yr_c.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_c['def_FP3'] = yr_c.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_c['def_FP_pg3'] = yr_c.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # goals
    yr_c['def_G'] = yr_c.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_G_pg'] = yr_c.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_c['def_A'] = yr_c.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_A_pg'] = yr_c.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_c['def_PTS'] = yr_c.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_PTS_pg'] = yr_c.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_c['def_PM'] = yr_c.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_PM_pg'] = yr_c.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_c['def_HT'] = yr_c.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_HT_pg'] = yr_c.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_c['def_PIM'] = yr_c.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_PIM_pg'] = yr_c.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_c['def_PPG'] = yr_c.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_PPG_pg'] = yr_c.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_c['def_SHG3'] = yr_c.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_SHG_pg'] = yr_c.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_c['def_SOG'] = yr_c.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_SOG_pg'] = yr_c.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_c['def_BLK'] = yr_c.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_BLK_pg'] = yr_c.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_c['def_FP'] = yr_c.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_c['def_FP_pg'] = yr_c.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_c = yr_c.fillna(0)
    
    c_yr_dfs.append(yr_c)

    
c_stats = pd.concat(c_yr_dfs).drop_duplicates().reset_index(drop=True)
c_stats_all = c_stats.drop_duplicates(subset = ['Name', 'Date'])
c_stats_all.reset_index(drop=True, inplace = True)
c_stats_all.to_csv(etl_dir + 'c_stats.csv')
print('Done.')
    

2022
Done.


In [95]:
""" W ETL """
w_yr_dfs = []
for yr in range(2022, 2023): 
    print(yr)
    # filter to current year and PG position
    yr_w = player_stats[(player_stats['Season']==yr) & (player_stats['Position']=='LW') | (player_stats['Season']==yr) & (player_stats['Position']=='RW')].copy().reset_index(drop=True)
    yr_w = yr_w.drop_duplicates(subset = ['Name', 'Date'])
    yr_w.drop(['GoaltendingWins', 'GoaltendingLosses', 'GoaltendingOvertimeLosses', 'GoaltendingShotsAgainst', 'GoaltendingGoalsAgainst', 'GoaltendingSaves', 'GoaltendingShutouts',  'GoaltendingGoalsAgainstAverage', 'GoaltendingSavePercentage', 'GoaltendingMinutes'], axis = 1, inplace = True)

    # sort by players & week
    yr_w.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

    # aggregate stats for last 3 games (sums and means)
    # games
    yr_w['GM3'] = yr_w.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['GM_pg3'] = yr_w.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goals
    yr_w['G3'] = yr_w.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['G_pg3'] = yr_w.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_w['A3'] = yr_w.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['A_pg3'] = yr_w.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_w['PTS3'] = yr_w.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['PTS_pg3'] = yr_w.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_w['plusminus3'] = yr_w.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['plusminus_pg3'] = yr_w.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_w['HT3'] = yr_w.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['HT_pg3'] = yr_w.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_w['PIM3'] = yr_w.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['PIM_pg3'] = yr_w.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_w['PPG3'] = yr_w.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['PPG_pg3'] = yr_w.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_w['SHG3'] = yr_w.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['SHG_pg3'] = yr_w.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_w['SOG3'] = yr_w.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['SOG_pg3'] = yr_w.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_w['BLK3'] = yr_w.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['BLK_pg3'] = yr_w.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_w['FP3'] = yr_w.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['FP_pg3'] = yr_w.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # games
    yr_w['GM'] = yr_w.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    # goals
    yr_w['G'] = yr_w.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['G_pg'] = yr_w.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_w['A'] = yr_w.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['A_pg'] = yr_w.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_w['PTS'] = yr_w.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['PTS_pg'] = yr_w.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_w['PM'] = yr_w.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['PM_pg'] = yr_w.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_w['HT'] = yr_w.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['HT_pg'] = yr_w.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_w['PIM'] = yr_w.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['PIM_pg'] = yr_w.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_w['PPG'] = yr_w.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['PPG_pg'] = yr_w.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_w['SHG3'] = yr_w.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['SHG_pg'] = yr_w.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_w['SOG'] = yr_w.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['SOG_pg'] = yr_w.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_w['BLK'] = yr_w.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['BLK_pg'] = yr_w.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_w['FP'] = yr_w.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['FP_pg'] = yr_w.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_w = yr_w.fillna(0)
    
    # DEFENSE
    # aggregate stats for last 3 games (sums and means)
    # goals
    yr_w['def_G3'] = yr_w.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_G_pg3'] = yr_w.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_w['def_A3'] = yr_w.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_A_pg3'] = yr_w.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_w['def_PTS3'] = yr_w.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_PTS_pg3'] = yr_w.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_w['def_plusminus3'] = yr_w.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_plusminus_pg3'] = yr_w.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_w['def_HT3'] = yr_w.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_HT_pg3'] = yr_w.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_w['def_PIM3'] = yr_w.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_PIM_pg3'] = yr_w.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_w['def_PPG3'] = yr_w.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_PPG_pg3'] = yr_w.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_w['def_SHG3'] = yr_w.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_SHG_pg3'] = yr_w.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_w['def_SOG3'] = yr_w.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_SOG_pg3'] = yr_w.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_w['def_BLK3'] = yr_w.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_BLK_pg3'] = yr_w.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_w['def_FP3'] = yr_w.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_w['def_FP_pg3'] = yr_w.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # goals
    yr_w['def_G'] = yr_w.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_G_pg'] = yr_w.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_w['def_A'] = yr_w.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_A_pg'] = yr_w.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_w['def_PTS'] = yr_w.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_PTS_pg'] = yr_w.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_w['def_PM'] = yr_w.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_PM_pg'] = yr_w.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_w['def_HT'] = yr_w.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_HT_pg'] = yr_w.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_w['def_PIM'] = yr_w.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_PIM_pg'] = yr_w.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_w['def_PPG'] = yr_w.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_PPG_pg'] = yr_w.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_w['def_SHG3'] = yr_w.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_SHG_pg'] = yr_w.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_w['def_SOG'] = yr_w.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_SOG_pg'] = yr_w.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_w['def_BLK'] = yr_w.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_BLK_pg'] = yr_w.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_w['def_FP'] = yr_w.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_w['def_FP_pg'] = yr_w.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_w = yr_w.fillna(0)
    
    w_yr_dfs.append(yr_w)

    
w_stats = pd.concat(w_yr_dfs).drop_duplicates().reset_index(drop=True)
w_stats_all = w_stats.drop_duplicates(subset = ['Name', 'Date'])
w_stats_all.reset_index(drop=True, inplace = True)
w_stats_all.to_csv(etl_dir + 'w_stats.csv')
print('Done.')
    

2022
Done.


In [96]:
""" D ETL """
d_yr_dfs = []
for yr in range(2022, 2023): 
    print(yr)
    # filter to current year and PG position
    yr_d = player_stats[(player_stats['Season']==yr) & (player_stats['Position']=='D')].copy().reset_index(drop=True)
    yr_d = yr_d.drop_duplicates(subset = ['Name', 'Date'])
    yr_d.drop(['GoaltendingWins', 'GoaltendingLosses', 'GoaltendingOvertimeLosses', 'GoaltendingShotsAgainst', 'GoaltendingGoalsAgainst', 'GoaltendingSaves', 'GoaltendingShutouts',  'GoaltendingGoalsAgainstAverage', 'GoaltendingSavePercentage', 'GoaltendingMinutes'], axis = 1, inplace = True)

    # sort by players & week
    yr_d.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

    # aggregate stats for last 3 games (sums and means)
    # games
    yr_d['GM3'] = yr_d.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['GM_pg3'] = yr_d.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goals
    yr_d['G3'] = yr_d.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['G_pg3'] = yr_d.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_d['A3'] = yr_d.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['A_pg3'] = yr_d.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_d['PTS3'] = yr_d.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['PTS_pg3'] = yr_d.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_d['plusminus3'] = yr_d.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['plusminus_pg3'] = yr_d.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_d['HT3'] = yr_d.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['HT_pg3'] = yr_d.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_d['PIM3'] = yr_d.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['PIM_pg3'] = yr_d.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_d['PPG3'] = yr_d.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['PPG_pg3'] = yr_d.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_d['SHG3'] = yr_d.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['SHG_pg3'] = yr_d.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_d['SOG3'] = yr_d.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['SOG_pg3'] = yr_d.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_d['BLK3'] = yr_d.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['BLK_pg3'] = yr_d.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_d['FP3'] = yr_d.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['FP_pg3'] = yr_d.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # games
    yr_d['GM'] = yr_d.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    # goals
    yr_d['G'] = yr_d.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['G_pg'] = yr_d.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_d['A'] = yr_d.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['A_pg'] = yr_d.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_d['PTS'] = yr_d.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['PTS_pg'] = yr_d.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_d['PM'] = yr_d.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['PM_pg'] = yr_d.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_d['HT'] = yr_d.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['HT_pg'] = yr_d.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_d['PIM'] = yr_d.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['PIM_pg'] = yr_d.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_d['PPG'] = yr_d.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['PPG_pg'] = yr_d.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_d['SHG3'] = yr_d.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['SHG_pg'] = yr_d.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_d['SOG'] = yr_d.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['SOG_pg'] = yr_d.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_d['BLK'] = yr_d.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['BLK_pg'] = yr_d.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_d['FP'] = yr_d.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['FP_pg'] = yr_d.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_d = yr_d.fillna(0)
    
    # DEFENSE
    # aggregate stats for last 3 games (sums and means)
    # goals
    yr_d['def_G3'] = yr_d.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_G_pg3'] = yr_d.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # assists
    yr_d['def_A3'] = yr_d.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_A_pg3'] = yr_d.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # points
    yr_d['def_PTS3'] = yr_d.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_PTS_pg3'] = yr_d.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # plusminus
    yr_d['def_plusminus3'] = yr_d.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_plusminus_pg3'] = yr_d.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Hat Tricks
    yr_d['def_HT3'] = yr_d.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_HT_pg3'] = yr_d.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # Penalty Minutes
    yr_d['def_PIM3'] = yr_d.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_PIM_pg3'] = yr_d.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # PPG
    yr_d['def_PPG3'] = yr_d.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_PPG_pg3'] = yr_d.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SHG
    yr_d['def_SHG3'] = yr_d.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_SHG_pg3'] = yr_d.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # SOG
    yr_d['def_SOG3'] = yr_d.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_SOG_pg3'] = yr_d.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # BLocks
    yr_d['def_BLK3'] = yr_d.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_BLK_pg3'] = yr_d.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_d['def_FP3'] = yr_d.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_d['def_FP_pg3'] = yr_d.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    
    # aggregate stats for season
    # goals
    yr_d['def_G'] = yr_d.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_G_pg'] = yr_d.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # assists
    yr_d['def_A'] = yr_d.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_A_pg'] = yr_d.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # points
    yr_d['def_PTS'] = yr_d.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_PTS_pg'] = yr_d.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # plusminus
    yr_d['def_PM'] = yr_d.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_PM_pg'] = yr_d.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Hat Tricks
    yr_d['def_HT'] = yr_d.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_HT_pg'] = yr_d.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # Penalty Minutes
    yr_d['def_PIM'] = yr_d.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_PIM_pg'] = yr_d.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # PPG
    yr_d['def_PPG'] = yr_d.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_PPG_pg'] = yr_d.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SHG
    yr_d['def_SHG3'] = yr_d.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_SHG_pg'] = yr_d.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # SOG
    yr_d['def_SOG'] = yr_d.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_SOG_pg'] = yr_d.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # BLocks
    yr_d['def_BLK'] = yr_d.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_BLK_pg'] = yr_d.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_d['def_FP'] = yr_d.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_d['def_FP_pg'] = yr_d.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    
    yr_d = yr_d.fillna(0)
    
    d_yr_dfs.append(yr_d)

    
d_stats = pd.concat(d_yr_dfs).drop_duplicates().reset_index(drop=True)
d_stats_all = d_stats.drop_duplicates(subset = ['Name', 'Date'])
d_stats_all.reset_index(drop=True, inplace = True)
d_stats_all.to_csv(etl_dir + 'd_stats.csv')
print('Done.')
    

2022
Done.


In [97]:
d_stats_all

Unnamed: 0,StatID,TeamID,PlayerID,Name,Team,Position,Games,Started,Season,Goals,...,def_PIM_pg,def_PPG,def_PPG_pg,def_SHG_pg,def_SOG,def_SOG_pg,def_BLK,def_BLK_pg,def_FP,def_FP_pg
0,2650187,8,30000239,Aaron Ekblad,FLA,D,0,0,2022,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000
1,2655948,8,30000239,Aaron Ekblad,FLA,D,1,0,2022,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000
2,2659117,8,30000239,Aaron Ekblad,FLA,D,0,0,2022,0.0,...,0.000000,0.0,0.000000,0.0,3.0,3.000000,1.0,1.000000,22.9,22.900000
3,2663787,8,30000239,Aaron Ekblad,FLA,D,1,0,2022,0.0,...,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000
4,2665955,8,30000239,Aaron Ekblad,FLA,D,1,0,2022,1.0,...,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13120,2653169,5,30005771,Zachary Massicotte,OTT,D,0,0,2022,0.0,...,0.243902,0.0,0.000000,0.0,102.0,1.243902,71.0,0.865854,459.8,5.607317
13121,2650211,8,30005212,Zachary Uens,FLA,D,0,0,2022,0.0,...,0.573171,2.0,0.024390,0.0,79.0,0.963415,97.0,1.182927,485.1,5.915854
13122,2655972,8,30005212,Zachary Uens,FLA,D,0,0,2022,0.0,...,0.560976,2.0,0.024390,0.0,104.0,1.268293,82.0,1.000000,582.1,7.098780
13123,2659141,8,30005212,Zachary Uens,FLA,D,0,0,2022,0.0,...,0.560976,2.0,0.024390,0.0,101.0,1.231707,81.0,0.987805,551.7,6.728049


In [98]:
""" G ETL """
g_yr_gfs = []
for yr in range(2022, 2023): 
    print(yr)
    # filter to current year and PG position
    yr_g = player_stats[(player_stats['Season']==yr) & (player_stats['Position']=='G')].copy().reset_index(drop=True)
    yr_g = yr_g.drop_duplicates(subset = ['Name', 'Date'])
    yr_g.drop(['Goals', 'Assists', 'Points', 'PlusMinus', 'HatTricks',
       'PenaltyMinutes', 'PowerPlayGoals', 'ShortHandedGoals', 'ShotsOnGoal',
       'Blocks',], axis = 1, inplace = True)

    # sort by players & week
    yr_g.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

    # aggregate stats for last 3 games (sums and means)
    # games
    yr_g['GM3'] = yr_g.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GM_pg3'] = yr_g.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending wins
    yr_g['GTW3'] = yr_g.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTW_pg3'] = yr_g.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending losses
    yr_g['GTL3'] = yr_g.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTL_pg3'] = yr_g.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending overtime losses
    yr_g['GTOTL3'] = yr_g.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTOTL_pg3'] = yr_g.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending shots against
    yr_g['GTSA3'] = yr_g.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTSA_pg3'] = yr_g.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending goals against
    yr_g['GTGA3'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTGA_pg3'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingSaves
    yr_g['GTS3'] = yr_g.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTS_pg3'] = yr_g.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingShutouts
    yr_g['GTSO3'] = yr_g.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTSO_pg3'] = yr_g.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingGoalsAgainstAverage
    yr_g['GTGAA_pg3'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
     # GoaltendingSavePercentage
    yr_g['GTSP_pg3'] = yr_g.groupby(['Name'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
     # GoaltendingMinutes
    yr_g['GTM3'] = yr_g.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['GTM_pg3'] = yr_g.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_g['FP3'] = yr_g.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['FP_pg3'] = yr_g.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # win percentage over last 3 games
    yr_g['WP3'] = yr_g['GTW3'] / yr_g['GM3']
    # saves per minute over last 3 games
    yr_g['SPM3'] = yr_g['GTS3'] / yr_g['GTM3']
    
    
    # aggregate stats for season
    # games
    yr_g['GM'] = yr_g.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    # goaltending wins
    yr_g['GTW'] = yr_g.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTW_pg'] = yr_g.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending losses
    yr_g['GTL'] = yr_g.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTL_pg'] = yr_g.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending overtime losses
    yr_g['GTOTL'] = yr_g.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTOTL_pg'] = yr_g.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending shots against
    yr_g['GTSA'] = yr_g.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTSA_pg'] = yr_g.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending goals against
    yr_g['GTGA'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTGA_pg'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingSaves
    yr_g['GTS'] = yr_g.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTS_pg'] = yr_g.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingShutouts
    yr_g['GTSO'] = yr_g.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTSO_pg'] = yr_g.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingGoalsAgainstAverage
    yr_g['GTGAA_pg'] = yr_g.groupby(['Name'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
     # GoaltendingSavePercentage
    yr_g['GTSP_pg'] = yr_g.groupby(['Name'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
     # GoaltendingMinutes
    yr_g['GTM'] = yr_g.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['GTM_pg'] = yr_g.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_g['FP'] = yr_g.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['FP_pg'] = yr_g.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # win percentage for season
    yr_g['WP'] = yr_g['GTW'] / yr_g['GM']
    # saves per minute for season
    yr_g['SPM'] = yr_g['GTS'] / yr_g['GTM']
    
    yr_g = yr_g.fillna(0)
    
    # DEFENSE
    # aggregate stats for last 3 games (sums and means)
    # games
    yr_g['def_GM3'] = yr_g.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GM_pg3'] = yr_g.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending wins
    yr_g['def_GTW3'] = yr_g.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTW_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending losses
    yr_g['def_GTL3'] = yr_g.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTL_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending overtime losses
    yr_g['def_GTOTL3'] = yr_g.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTOTL_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending shots against
    yr_g['def_GTSA3'] = yr_g.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTSA_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # goaltending goals against
    yr_g['def_GTGA3'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTGA_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingSaves
    yr_g['def_GTS3'] = yr_g.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTS_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingShutouts
    yr_g['def_GTSO3'] = yr_g.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTSO_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # GoaltendingGoalsAgainstAverage
    yr_g['def_GTGAA_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
     # GoaltendingSavePercentage
    yr_g['def_GTSP_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
     # GoaltendingMinutes
    yr_g['def_GTM3'] = yr_g.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_GTM_pg3'] = yr_g.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # FP
    yr_g['def_FP3'] = yr_g.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
    yr_g['def_FP_pg3'] = yr_g.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
    # win percentage for last 3 games
    yr_g['def_WP3'] = yr_g['def_GTW3'] / yr_g['def_GM3']
    # saves per minute for last 3 games
    yr_g['def_SPM3'] = yr_g['def_GTS3'] / yr_g['def_GTM3']
    
    
    # aggregate stats for season
    # games
    yr_g['def_GM'] = yr_g.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    # goaltending wins
    yr_g['def_GTW'] = yr_g.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTW_pg'] = yr_g.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending losses
    yr_g['def_GTL'] = yr_g.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTL_pg'] = yr_g.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending overtime losses
    yr_g['def_GTOTL'] = yr_g.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTOTL_pg'] = yr_g.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending shots against
    yr_g['def_GTSA'] = yr_g.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTSA_pg'] = yr_g.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # goaltending goals against
    yr_g['def_GTGA'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTGA_pg'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingSaves
    yr_g['def_GTS'] = yr_g.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTS_pg'] = yr_g.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingShutouts
    yr_g['def_GTSO'] = yr_g.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTSO_pg'] = yr_g.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # GoaltendingGoalsAgainstAverage
    yr_g['def_GTGAA_pg'] = yr_g.groupby(['Opponent'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
     # GoaltendingSavePercentage
    yr_g['def_GTSP_pg'] = yr_g.groupby(['Opponent'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
     # GoaltendingMinutes
    yr_g['def_GTM'] = yr_g.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_GTM_pg'] = yr_g.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # FP
    yr_g['def_FP'] = yr_g.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
    yr_g['def_FP_pg'] = yr_g.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
    # win percentage for last 3 games
    yr_g['def_WP'] = yr_g['def_GTW'] / yr_g['def_GM']
    # saves per minute for last 3 games
    yr_g['def_SPM'] = yr_g['def_GTS'] / yr_g['def_GTM']
    yr_g = yr_g.fillna(0)
    
    g_yr_gfs.append(yr_g)

    
g_stats = pd.concat(g_yr_gfs).drop_duplicates().reset_index(drop=True)
g_stats_all = g_stats.drop_duplicates(subset = ['Name', 'Date'])
g_stats_all.reset_index(drop=True, inplace = True)
g_stats_all.to_csv(etl_dir + 'g_stats.csv')
print('Done.')
    

2022
Done.


## Predictions

In [99]:
import os
import pickle
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from datetime import date
from datetime import datetime



working_directory = 'D:/machine_learning/DFS/NHL'
os.chdir(working_directory)
pred_dir = 'predictions/'
data_dir = 'CurrentData/'
etl_dir = 'CurrentData/ETL/'
os.chdir(working_directory)


# get today's date
today = date.today()

curr_date = str(today).replace('-','')

curr_season = 2022

player_stats = pd.read_csv('player_stats_current.csv', index_col = 0)
player_stats['Season'] = curr_season
sched = player_stats[['Season','Date','Team','Opponent']].drop_duplicates().reset_index(drop=True).rename(columns={'Opp':'Defense'})
sched = sched.sort_values(['Season','Date'], ascending = [False, False]).reset_index(drop=True)
sched.to_csv(data_dir + 'schedule_so_far.csv')

In [100]:
# get team abbreviations
player_stats['Team'].value_counts().sort_index().keys().tolist()

['ANA',
 'ARI',
 'BOS',
 'BUF',
 'CAR',
 'CBJ',
 'CGY',
 'CHI',
 'COL',
 'DAL',
 'DET',
 'EDM',
 'FLA',
 'LA',
 'MIN',
 'MON',
 'NAS',
 'NJ',
 'NYI',
 'NYR',
 'OTT',
 'PHI',
 'PIT',
 'SEA',
 'SJ',
 'STL',
 'TB',
 'TOR',
 'VAN',
 'VEG',
 'WAS',
 'WPG']

### Get Today's Schedule

In [150]:
import requests
from bs4 import BeautifulSoup
# CHANGE DAY AND DATE
# day of week retrieval
day_dict = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
dt = datetime.now()
todays_day = day_dict[dt.weekday()]

# set date
str_date = str(today).replace('-', '')

# get this week's NBA schedule
sched_url = 'https://hashtaghockey.com/advanced-nhl-schedule-grid'

session_requests = requests.session()
r = session_requests.get(sched_url)
soup = BeautifulSoup(r.content, 'lxml')

rows = soup.findAll('tr')[1:]

l = []
for tr in rows:
    td = tr.find_all('td')
    row = [tr.text for tr in td]
    
    # make sure that only rows with teams are added to the df
    if len(row[1]) == 1 and row[0] != '# Games Played':
        l.append(row)

# create df
df = pd.DataFrame(l, columns = [
    'Team',
    'Games',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
    'NextMonday',
    'NextTuesday',
])

In [151]:
# remove @ symbols, denote which teams are off, and correct team abbreviations
df = df.replace({'\xa0':'off', 'FLO' : 'FLA', 'TBL': 'TB', 'VGK':'VEG', 'MTL' : 'MON', 'WPJ' : 'WPG', 'SJS' : 'SJ', 'LAK' : 'LA'}, regex=True)

df['Team'] = ['ANA',
             'ARI',
             'BOS',
             'BUF',
             'CGY',
             'CAR',
             'CHI',
             'COL',
             'CBJ',
             'DAL',
             'DET',
             'EDM',
             'FLA',
             'LA',
             'MIN',
             'MON',
             'NAS',
             'NJ',
             'NYI',
             'NYR',
             'OTT',
             'PHI',
             'PIT',
             'SJ',
             'SEA',
             'STL',
             'TB',
             'TOR',
             'VAN',
             'VEG',
             'WAS',
             'WPG']

# create subset dataframe with today's schedule
teams = df['Team']
defenses = df[todays_day]
season = ['2022'] * len(teams)
dates = [str_date] * len(teams)

todays_df = pd.DataFrame({
    'Season' : season,
    'Date' : dates,
    'Team' : teams,
    'Opponent' : defenses})

print("Today's Schedule:")
display(todays_df)

sched_update = pd.concat([todays_df, sched], axis = 0)
sched_update.to_csv(data_dir + 'schedule_so_far_updated.csv')

Today's Schedule:


Unnamed: 0,Season,Date,Team,Opponent
0,2022,20230128,ANA,ARI
1,2022,20230128,ARI,@ANA
2,2022,20230128,BOS,@FLA
3,2022,20230128,BUF,@MIN
4,2022,20230128,CGY,off
5,2022,20230128,CAR,off
6,2022,20230128,CHI,@EDM
7,2022,20230128,COL,STL
8,2022,20230128,CBJ,@SEA
9,2022,20230128,DAL,off


In [152]:
# todays_df.sort_values('Team')['Team']

In [153]:
# todays_df.sort_values('Opponent')['Opponent']

In [154]:
import joblib

## C predictions (LGBM model)

In [186]:
working_directory = 'D:/machine_learning/DFS/NHL'
import os
os.chdir(working_directory)
pred_dir = 'predictions/'
data_dir = 'CurrentData/'
etl_dir = 'CurrentData/ETL/'

sched_update = pd.read_csv(data_dir + 'schedule_so_far_updated.csv', index_col = 0)
sched_update = sched_update[sched_update['Opponent'] != 'off']
sched_update['Season'] = sched_update['Season'].fillna(0)

# convert date from object dtype to datetime
sched_update['Date'] = pd.to_datetime(sched_update['Date'])

# convert date from datetime to string
sched_update['Date'] = sched_update['Date'].dt.strftime('%Y%m%d')
# sched_update[['Season','Date']] = sched_update[['Season','Date']].astype(int)

# drop duplicate players
players_df = player_stats[['Team','Name', 'Position']].drop_duplicates(subset = 'Name')

# df w/ schedule
curr_date_df = sched_update[(sched_update['Season']==curr_season) & (sched_update['Date']==curr_date)].copy().reset_index(drop=True)

# get players that play on the current date
curr_date_players = pd.merge(curr_date_df, players_df, how = 'left', on = ['Team'])

# get centers that play on the current date
curr_date_players_c = curr_date_players.loc[(curr_date_players['Position'] == 'C')]

# create HomeOrAway column for today's players since it would be nan otherwise
curr_date_players_c['HomeOrAway'] = np.where(curr_date_players_c['Opponent'].str.contains('@'), 'AWAY', 'HOME')

# c_vs is 2022 stats so far
c_vs = pd.read_csv(etl_dir + 'c_stats.csv', index_col = 0)
c_vs = c_vs.rename(columns={'Team_x' : 'Team'})
    
c_vs = c_vs[c_vs['Season']==curr_season].reset_index(drop=True)
curr_c_vs = pd.concat([c_vs, curr_date_players_c])
curr_c_vs.reset_index(inplace = True, drop = True)

curr_c_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

curr_c_vs = curr_c_vs.drop_duplicates(subset = ['Name', 'Date'])

# remove @ string from opponents
curr_c_vs = curr_c_vs.replace({'@':''}, regex=True)

# sort by players & week
curr_c_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

# aggregate stats for last 3 games (sums and means)
# games
curr_c_vs['GM3'] = curr_c_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['GM_pg3'] = curr_c_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goals
curr_c_vs['G3'] = curr_c_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['G_pg3'] = curr_c_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_c_vs['A3'] = curr_c_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['A_pg3'] = curr_c_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_c_vs['PTS3'] = curr_c_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['PTS_pg3'] = curr_c_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_c_vs['plusminus3'] = curr_c_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['plusminus_pg3'] = curr_c_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_c_vs['HT3'] = curr_c_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['HT_pg3'] = curr_c_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_c_vs['PIM3'] = curr_c_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['PIM_pg3'] = curr_c_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_c_vs['PPG3'] = curr_c_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['PPG_pg3'] = curr_c_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_c_vs['SHG3'] = curr_c_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['SHG_pg3'] = curr_c_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_c_vs['SOG3'] = curr_c_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['SOG_pg3'] = curr_c_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_c_vs['BLK3'] = curr_c_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['BLK_pg3'] = curr_c_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_c_vs['FP3'] = curr_c_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['FP_pg3'] = curr_c_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# games
curr_c_vs['GM'] = curr_c_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
# goals
curr_c_vs['G'] = curr_c_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['G_pg'] = curr_c_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_c_vs['A'] = curr_c_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['A_pg'] = curr_c_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_c_vs['PTS'] = curr_c_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['PTS_pg'] = curr_c_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_c_vs['PM'] = curr_c_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['PM_pg'] = curr_c_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_c_vs['HT'] = curr_c_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['HT_pg'] = curr_c_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_c_vs['PIM'] = curr_c_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['PIM_pg'] = curr_c_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_c_vs['PPG'] = curr_c_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['PPG_pg'] = curr_c_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_c_vs['SHG3'] = curr_c_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['SHG_pg'] = curr_c_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_c_vs['SOG'] = curr_c_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['SOG_pg'] = curr_c_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_c_vs['BLK'] = curr_c_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['BLK_pg'] = curr_c_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_c_vs['FP'] = curr_c_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['FP_pg'] = curr_c_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_c_vs = curr_c_vs.fillna(0)

# DEFENSE
# aggregate stats for last 3 games (sums and means)
# goals
curr_c_vs['def_G3'] = curr_c_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_G_pg3'] = curr_c_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_c_vs['def_A3'] = curr_c_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_A_pg3'] = curr_c_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_c_vs['def_PTS3'] = curr_c_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_PTS_pg3'] = curr_c_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_c_vs['def_plusminus3'] = curr_c_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_plusminus_pg3'] = curr_c_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_c_vs['def_HT3'] = curr_c_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_HT_pg3'] = curr_c_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_c_vs['def_PIM3'] = curr_c_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_PIM_pg3'] = curr_c_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_c_vs['def_PPG3'] = curr_c_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_PPG_pg3'] = curr_c_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_c_vs['def_SHG3'] = curr_c_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_SHG_pg3'] = curr_c_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_c_vs['def_SOG3'] = curr_c_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_SOG_pg3'] = curr_c_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_c_vs['def_BLK3'] = curr_c_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_BLK_pg3'] = curr_c_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_c_vs['def_FP3'] = curr_c_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_c_vs['def_FP_pg3'] = curr_c_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# goals
curr_c_vs['def_G'] = curr_c_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_G_pg'] = curr_c_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_c_vs['def_A'] = curr_c_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_A_pg'] = curr_c_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_c_vs['def_PTS'] = curr_c_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_PTS_pg'] = curr_c_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_c_vs['def_PM'] = curr_c_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_PM_pg'] = curr_c_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_c_vs['def_HT'] = curr_c_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_HT_pg'] = curr_c_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_c_vs['def_PIM'] = curr_c_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_PIM_pg'] = curr_c_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_c_vs['def_PPG'] = curr_c_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_PPG_pg'] = curr_c_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_c_vs['def_SHG3'] = curr_c_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_SHG_pg'] = curr_c_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_c_vs['def_SOG'] = curr_c_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_SOG_pg'] = curr_c_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_c_vs['def_BLK'] = curr_c_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_BLK_pg'] = curr_c_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_c_vs['def_FP'] = curr_c_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_c_vs['def_FP_pg'] = curr_c_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_c_vs = curr_c_vs.fillna(0)

# ordinal encode HomeorAway column
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
curr_c_vs['HomeOrAway'] = encoder.fit_transform(curr_c_vs['HomeOrAway'].to_numpy().reshape(-1, 1))

# get features calculated for current date for each player
pred_df = curr_c_vs[curr_c_vs['Date']==curr_date].copy().reset_index(drop=True)

# creation of dataframe from which to predict FP
X_test = pred_df.copy()

# features for best model
c_features = ['HomeOrAway', 'GM3', 'GM_pg3', 'G3', 'G_pg3', 'A3', 'A_pg3', 'PTS3', 'PTS_pg3', 'plusminus3', 'plusminus_pg3', 'HT3', 'HT_pg3', 'PIM3', 'PIM_pg3', 'PPG3', 'PPG_pg3', 'SHG3', 'SHG_pg3', 'SOG3', 'SOG_pg3', 'BLK3', 'BLK_pg3', 'FP3', 'FP_pg3', 'GM', 'G', 'G_pg', 'A', 'A_pg', 'PTS', 'PTS_pg', 'PM', 'PM_pg', 'HT', 'HT_pg', 'PIM', 'PIM_pg', 'PPG', 'PPG_pg', 'SHG_pg', 'SOG', 'SOG_pg', 'BLK', 'BLK_pg', 'FP', 'FP_pg', 'def_G3', 'def_G_pg3', 'def_A3', 'def_A_pg3', 'def_PTS3', 'def_PTS_pg3', 'def_plusminus3', 'def_plusminus_pg3', 'def_HT3', 'def_HT_pg3', 'def_PIM3', 'def_PIM_pg3', 'def_PPG3', 'def_PPG_pg3', 'def_SHG3', 'def_SHG_pg3', 'def_SOG3', 'def_SOG_pg3', 'def_BLK3', 'def_BLK_pg3', 'def_FP3', 'def_FP_pg3', 'def_G', 'def_G_pg', 'def_A', 'def_A_pg', 'def_PTS', 'def_PTS_pg', 'def_PM', 'def_PM_pg', 'def_HT', 'def_HT_pg', 'def_PIM', 'def_PIM_pg', 'def_PPG', 'def_PPG_pg', 'def_SHG_pg', 'def_SOG', 'def_SOG_pg', 'def_BLK', 'def_BLK_pg', 'def_FP', 'def_FP_pg'] 

# open previously pickled scaler for G and load it
file = 'scalers/c_scaler.pkl'
scaler = joblib.load(file)
    
file = 'scalers/c_X_train.pkl'
C_X_train = joblib.load(file)

# fit scaler to guard X train df from when model was trained
c_scaler = scaler.fit(C_X_train[c_features])

# scale the prediction dataframe
X_test = pd.DataFrame(c_scaler.transform(X_test[c_features]), columns = X_test[c_features].columns)

# open best performing model, make predictions, write to csv
import joblib
filename = 'models/LGBM_models/C_model_allfeats.pkl'
model = joblib.load(filename)

predictions = model.predict(X_test[c_features])
pred_df['Prediction_LGBM'] = predictions
pred_df['PredictRank_LGBM'] = pred_df.groupby(['Season','Date'])['Prediction_LGBM'].rank(method='min', ascending = False)

out_cols = ['Season','Date', 'Name', 'Team', 'Opponent', 'Prediction_LGBM', 'PredictRank_LGBM']
display(pred_df[out_cols].sort_values(by = 'PredictRank_LGBM'))
pred_df[out_cols].sort_values(by = 'PredictRank_LGBM').to_csv(pred_dir + 'C_Predictions_LGBM_'+str(curr_date)+'.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_date_players_c['HomeOrAway'] = np.where(curr_date_players_c['Opponent'].str.contains('@'), 'AWAY', 'HOME')


Unnamed: 0,Season,Date,Name,Team,Opponent,Prediction_LGBM,PredictRank_LGBM
89,2022,20230128,Connor McDavid,EDM,CHI,20.503430,1.0
275,2022,20230128,Nathan MacKinnon,COL,STL,19.098275,2.0
153,2022,20230128,Jack Hughes,LA,TB,18.066681,3.0
368,2022,20230128,Steven Stamkos,TB,LA,17.924714,4.0
125,2022,20230128,Evgeni Malkin,PIT,SJ,16.926660,5.0
...,...,...,...,...,...,...,...
295,2022,20230128,Olle Lycksell,PHI,WPG,1.683897,411.0
225,2022,20230128,Liam O'Brien,ARI,ANA,1.668362,412.0
199,2022,20230128,Joseph Cramarossa,MIN,BUF,1.632522,413.0
261,2022,20230128,Max Willman,PHI,WPG,1.508045,414.0


## W predictions (RF model)

In [187]:
working_directory = 'D:/machine_learning/DFS/NHL'
import os
os.chdir(working_directory)
pred_dir = 'predictions/'
data_dir = 'CurrentData/'
etl_dir = 'CurrentData/ETL/'

sched_update = pd.read_csv(data_dir + 'schedule_so_far_updated.csv', index_col = 0)
sched_update = sched_update[sched_update['Opponent'] != 'off']
sched_update['Season'] = sched_update['Season'].fillna(0)

# convert date from object dtype to datetime
sched_update['Date'] = pd.to_datetime(sched_update['Date'])

# convert date from datetime to string
sched_update['Date'] = sched_update['Date'].dt.strftime('%Y%m%d')
# sched_update[['Season','Date']] = sched_update[['Season','Date']].astype(int)

# drop duplicate players
players_df = player_stats[['Team','Name', 'Position']].drop_duplicates(subset = 'Name')

# df w/ schedule
curr_date_df = sched_update[(sched_update['Season']==curr_season) & (sched_update['Date']==curr_date)].copy().reset_index(drop=True)

# get players that play on the current date
curr_date_players = pd.merge(curr_date_df, players_df, how = 'left', on = ['Team'])

# get wings that play on the current date
curr_date_players_w = curr_date_players.loc[(curr_date_players['Position'] == 'LW') | (curr_date_players['Position'] == 'RW')]

# create HomeOrAway column for today's players since it would be nan otherwise
curr_date_players_w['HomeOrAway'] = np.where(curr_date_players_w['Opponent'].str.contains('@'), 'AWAY', 'HOME')

# w_vs is 2022 stats so far
w_vs = pd.read_csv(etl_dir + 'w_stats.csv', index_col = 0)
w_vs = w_vs.rename(columns={'Team_x' : 'Team'})
    
w_vs = w_vs[w_vs['Season']==curr_season].reset_index(drop=True)
curr_w_vs = pd.concat([w_vs, curr_date_players_w])
curr_w_vs.reset_index(inplace = True, drop = True)

curr_w_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

curr_w_vs = curr_w_vs.drop_duplicates(subset = ['Name', 'Date'])

# remove @ string from opponents
curr_w_vs = curr_w_vs.replace({'@':''}, regex=True)

# sort by players & week
curr_w_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

# aggregate stats for last 3 games (sums and means)
# games
curr_w_vs['GM3'] = curr_w_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['GM_pg3'] = curr_w_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goals
curr_w_vs['G3'] = curr_w_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['G_pg3'] = curr_w_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_w_vs['A3'] = curr_w_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['A_pg3'] = curr_w_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_w_vs['PTS3'] = curr_w_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['PTS_pg3'] = curr_w_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_w_vs['plusminus3'] = curr_w_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['plusminus_pg3'] = curr_w_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_w_vs['HT3'] = curr_w_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['HT_pg3'] = curr_w_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_w_vs['PIM3'] = curr_w_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['PIM_pg3'] = curr_w_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_w_vs['PPG3'] = curr_w_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['PPG_pg3'] = curr_w_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_w_vs['SHG3'] = curr_w_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['SHG_pg3'] = curr_w_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_w_vs['SOG3'] = curr_w_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['SOG_pg3'] = curr_w_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_w_vs['BLK3'] = curr_w_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['BLK_pg3'] = curr_w_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_w_vs['FP3'] = curr_w_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['FP_pg3'] = curr_w_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# games
curr_w_vs['GM'] = curr_w_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
# goals
curr_w_vs['G'] = curr_w_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['G_pg'] = curr_w_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_w_vs['A'] = curr_w_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['A_pg'] = curr_w_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_w_vs['PTS'] = curr_w_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['PTS_pg'] = curr_w_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_w_vs['PM'] = curr_w_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['PM_pg'] = curr_w_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_w_vs['HT'] = curr_w_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['HT_pg'] = curr_w_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_w_vs['PIM'] = curr_w_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['PIM_pg'] = curr_w_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_w_vs['PPG'] = curr_w_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['PPG_pg'] = curr_w_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_w_vs['SHG3'] = curr_w_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['SHG_pg'] = curr_w_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_w_vs['SOG'] = curr_w_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['SOG_pg'] = curr_w_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_w_vs['BLK'] = curr_w_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['BLK_pg'] = curr_w_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_w_vs['FP'] = curr_w_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['FP_pg'] = curr_w_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_w_vs = curr_w_vs.fillna(0)

# DEFENSE
# aggregate stats for last 3 games (sums and means)
# goals
curr_w_vs['def_G3'] = curr_w_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_G_pg3'] = curr_w_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_w_vs['def_A3'] = curr_w_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_A_pg3'] = curr_w_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_w_vs['def_PTS3'] = curr_w_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_PTS_pg3'] = curr_w_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_w_vs['def_plusminus3'] = curr_w_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_plusminus_pg3'] = curr_w_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_w_vs['def_HT3'] = curr_w_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_HT_pg3'] = curr_w_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_w_vs['def_PIM3'] = curr_w_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_PIM_pg3'] = curr_w_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_w_vs['def_PPG3'] = curr_w_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_PPG_pg3'] = curr_w_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_w_vs['def_SHG3'] = curr_w_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_SHG_pg3'] = curr_w_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_w_vs['def_SOG3'] = curr_w_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_SOG_pg3'] = curr_w_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_w_vs['def_BLK3'] = curr_w_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_BLK_pg3'] = curr_w_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_w_vs['def_FP3'] = curr_w_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_w_vs['def_FP_pg3'] = curr_w_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# goals
curr_w_vs['def_G'] = curr_w_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_G_pg'] = curr_w_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_w_vs['def_A'] = curr_w_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_A_pg'] = curr_w_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_w_vs['def_PTS'] = curr_w_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_PTS_pg'] = curr_w_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_w_vs['def_PM'] = curr_w_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_PM_pg'] = curr_w_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_w_vs['def_HT'] = curr_w_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_HT_pg'] = curr_w_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_w_vs['def_PIM'] = curr_w_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_PIM_pg'] = curr_w_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_w_vs['def_PPG'] = curr_w_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_PPG_pg'] = curr_w_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_w_vs['def_SHG3'] = curr_w_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_SHG_pg'] = curr_w_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_w_vs['def_SOG'] = curr_w_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_SOG_pg'] = curr_w_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_w_vs['def_BLK'] = curr_w_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_BLK_pg'] = curr_w_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_w_vs['def_FP'] = curr_w_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_w_vs['def_FP_pg'] = curr_w_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_w_vs = curr_w_vs.fillna(0)

# ordinal encode HomeorAway column
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
curr_w_vs['HomeOrAway'] = encoder.fit_transform(curr_w_vs['HomeOrAway'].to_numpy().reshape(-1, 1))


# get features calculated for current date for each player
pred_df = curr_w_vs[curr_w_vs['Date']==curr_date].copy().reset_index(drop=True)

# creation of dataframe from which to predict FP
X_test = pred_df.copy()

# features for best model
w_features = ['HomeOrAway', 'GM3', 'GM_pg3', 'G3', 'G_pg3', 'A3', 'A_pg3', 'PTS3', 'PTS_pg3', 'plusminus3', 'plusminus_pg3', 'HT3', 'HT_pg3', 'PIM3', 'PIM_pg3', 'PPG3', 'PPG_pg3', 'SHG3', 'SHG_pg3', 'SOG3', 'SOG_pg3', 'BLK3', 'BLK_pg3', 'FP3', 'FP_pg3', 'GM', 'G', 'G_pg', 'A', 'A_pg', 'PTS', 'PTS_pg', 'PM', 'PM_pg', 'HT', 'HT_pg', 'PIM', 'PIM_pg', 'PPG', 'PPG_pg', 'SHG_pg', 'SOG', 'SOG_pg', 'BLK', 'BLK_pg', 'FP', 'FP_pg', 'def_G3', 'def_G_pg3', 'def_A3', 'def_A_pg3', 'def_PTS3', 'def_PTS_pg3', 'def_plusminus3', 'def_plusminus_pg3', 'def_HT3', 'def_HT_pg3', 'def_PIM3', 'def_PIM_pg3', 'def_PPG3', 'def_PPG_pg3', 'def_SHG3', 'def_SHG_pg3', 'def_SOG3', 'def_SOG_pg3', 'def_BLK3', 'def_BLK_pg3', 'def_FP3', 'def_FP_pg3', 'def_G', 'def_G_pg', 'def_A', 'def_A_pg', 'def_PTS', 'def_PTS_pg', 'def_PM', 'def_PM_pg', 'def_HT', 'def_HT_pg', 'def_PIM', 'def_PIM_pg', 'def_PPG', 'def_PPG_pg', 'def_SHG_pg', 'def_SOG', 'def_SOG_pg', 'def_BLK', 'def_BLK_pg', 'def_FP', 'def_FP_pg'] 

# open previously pickled scaler for W and load it
file = 'scalers/w_scaler.pkl'
scaler = joblib.load(file)
    
file = 'scalers/w_X_train.pkl'
W_X_train = joblib.load(file)

# fit scaler to guard X train df from when model was trained
w_scaler = scaler.fit(W_X_train[w_features])

# scale the prediction dataframe
X_test = pd.DataFrame(w_scaler.transform(X_test[w_features]), columns = X_test[w_features].columns)

# open best performing model, make predictions, write to csv
import joblib
filename = 'models/LGBM_models/W_model_allfeats.pkl'
model = joblib.load(filename)

predictions = model.predict(X_test[w_features])
pred_df['Prediction_LGBM'] = predictions
pred_df['PredictRank_LGBM'] = pred_df.groupby(['Season','Date'])['Prediction_LGBM'].rank(method='min', ascending = False)

out_cols = ['Season','Date', 'Name', 'Team', 'Opponent', 'Prediction_LGBM', 'PredictRank_LGBM']
display(pred_df[out_cols].sort_values(by = 'PredictRank_LGBM'))
pred_df[out_cols].sort_values(by = 'PredictRank_LGBM').to_csv(pred_dir + 'W_Predictions_LGBM_'+str(curr_date)+'.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_date_players_w['HomeOrAway'] = np.where(curr_date_players_w['Opponent'].str.contains('@'), 'AWAY', 'HOME')


Unnamed: 0,Season,Date,Name,Team,Opponent,Prediction_LGBM,PredictRank_LGBM
321,2022,20230128,Nikita Kucherov,TB,LA,23.667369,1.0
238,2022,20230128,Kirill Kaprizov,MIN,BUF,21.661702,2.0
56,2022,20230128,Brady Tkachuk,OTT,MON,18.259459,3.0
350,2022,20230128,Pierre-Luc Dubois,WPG,PHI,18.079436,4.0
242,2022,20230128,Kyle Connor,WPG,PHI,18.074812,5.0
...,...,...,...,...,...,...,...
199,2022,20230128,Jonah Gadjovich,SJ,PIT,1.358485,437.0
176,2022,20230128,James Hamblin,EDM,CHI,1.330745,438.0
406,2022,20230128,Trey Fix-Wolansky,CBJ,SEA,1.227211,439.0
365,2022,20230128,Ross Johnston,NYI,VEG,0.993601,440.0


## D predictions (RF model)

In [188]:
working_directory = 'D:/machine_learning/DFS/NHL'
import os
os.chdir(working_directory)
pred_dir = 'predictions/'
data_dir = 'CurrentData/'
etl_dir = 'CurrentData/ETL/'

sched_update = pd.read_csv(data_dir + 'schedule_so_far_updated.csv', index_col = 0)
sched_update = sched_update[sched_update['Opponent'] != 'off']
sched_update['Season'] = sched_update['Season'].fillna(0)

# convert date from object dtype to datetime
sched_update['Date'] = pd.to_datetime(sched_update['Date'])

# convert date from datetime to string
sched_update['Date'] = sched_update['Date'].dt.strftime('%Y%m%d')
# sched_update[['Season','Date']] = sched_update[['Season','Date']].astype(int)

# drop duplicate players
players_df = player_stats[['Team','Name', 'Position']].drop_duplicates(subset = 'Name')

# df w/ schedule
curr_date_df = sched_update[(sched_update['Season']==curr_season) & (sched_update['Date']==curr_date)].copy().reset_index(drop=True)

# get players that play on the current date
curr_date_players = pd.merge(curr_date_df, players_df, how = 'left', on = ['Team'])

# get defensemen that play on the current date
curr_date_players_d = curr_date_players.loc[curr_date_players['Position'] == 'D']

# create HomeOrAway column for today's players since it would be nan otherwise
curr_date_players_d['HomeOrAway'] = np.where(curr_date_players_d['Opponent'].str.contains('@'), 'AWAY', 'HOME')

# d_vs is 2022 stats so far
d_vs = pd.read_csv(etl_dir + 'd_stats.csv', index_col = 0)
d_vs = d_vs.rename(columns={'Team_x' : 'Team'})
    
d_vs = d_vs[d_vs['Season']==curr_season].reset_index(drop=True)
curr_d_vs = pd.concat([d_vs, curr_date_players_d])
curr_d_vs.reset_index(inplace = True, drop = True)

curr_d_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

curr_d_vs = curr_d_vs.drop_duplicates(subset = ['Name', 'Date'])

# remove @ string from opponents
curr_d_vs = curr_d_vs.replace({'@':''}, regex=True)

# sort by players & week
curr_d_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

# aggregate stats for last 3 games (sums and means)
# games
curr_d_vs['GM3'] = curr_d_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['GM_pg3'] = curr_d_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goals
curr_d_vs['G3'] = curr_d_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['G_pg3'] = curr_d_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_d_vs['A3'] = curr_d_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['A_pg3'] = curr_d_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_d_vs['PTS3'] = curr_d_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['PTS_pg3'] = curr_d_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_d_vs['plusminus3'] = curr_d_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['plusminus_pg3'] = curr_d_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_d_vs['HT3'] = curr_d_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['HT_pg3'] = curr_d_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_d_vs['PIM3'] = curr_d_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['PIM_pg3'] = curr_d_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_d_vs['PPG3'] = curr_d_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['PPG_pg3'] = curr_d_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_d_vs['SHG3'] = curr_d_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['SHG_pg3'] = curr_d_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_d_vs['SOG3'] = curr_d_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['SOG_pg3'] = curr_d_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_d_vs['BLK3'] = curr_d_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['BLK_pg3'] = curr_d_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_d_vs['FP3'] = curr_d_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['FP_pg3'] = curr_d_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# games
curr_d_vs['GM'] = curr_d_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
# goals
curr_d_vs['G'] = curr_d_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['G_pg'] = curr_d_vs.groupby(['Name'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_d_vs['A'] = curr_d_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['A_pg'] = curr_d_vs.groupby(['Name'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_d_vs['PTS'] = curr_d_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['PTS_pg'] = curr_d_vs.groupby(['Name'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_d_vs['PM'] = curr_d_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['PM_pg'] = curr_d_vs.groupby(['Name'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_d_vs['HT'] = curr_d_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['HT_pg'] = curr_d_vs.groupby(['Name'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_d_vs['PIM'] = curr_d_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['PIM_pg'] = curr_d_vs.groupby(['Name'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_d_vs['PPG'] = curr_d_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['PPG_pg'] = curr_d_vs.groupby(['Name'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_d_vs['SHG3'] = curr_d_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['SHG_pg'] = curr_d_vs.groupby(['Name'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_d_vs['SOG'] = curr_d_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['SOG_pg'] = curr_d_vs.groupby(['Name'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_d_vs['BLK'] = curr_d_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['BLK_pg'] = curr_d_vs.groupby(['Name'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_d_vs['FP'] = curr_d_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['FP_pg'] = curr_d_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_d_vs = curr_d_vs.fillna(0)

# DEFENSE
# aggregate stats for last 3 games (sums and means)
# goals
curr_d_vs['def_G3'] = curr_d_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_G_pg3'] = curr_d_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# assists
curr_d_vs['def_A3'] = curr_d_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_A_pg3'] = curr_d_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# points
curr_d_vs['def_PTS3'] = curr_d_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_PTS_pg3'] = curr_d_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# plusminus
curr_d_vs['def_plusminus3'] = curr_d_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_plusminus_pg3'] = curr_d_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Hat Tricks
curr_d_vs['def_HT3'] = curr_d_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_HT_pg3'] = curr_d_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# Penalty Minutes
curr_d_vs['def_PIM3'] = curr_d_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_PIM_pg3'] = curr_d_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# PPG
curr_d_vs['def_PPG3'] = curr_d_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_PPG_pg3'] = curr_d_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SHG
curr_d_vs['def_SHG3'] = curr_d_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_SHG_pg3'] = curr_d_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# SOG
curr_d_vs['def_SOG3'] = curr_d_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_SOG_pg3'] = curr_d_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# BLocks
curr_d_vs['def_BLK3'] = curr_d_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_BLK_pg3'] = curr_d_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_d_vs['def_FP3'] = curr_d_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_d_vs['def_FP_pg3'] = curr_d_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())

# aggregate stats for season
# goals
curr_d_vs['def_G'] = curr_d_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_G_pg'] = curr_d_vs.groupby(['Opponent'])['Goals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# assists
curr_d_vs['def_A'] = curr_d_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_A_pg'] = curr_d_vs.groupby(['Opponent'])['Assists'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# points
curr_d_vs['def_PTS'] = curr_d_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_PTS_pg'] = curr_d_vs.groupby(['Opponent'])['Points'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# plusminus
curr_d_vs['def_PM'] = curr_d_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_PM_pg'] = curr_d_vs.groupby(['Opponent'])['PlusMinus'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Hat Tricks
curr_d_vs['def_HT'] = curr_d_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_HT_pg'] = curr_d_vs.groupby(['Opponent'])['HatTricks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# Penalty Minutes
curr_d_vs['def_PIM'] = curr_d_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_PIM_pg'] = curr_d_vs.groupby(['Opponent'])['PenaltyMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# PPG
curr_d_vs['def_PPG'] = curr_d_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_PPG_pg'] = curr_d_vs.groupby(['Opponent'])['PowerPlayGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SHG
curr_d_vs['def_SHG3'] = curr_d_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_SHG_pg'] = curr_d_vs.groupby(['Opponent'])['ShortHandedGoals'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# SOG
curr_d_vs['def_SOG'] = curr_d_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_SOG_pg'] = curr_d_vs.groupby(['Opponent'])['ShotsOnGoal'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# BLocks
curr_d_vs['def_BLK'] = curr_d_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_BLK_pg'] = curr_d_vs.groupby(['Opponent'])['Blocks'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_d_vs['def_FP'] = curr_d_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_d_vs['def_FP_pg'] = curr_d_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())

curr_d_vs = curr_d_vs.fillna(0)

# ordinal encode HomeorAway column
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
curr_d_vs['HomeOrAway'] = encoder.fit_transform(curr_d_vs['HomeOrAway'].to_numpy().reshape(-1, 1))

# get features calculated for current date for each player
pred_df = curr_d_vs[curr_d_vs['Date']==curr_date].copy().reset_index(drop=True)

# creation of dataframe from which to predict FP
X_test = pred_df.copy()

# features for best model
w_features = ['HomeOrAway', 'GM3', 'GM_pg3', 'G3', 'G_pg3', 'A3', 'A_pg3', 'PTS3', 'PTS_pg3', 'plusminus3', 'plusminus_pg3', 'HT3', 'HT_pg3', 'PIM3', 'PIM_pg3', 'PPG3', 'PPG_pg3', 'SHG3', 'SHG_pg3', 'SOG3', 'SOG_pg3', 'BLK3', 'BLK_pg3', 'FP3', 'FP_pg3', 'GM', 'G', 'G_pg', 'A', 'A_pg', 'PTS', 'PTS_pg', 'PM', 'PM_pg', 'HT', 'HT_pg', 'PIM', 'PIM_pg', 'PPG', 'PPG_pg', 'SHG_pg', 'SOG', 'SOG_pg', 'BLK', 'BLK_pg', 'FP', 'FP_pg', 'def_G3', 'def_G_pg3', 'def_A3', 'def_A_pg3', 'def_PTS3', 'def_PTS_pg3', 'def_plusminus3', 'def_plusminus_pg3', 'def_HT3', 'def_HT_pg3', 'def_PIM3', 'def_PIM_pg3', 'def_PPG3', 'def_PPG_pg3', 'def_SHG3', 'def_SHG_pg3', 'def_SOG3', 'def_SOG_pg3', 'def_BLK3', 'def_BLK_pg3', 'def_FP3', 'def_FP_pg3', 'def_G', 'def_G_pg', 'def_A', 'def_A_pg', 'def_PTS', 'def_PTS_pg', 'def_PM', 'def_PM_pg', 'def_HT', 'def_HT_pg', 'def_PIM', 'def_PIM_pg', 'def_PPG', 'def_PPG_pg', 'def_SHG_pg', 'def_SOG', 'def_SOG_pg', 'def_BLK', 'def_BLK_pg', 'def_FP', 'def_FP_pg'] 

# open previously pickled scaler for D and load it
file = 'scalers/w_scaler.pkl'
scaler = joblib.load(file)
    
file = 'scalers/d_X_train.pkl'
D_X_train = joblib.load(file)

# fit scaler to guard X train df from when model was trained
w_scaler = scaler.fit(D_X_train[w_features])

# scale the prediction dataframe
X_test = pd.DataFrame(w_scaler.transform(X_test[w_features]), columns = X_test[w_features].columns)

# open best performing model, make predictions, write to csv
import joblib
filename = 'models/LGBM_models/D_model_allfeats.pkl'
model = joblib.load(filename)

predictions = model.predict(X_test[w_features])
pred_df['Prediction_LGBM'] = predictions
pred_df['PredictRank_LGBM'] = pred_df.groupby(['Season','Date'])['Prediction_LGBM'].rank(method='min', ascending = False)

out_cols = ['Season','Date', 'Name', 'Team', 'Opponent', 'Prediction_LGBM', 'PredictRank_LGBM']
display(pred_df[out_cols].sort_values(by = 'PredictRank_LGBM'))
pred_df[out_cols].sort_values(by = 'PredictRank_LGBM').to_csv(pred_dir + 'D_Predictions_LGBM_'+str(curr_date)+'.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_date_players_d['HomeOrAway'] = np.where(curr_date_players_d['Opponent'].str.contains('@'), 'AWAY', 'HOME')


Unnamed: 0,Season,Date,Name,Team,Opponent,Prediction_LGBM,PredictRank_LGBM
0,2022,20230128,Aaron Ekblad,FLA,BOS,14.222054,1.0
201,2022,20230128,Jakob Chychrun,ARI,ANA,14.088659,2.0
144,2022,20230128,Erik Karlsson,SJ,PIT,13.689584,3.0
373,2022,20230128,Rasmus Dahlin,BUF,MIN,13.577439,4.0
15,2022,20230128,Alex Pietrangelo,VEG,NYI,12.596925,5.0
...,...,...,...,...,...,...,...
389,2022,20230128,Ryan Murray,EDM,CHI,0.949065,474.0
164,2022,20230128,Haydn Fleury,TB,LA,0.924804,475.0
135,2022,20230128,Egor Zamula,PHI,WPG,0.917766,476.0
77,2022,20230128,Casey Fitzgerald,BUF,MIN,0.826129,477.0


## G predictions

In [189]:
working_directory = 'D:/machine_learning/DFS/NHL'
import os
os.chdir(working_directory)
pred_dir = 'predictions/'
data_dir = 'CurrentData/'
etl_dir = 'CurrentData/ETL/'

sched_update = pd.read_csv(data_dir + 'schedule_so_far_updated.csv', index_col = 0)
sched_update = sched_update[sched_update['Opponent'] != 'off']
sched_update['Season'] = sched_update['Season'].fillna(0)

# convert date from object dtype to datetime
sched_update['Date'] = pd.to_datetime(sched_update['Date'])

# convert date from datetime to string
sched_update['Date'] = sched_update['Date'].dt.strftime('%Y%m%d')
# sched_update[['Season','Date']] = sched_update[['Season','Date']].astype(int)

# drop duplicate players
players_df = player_stats[['Team','Name', 'Position']].drop_duplicates(subset = 'Name')

# df w/ schedule
curr_date_df = sched_update[(sched_update['Season']==curr_season) & (sched_update['Date']==curr_date)].copy().reset_index(drop=True)

# get players that play on the current date
curr_date_players = pd.merge(curr_date_df, players_df, how = 'left', on = ['Team'])

# get centers that play on the current date
curr_date_players_g = curr_date_players.loc[curr_date_players['Position'] == 'G']

# create HomeOrAway column for today's players since it would be nan otherwise
curr_date_players_g['HomeOrAway'] = np.where(curr_date_players_g['Opponent'].str.contains('@'), 'AWAY', 'HOME')

# g_vs is 2022 stats so far
g_vs = pd.read_csv(etl_dir + 'g_stats.csv', index_col = 0)
g_vs = g_vs.rename(columns={'Team_x' : 'Team'})
    
g_vs = g_vs[g_vs['Season']==curr_season].reset_index(drop=True)
curr_g_vs = pd.concat([g_vs, curr_date_players_g])
curr_g_vs.reset_index(inplace = True, drop = True)

curr_g_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

curr_g_vs = curr_g_vs.drop_duplicates(subset = ['Name', 'Date'])

# remove @ string from opponents
curr_g_vs = curr_g_vs.replace({'@':''}, regex=True)

# sort by players & week
curr_g_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

# sort by players & week
curr_g_vs.sort_values(['Name', 'Date'], ascending = [True, True], inplace = True)

# aggregate stats for last 3 games (sums and means)
# games
curr_g_vs['GM3'] = curr_g_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GM_pg3'] = curr_g_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending wins
curr_g_vs['GTW3'] = curr_g_vs.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTW_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending losses
curr_g_vs['GTL3'] = curr_g_vs.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTL_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending overtime losses
curr_g_vs['GTOTL3'] = curr_g_vs.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTOTL_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending shots against
curr_g_vs['GTSA3'] = curr_g_vs.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTSA_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending goals against
curr_g_vs['GTGA3'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTGA_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingSaves
curr_g_vs['GTS3'] = curr_g_vs.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTS_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingShutouts
curr_g_vs['GTSO3'] = curr_g_vs.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTSO_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingGoalsAgainstAverage
curr_g_vs['GTGAA_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingSavePercentage
curr_g_vs['GTSP_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingMinutes
curr_g_vs['GTM3'] = curr_g_vs.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['GTM_pg3'] = curr_g_vs.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_g_vs['FP3'] = curr_g_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['FP_pg3'] = curr_g_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# win percentage over last 3 games
curr_g_vs['WP3'] = curr_g_vs['GTW3'] / curr_g_vs['GM3']
# saves per minute over last 3 games
curr_g_vs['SPM3'] = curr_g_vs['GTS3'] / curr_g_vs['GTM3']


# aggregate stats for season
# games
curr_g_vs['GM'] = curr_g_vs.groupby(['Name'])['Games'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
# goaltending wins
curr_g_vs['GTW'] = curr_g_vs.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTW_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending losses
curr_g_vs['GTL'] = curr_g_vs.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTL_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending overtime losses
curr_g_vs['GTOTL'] = curr_g_vs.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTOTL_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending shots against
curr_g_vs['GTSA'] = curr_g_vs.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTSA_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending goals against
curr_g_vs['GTGA'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTGA_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingSaves
curr_g_vs['GTS'] = curr_g_vs.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTS_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingShutouts
curr_g_vs['GTSO'] = curr_g_vs.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTSO_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingGoalsAgainstAverage
curr_g_vs['GTGAA_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingSavePercentage
curr_g_vs['GTSP_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingMinutes
curr_g_vs['GTM'] = curr_g_vs.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['GTM_pg'] = curr_g_vs.groupby(['Name'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_g_vs['FP'] = curr_g_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['FP_pg'] = curr_g_vs.groupby(['Name'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# win percentage for season
curr_g_vs['WP'] = curr_g_vs['GTW'] / curr_g_vs['GM']
# saves per minute for season
curr_g_vs['SPM'] = curr_g_vs['GTS'] / curr_g_vs['GTM']

curr_g_vs = curr_g_vs.fillna(0)

# DEFENSE
# aggregate stats for last 3 games (sums and means)
# games
curr_g_vs['def_GM3'] = curr_g_vs.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GM_pg3'] = curr_g_vs.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending wins
curr_g_vs['def_GTW3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTW_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending losses
curr_g_vs['def_GTL3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTL_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending overtime losses
curr_g_vs['def_GTOTL3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTOTL_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending shots against
curr_g_vs['def_GTSA3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTSA_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# goaltending goals against
curr_g_vs['def_GTGA3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTGA_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingSaves
curr_g_vs['def_GTS3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTS_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingShutouts
curr_g_vs['def_GTSO3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTSO_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingGoalsAgainstAverage
curr_g_vs['def_GTGAA_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingSavePercentage
curr_g_vs['def_GTSP_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# GoaltendingMinutes
curr_g_vs['def_GTM3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_GTM_pg3'] = curr_g_vs.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# FP
curr_g_vs['def_FP3'] = curr_g_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).sum())
curr_g_vs['def_FP_pg3'] = curr_g_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(3, min_periods = 1).mean())
# win percentage for last 3 games
curr_g_vs['def_WP3'] = curr_g_vs['def_GTW3'] / curr_g_vs['def_GM3']
# saves per minute for last 3 games
curr_g_vs['def_SPM3'] = curr_g_vs['def_GTS3'] / curr_g_vs['def_GTM3']


# aggregate stats for season
# games
curr_g_vs['def_GM'] = curr_g_vs.groupby(['Opponent'])['Games'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
# goaltending wins
curr_g_vs['def_GTW'] = curr_g_vs.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTW_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingWins'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending losses
curr_g_vs['def_GTL'] = curr_g_vs.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTL_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending overtime losses
curr_g_vs['def_GTOTL'] = curr_g_vs.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTOTL_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingOvertimeLosses'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending shots against
curr_g_vs['def_GTSA'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTSA_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShotsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# goaltending goals against
curr_g_vs['def_GTGA'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTGA_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainst'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingSaves
curr_g_vs['def_GTS'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTS_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSaves'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingShutouts
curr_g_vs['def_GTSO'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTSO_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingShutouts'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingGoalsAgainstAverage
curr_g_vs['def_GTGAA_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingGoalsAgainstAverage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingSavePercentage
curr_g_vs['def_GTSP_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingSavePercentage'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# GoaltendingMinutes
curr_g_vs['def_GTM'] = curr_g_vs.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_GTM_pg'] = curr_g_vs.groupby(['Opponent'])['GoaltendingMinutes'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# FP
curr_g_vs['def_FP'] = curr_g_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).sum())
curr_g_vs['def_FP_pg'] = curr_g_vs.groupby(['Opponent'])['FantasyPointsFanDuel'].transform(lambda x: x.shift().rolling(82, min_periods = 1).mean())
# win percentage for last 3 games
curr_g_vs['def_WP'] = curr_g_vs['def_GTW'] / curr_g_vs['def_GM']
# saves per minute for last 3 games
curr_g_vs['def_SPM'] = curr_g_vs['def_GTS'] / curr_g_vs['def_GTM']
curr_g_vs = curr_g_vs.fillna(0)

# ordinal encode HomeorAway column
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
curr_g_vs['HomeOrAway'] = encoder.fit_transform(curr_g_vs['HomeOrAway'].to_numpy().reshape(-1, 1))

# get features calculated for current date for each player
pred_df = curr_g_vs[curr_g_vs['Date']==curr_date].copy().reset_index(drop=True)

# creation of dataframe from which to predict FP
X_test = pred_df.copy()

# features for best model
g_features = ['HomeOrAway', 'GM3', 'GM_pg3', 'GTW3', 'GTW_pg3', 'GTL3', 'GTL_pg3', 'GTOTL3', 'GTOTL_pg3', 'GTSA3', 'GTSA_pg3', 'GTGA3', 'GTGA_pg3', 'GTS3', 'GTS_pg3', 'GTSO3', 'GTSO_pg3', 'GTGAA_pg3', 'GTSP_pg3', 'GTM3', 'GTM_pg3', 'FP3', 'FP_pg3', 'WP3', 'SPM3', 'GM', 'GTW', 'GTW_pg', 'GTL', 'GTL_pg', 'GTOTL', 'GTOTL_pg', 'GTSA', 'GTSA_pg', 'GTGA', 'GTGA_pg', 'GTS', 'GTS_pg', 'GTSO', 'GTSO_pg', 'GTGAA_pg', 'GTSP_pg', 'GTM', 'GTM_pg', 'FP', 'FP_pg', 'WP', 'SPM', 'def_GM3', 'def_GM_pg3', 'def_GTW3', 'def_GTW_pg3', 'def_GTL3', 'def_GTL_pg3', 'def_GTOTL3', 'def_GTOTL_pg3', 'def_GTSA3', 'def_GTSA_pg3', 'def_GTGA3', 'def_GTGA_pg3', 'def_GTS3', 'def_GTS_pg3', 'def_GTSO3', 'def_GTSO_pg3', 'def_GTGAA_pg3', 'def_GTSP_pg3', 'def_GTM3', 'def_GTM_pg3', 'def_FP3', 'def_FP_pg3', 'def_WP3', 'def_SPM3', 'def_GM', 'def_GTW', 'def_GTW_pg', 'def_GTL', 'def_GTL_pg', 'def_GTOTL', 'def_GTOTL_pg', 'def_GTSA', 'def_GTSA_pg', 'def_GTGA', 'def_GTGA_pg', 'def_GTS', 'def_GTS_pg', 'def_GTSO', 'def_GTSO_pg', 'def_GTGAA_pg', 'def_GTSP_pg', 'def_GTM', 'def_GTM_pg', 'def_FP', 'def_FP_pg', 'def_WP', 'def_SPM'] 

# open best performing model, make predictions, write to csv
import joblib
filename = 'models/LGBM_models/G_model_allfeats.pkl'
model = joblib.load(filename)

predictions = model.predict(X_test[g_features])
pred_df['Prediction_LGBM'] = predictions
pred_df['PredictRank_LGBM'] = pred_df.groupby(['Season','Date'])['Prediction_LGBM'].rank(method='min', ascending = False)

# open previously pickled scaler for G and load it
file = 'scalers/g_scaler.pkl'
scaler = joblib.load(file)
    
file = 'scalers/g_X_train.pkl'
G_X_train = joblib.load(file)

# fit scaler to goalie X train df from when model was trained
g_scaler = scaler.fit(G_X_train[g_features])

# scale the prediction dataframe
X_test = pd.DataFrame(g_scaler.transform(X_test[g_features]), columns = X_test[g_features].columns)

# open best performing model, make predictions, write to csv
import joblib
filename = 'models/LGBM_models/G_model_allfeats.pkl'
model = joblib.load(filename)

predictions = model.predict(X_test[g_features])
pred_df['Prediction_LGBM'] = predictions
pred_df['PredictRank_LGBM'] = pred_df.groupby(['Season','Date'])['Prediction_LGBM'].rank(method='min', ascending = False)

# average predictions for LGBM and LGBM models, store in prediction LGBM
pred_df['Prediction_LGBM'] = (pred_df['Prediction_LGBM'] + pred_df['Prediction_LGBM'])/2

out_cols = ['Season','Date', 'Name', 'Team', 'Opponent', 'Prediction_LGBM', 'PredictRank_LGBM']
display(pred_df[out_cols].sort_values(by = 'PredictRank_LGBM'))
pred_df[out_cols].sort_values(by = 'PredictRank_LGBM').to_csv(pred_dir + 'G_Predictions_LGBM_'+str(curr_date)+'.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_date_players_g['HomeOrAway'] = np.where(curr_date_players_g['Opponent'].str.contains('@'), 'AWAY', 'HOME')


Unnamed: 0,Season,Date,Name,Team,Opponent,Prediction_LGBM,PredictRank_LGBM
31,2022,20230128,Connor Hellebuyck,WPG,PHI,16.987146,1.0
105,2022,20230128,Lukas Dostal,ANA,ARI,16.419873,2.0
57,2022,20230128,Ilya Sorokin,NYI,VEG,15.896049,3.0
5,2022,20230128,Alexandar Georgiev,COL,STL,14.506275,4.0
104,2022,20230128,Logan Thompson,VEG,NYI,14.369261,5.0
...,...,...,...,...,...,...,...
8,2022,20230128,Anson Thornton,ARI,ANA,2.873817,162.0
60,2022,20230128,Ivan Nalimov,CHI,EDM,2.751219,163.0
119,2022,20230128,Mitchell Weeks,CHI,EDM,2.743200,164.0
32,2022,20230128,Cory Schneider,NYI,VEG,2.690559,165.0


# Part 5: Lineup Optimization

## Slate Lineups

In [257]:
import os
import pickle
import numpy as np
import pandas as pd

pred_dir = 'Predictions/'
roster_file = 'rosters/FanDuel-NHL-2023 ET-01 ET-28 ET-86478-players-list.csv'

In [258]:
# read in model predictions
c_preds = pd.read_csv(pred_dir + 'C_Predictions_LGBM_'+str(curr_date)+'.csv')
w_preds = pd.read_csv(pred_dir + 'W_Predictions_LGBM_'+str(curr_date)+'.csv')
d_preds = pd.read_csv(pred_dir + 'D_Predictions_LGBM_'+str(curr_date)+'.csv')
g_preds = pd.read_csv(pred_dir + 'G_Predictions_LGBM_'+str(curr_date)+'.csv')

# concat predictions to a single dataframe
all_preds = pd.concat([c_preds, w_preds, d_preds, g_preds]).reset_index(drop = True)
all_preds = all_preds.drop(['Unnamed: 0'], axis = 1)

In [259]:
all_preds = all_preds.rename(columns = {'Name' : 'Nickname'})

# fix names with accent marks/suffixes
import unidecode

for i in range(len(all_preds)):
    # accent mark fix
    all_preds.iloc[i, 2] = unidecode.unidecode(all_preds.iloc[i, 2])
    # drop Jrs.
    if all_preds.iloc[i, 2][-3:] == 'Jr.' or all_preds.iloc[i, 2][-2:] == 'IV':
        all_preds.iloc[i, 2] = all_preds.iloc[i, 2][:-3]

# read in fanduel salaries
fd_df = pd.read_csv(roster_file)

In [260]:
# merge predictions and FD df so salaries are available
all_preds_merged = pd.merge(all_preds, fd_df, on='Nickname')

# drop players w/ 0 FPPG
all_preds_merged = all_preds_merged[(all_preds_merged['FPPG'] != 0)].reset_index(drop = True)

# keep players w/ > 2.1 FPPG
all_preds_merged = all_preds_merged[(all_preds_merged['FPPG'] >= 2.1)].reset_index(drop = True)

# drop injured players
all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'O')].reset_index(drop = True)
all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'IR')].reset_index(drop = True)
# all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'NA')].reset_index(drop = True)
# all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'GTD')].reset_index(drop = True)

# drop players based on how many games they've played
# makes sure that players who don't play aren't considered
all_preds_merged = all_preds_merged[(all_preds_merged.Played >= all_preds_merged.Played.quantile(.2))].reset_index(drop = True)

# drop players based on salary cutoffs
# makes sure that bad players are excluded for each position
all_preds_merged = all_preds_merged[(all_preds_merged.Salary >= 4000)].reset_index(drop = True)

# drop unnamed columns originating from fd_df
cols = all_preds_merged.columns
for col in cols:
    if 'Unnamed'in col:
        all_preds_merged.drop(col, inplace = True, axis = 1)

In [261]:
all_preds_merged

Unnamed: 0,Season,Date,Nickname,Team_x,Opponent_x,Prediction_LGBM,PredictRank_LGBM,Id,Position,First Name,...,FPPG,Played,Salary,Game,Team_y,Opponent_y,Injury Indicator,Injury Details,Tier,Roster Position
0,2022,20230128,Connor McDavid,EDM,CHI,20.503430,1.0,86478-64102,C,Connor,...,25.771430,49.0,10600,CHI@EDM,EDM,CHI,,,,C/UTIL
1,2022,20230128,Nathan MacKinnon,COL,STL,19.098275,2.0,86478-41391,C,Nathan,...,22.730555,36.0,10300,STL@COL,COL,STL,,,,C/UTIL
2,2022,20230128,Steven Stamkos,TB,LA,17.924714,4.0,86478-8655,C,Steven,...,18.514894,47.0,8900,LA@TB,TB,LA,,,,C/UTIL
3,2022,20230128,Evgeni Malkin,PIT,SJ,16.926660,5.0,86478-8595,C,Evgeni,...,14.745833,48.0,7400,SJ@PIT,PIT,SJ,,,,C/UTIL
4,2022,20230128,Ryan Nugent-Hopkins,EDM,CHI,16.465592,6.0,86478-14563,W,Ryan,...,16.028572,49.0,8400,CHI@EDM,EDM,CHI,,,,W/UTIL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,2022,20230128,Anthony Stolarz,ANA,ARI,5.470033,51.0,86478-38243,G,Anthony,...,8.800000,21.0,7600,ARI@ANH,ANH,ARI,,,,G
283,2022,20230128,Filip Gustavsson,MIN,BUF,5.269154,53.0,86478-82752,G,Filip,...,16.872728,22.0,8000,BUF@MIN,MIN,BUF,,,,G
284,2022,20230128,Kaapo Kahkonen,SJ,PIT,4.772807,57.0,86478-84324,G,Kaapo,...,7.733333,21.0,6800,SJ@PIT,SJ,PIT,,,,G
285,2022,20230128,Adin Hill,VEG,NYI,4.610524,61.0,86478-66612,G,Adin,...,13.790476,21.0,7400,VGK@NYI,VGK,NYI,,,,G


In [262]:
# players by position
all_preds_merged['Position'].value_counts()

W    104
D     76
C     67
G     40
Name: Position, dtype: int64

In [263]:
# filter_df for pulp
filter_df = all_preds_merged.copy()
filter_df['MaxUsed'] = 0
filter_df['Pos_C'] = np.where(filter_df['Position']=='C', 1, 0)
filter_df['Pos_W'] = np.where(filter_df['Position']=='W', 1, 0)
filter_df['Pos_D'] = np.where(filter_df['Position']=='D', 1, 0)
filter_df['Pos_G'] = np.where(filter_df['Position']=='G', 1, 0)
filter_df['Pos_All'] = 1

In [264]:
encoded = pd.get_dummies(filter_df['Nickname']) #<-- One-Hot Encoding 

In [265]:
filter_df = filter_df.join(encoded) #<-- joining it to the raw_data table

In [266]:
encoded.columns.tolist()

['Aaron Ekblad',
 'Adam Boqvist',
 'Adam Henrique',
 'Adam Larsson',
 'Adam Lowry',
 'Adam Pelech',
 'Adin Hill',
 'Adrian Kempe',
 'Alec Martinez',
 'Aleksander Barkov',
 'Alex DeBrincat',
 'Alex Iafallo',
 'Alex Killorn',
 'Alex Newhook',
 'Alex Pietrangelo',
 'Alex Tuch',
 'Alexandar Georgiev',
 'Alexander Barabanov',
 'Anders Lee',
 'Andre Burakovsky',
 'Andreas Athanasiou',
 'Andrei Vasilevskiy',
 'Anthony Beauvillier',
 'Anthony Cirelli',
 'Anthony Stolarz',
 'Anton Forsberg',
 'Anton Lundell',
 'Anze Kopitar',
 'Artturi Lehkonen',
 'Arvid Soderblom',
 'Barrett Hayton',
 'Ben Hutton',
 'Blake Wheeler',
 'Boone Jenner',
 'Brad Marchand',
 'Brady Tkachuk',
 'Brandon Hagel',
 'Brandon Montour',
 'Brandon Saad',
 'Brandon Tanev',
 'Brayden McNabb',
 'Brayden Point',
 'Brayden Schenn',
 'Brock McGinn',
 'Brock Nelson',
 'Bryan Rust',
 'Cale Makar',
 'Calen Addison',
 'Calle Rosen',
 'Cam Fowler',
 'Cam York',
 'Carter Hart',
 'Carter Verhaeghe',
 'Casey DeSmith',
 'Casey Mittelstadt',

In [267]:
import pulp

def pred_lineup(df, lineups, overlap, player_names = [0], nonplayer_names = [0]):
    #Initialize our PuLP problem, we choose pulp.LpMinimize because we are going to want the lowest ranked players numerically. (1st is better than 2nd, technically smaller though)
    prob = pulp.LpProblem('NHL_DK', pulp.LpMaximize)
    
    #Count How Many Different Players We Have (Including Team D)
    num_all = df.shape[0]
    
    #Create Salary Cap Variable
    salary_cap = 55000
    
    #Create List Of PuLP player variables
    players_lineup = [pulp.LpVariable("Player_{}".format(i+1), cat="Binary") for i in range(num_all)]
    
    #Tell our PuLP solver that we want our lineup to have our prediction field to be maximized
    prob += pulp.lpSum((pulp.lpSum(df.loc[i, 'Prediction_LGBM'] * players_lineup[i] for i in range(num_all))))
    
    # Make sure only 2 Cs are selected
    prob += ((pulp.lpSum(df.loc[i, 'Pos_C'] * players_lineup[i] for i in range(num_all)))  >= 2)
    prob += ((pulp.lpSum(df.loc[i, 'Pos_C'] * players_lineup[i] for i in range(num_all)))  <= 3)
    
    # Make sure only 2 Ws are selected
    prob += ((pulp.lpSum(df.loc[i, 'Pos_W'] * players_lineup[i] for i in range(num_all)))  >= 2)
    prob += ((pulp.lpSum(df.loc[i, 'Pos_W'] * players_lineup[i] for i in range(num_all)))  <= 3)
    
    # Make sure only 2 Ds are selected
    prob += ((pulp.lpSum(df.loc[i, 'Pos_D'] * players_lineup[i] for i in range(num_all)))  >= 2)
    prob += ((pulp.lpSum(df.loc[i, 'Pos_D'] * players_lineup[i] for i in range(num_all)))  <= 3)
    
    # Make sure only 1 G is selected
    prob += ((pulp.lpSum(df.loc[i, 'Pos_G'] * players_lineup[i] for i in range(num_all)))  == 1)
    
    #Make Sure To Select 9 Players Total
    prob += ((pulp.lpSum(df.loc[i, 'Pos_All'] * players_lineup[i] for i in range(num_all))) == 9)
    
    #add the salary constraint
    prob += ((pulp.lpSum(df.loc[i, 'Salary'] * players_lineup[i] for i in range(num_all))) <= salary_cap)
    
    #Add Our MaxUsed Constraint
    prob += ((pulp.lpSum(df.loc[i, 'MaxUsed'] * players_lineup[i] for i in range(num_all))) == 0)
    
    #select specific player, if indicated
    if player_names != [0]:
        for player_name in player_names:
            prob += ((pulp.lpSum(df.loc[i, player_name] * players_lineup[i] for i in range(num_all)))  == 1)
            
    #remove specific player from consideration, if indicated
    if nonplayer_names != [0]:
        for nonplayer_name in nonplayer_names:
            prob += ((pulp.lpSum(df.loc[i, nonplayer_name] * players_lineup[i] for i in range(num_all)))  == 0)
    
    # add constraint to select only 1 of each player
    for player in encoded.columns.tolist():
        prob += ((pulp.lpSum(df.loc[i, player] * players_lineup[i] for i in range(num_all))) <= 1)
    
    #variance constraints - each lineup can't have more than the num overlap of any combination of players in any previous lineups
    for i in range(len(lineups)):
        prob += ((pulp.lpSum(lineups[i][k] * players_lineup[k] for k in range(num_all))) <= overlap)
        
    #solve the problem
    status = prob.solve()
    
    #check if the optimizer found an optimal solution
    if status != pulp.LpStatusOptimal:
        print('Only {} feasible lineups produced'.format(len(lineups)), '\n')
        return None
    lineup_copy = []
    for i in range(num_all):
        if players_lineup[i].varValue >= 0.9 and players_lineup[i].varValue <= 1.1:
            lineup_copy.append(1)
        else:
            lineup_copy.append(0)

    return lineup_copy

In [268]:
# function to tranform lineups resulting from PULP optimization
def transform_lineup(lineup, lineup_num):
    positions, names, ids, salaries = [], [], [], []
    game_infos, teams, ppgs, ranks = [], [], [], []
    predictions = []
    pos_C = []
    pos_W = []
    pos_D = []
    pos_G = []

    players_lineup = lineup[:len(lineup)]
    for num, player in enumerate(players_lineup):
        if player == 1:
            positions.append(filter_df.loc[num, 'Position'])
            names.append(filter_df.loc[num, 'Nickname'])
            ids.append(filter_df.loc[num, 'Id'])
            salaries.append(filter_df.loc[num, 'Salary'])
            game_infos.append(filter_df.loc[num, 'Game'])
            teams.append(filter_df.loc[num, 'Team_x'])
            ppgs.append(filter_df.loc[num, 'FPPG'])
            ranks.append(filter_df.loc[num, 'PredictRank_LGBM'])
            predictions.append(filter_df.loc[num, 'Prediction_LGBM'])
            pos_C.append(filter_df.loc[num, 'Pos_C']),
            pos_W.append(filter_df.loc[num, 'Pos_W']),
            pos_D.append(filter_df.loc[num, 'Pos_D']),
            pos_G.append(filter_df.loc[num, 'Pos_G']),
            
    df = pd.DataFrame({'Position':positions,'Name':names, 'ID':ids, 'Salary':salaries, 
                       'Game Info':game_infos,'TeamAbbrev':teams,'FPPG':ppgs,
                       'Rank':ranks,'LineupNum':lineup_num, 'Prediction':predictions,
                       'Pos_C':pos_C, 'Pos_W':pos_W, 'Pos_D':pos_D, 'Pos_G':pos_G,})
    return df

In [269]:
csv_dfs = []
# function to display transformed lineups optimized by PULP
def gen_lineups(df, num_lineups, overlap, player_names = [0], nonplayer_names = [0]):
    lineup_list = []
    for i in range(num_lineups):
        lineup_list.append(pred_lineup(df, lineup_list, overlap, player_names, nonplayer_names))
    print('Number of generated lineups:', len(lineup_list))
    
    for i in range(len(lineup_list)):
        lineup_df = transform_lineup(lineup_list[i], i+1)
        lineup_df['FD_ID'] = lineup_df['ID'] + ':' + lineup_df['Name']
        
        # custom sort for lineup
        sort_dict = {'C': 0, 'W': 1, 'D': 2, 'G': 3} 
        display(lineup_df.sort_values(by=['Position'], key=lambda x: x.map(sort_dict)).iloc[:, :10])
        print('salary sum:', lineup_df['Salary'].sum(), '\nlineup FPPG mean:', lineup_df['FPPG'].mean(), '\nlineup Rank mean:', lineup_df['Rank'].mean(), '\nlineup Predicted FP:', lineup_df['Prediction'].sum())
        
        v_counts = lineup_df['Position'].value_counts()
#         display(v_counts)
        values = lineup_df['Position'].value_counts().keys().tolist()
        counts = lineup_df['Position'].value_counts().tolist()
#         print(counts)

In [270]:
# lineup generation (don't enter player name as 4th arg. (which should be a list) unless specific player is desired)
gen_lineups(filter_df, 5, 3, [],[])

Number of generated lineups: 5


Unnamed: 0,Position,Name,ID,Salary,Game Info,TeamAbbrev,FPPG,Rank,LineupNum,Prediction
0,C,Evgeni Malkin,86478-8595,7400,SJ@PIT,PIT,14.745833,5.0,1,16.92666
1,C,Barrett Hayton,86478-97108,4100,ARI@ANH,ARI,7.012245,56.0,1,7.893851
2,C,Pierre-Luc Dubois,86478-70901,7900,PHI@WPG,WPG,15.892,4.0,1,18.079436
3,W,Alex DeBrincat,86478-73885,7400,MON@OTT,OTT,13.991666,7.0,1,17.725675
4,W,Frank Vatrano,86478-61549,6000,ARI@ANH,ANA,10.691837,15.0,1,15.971384
5,W,Dylan Guenther,86478-161390,4000,ARI@ANH,ARI,7.74375,66.0,1,8.127628
6,D,Aaron Ekblad,86478-55607,6700,BOS@FLA,FLA,12.08,1.0,1,14.222054
7,D,Travis Hamonic,86478-9036,4000,MON@OTT,OTT,7.158333,42.0,1,7.782216
8,G,Ilya Sorokin,86478-97232,7500,VGK@NYI,NYI,17.760001,3.0,1,15.896049


salary sum: 55000 
lineup FPPG mean: 11.897296163958307 
lineup Rank mean: 22.11111111111111 
lineup Predicted FP: 122.62495324518673


Unnamed: 0,Position,Name,ID,Salary,Game Info,TeamAbbrev,FPPG,Rank,LineupNum,Prediction
0,C,Evgeni Malkin,86478-8595,7400,SJ@PIT,PIT,14.745833,5.0,2,16.92666
1,C,Mark Scheifele,86478-15321,7400,PHI@WPG,WPG,14.438,8.0,2,15.98592
2,C,Pavel Zacha,86478-65970,4700,BOS@FLA,BOS,9.15625,44.0,2,9.436556
3,W,Alex DeBrincat,86478-73885,7400,MON@OTT,OTT,13.991666,7.0,2,17.725675
4,W,Frank Vatrano,86478-61549,6000,ARI@ANH,ANA,10.691837,15.0,2,15.971384
5,W,Oliver Bjorkstrand,86478-42773,5600,CBJ@SEA,SEA,10.047916,29.0,2,12.427158
6,D,Scott Mayfield,86478-39261,4300,VGK@NYI,NYI,8.203921,39.0,2,8.117463
7,D,Adam Pelech,86478-53866,4100,VGK@NYI,NYI,7.88,50.0,2,7.217531
8,G,Connor Hellebuyck,86478-58909,8100,PHI@WPG,WPG,21.210257,1.0,2,16.987146


salary sum: 55000 
lineup FPPG mean: 12.262853482154746 
lineup Rank mean: 22.0 
lineup Predicted FP: 120.79549369276694


Unnamed: 0,Position,Name,ID,Salary,Game Info,TeamAbbrev,FPPG,Rank,LineupNum,Prediction
1,C,Max Domi,86478-41592,5700,CHI@EDM,CHI,11.144681,26.0,3,11.787201
2,C,Mason McTavish,86478-159629,5100,ARI@ANH,ANA,10.08125,35.0,3,10.568538
3,C,Pierre-Luc Dubois,86478-70901,7900,PHI@WPG,WPG,15.892,4.0,3,18.079436
0,W,Anders Lee,86478-39238,6700,VGK@NYI,NYI,12.323529,12.0,3,14.665351
4,W,Drake Batherson,86478-82190,7200,MON@OTT,OTT,13.702084,11.0,3,16.298494
5,W,Frank Vatrano,86478-61549,6000,ARI@ANH,ANA,10.691837,15.0,3,15.971384
6,D,Scott Mayfield,86478-39261,4300,VGK@NYI,NYI,8.203921,39.0,3,8.117463
7,D,Travis Hamonic,86478-9036,4000,MON@OTT,OTT,7.158333,42.0,3,7.782216
8,G,Connor Hellebuyck,86478-58909,8100,PHI@WPG,WPG,21.210257,1.0,3,16.987146


salary sum: 55000 
lineup FPPG mean: 12.267543537883443 
lineup Rank mean: 20.555555555555557 
lineup Predicted FP: 120.25722898779699


Unnamed: 0,Position,Name,ID,Salary,Game Info,TeamAbbrev,FPPG,Rank,LineupNum,Prediction
0,C,Yanni Gourde,86478-53606,5300,CBJ@SEA,SEA,9.574468,34.0,4,10.762816
1,C,Mason McTavish,86478-159629,5100,ARI@ANH,ANA,10.08125,35.0,4,10.568538
2,C,Pavel Zacha,86478-65970,4700,BOS@FLA,BOS,9.15625,44.0,4,9.436556
3,W,Nikita Kucherov,86478-43214,10200,LA@TB,TB,20.240425,1.0,4,23.667369
4,W,Alex DeBrincat,86478-73885,7400,MON@OTT,OTT,13.991666,7.0,4,17.725675
5,W,Frank Vatrano,86478-61549,6000,ARI@ANH,ANA,10.691837,15.0,4,15.971384
6,D,Samuel Girard,86478-73909,4400,STL@COL,COL,7.970731,36.0,4,8.317428
7,D,Ryan Pulock,86478-43200,4400,VGK@NYI,NYI,7.631373,43.0,4,7.728635
8,G,Ilya Sorokin,86478-97232,7500,VGK@NYI,NYI,17.760001,3.0,4,15.896049


salary sum: 55000 
lineup FPPG mean: 11.899777936603373 
lineup Rank mean: 24.22222222222222 
lineup Predicted FP: 120.07445061100057


Unnamed: 0,Position,Name,ID,Salary,Game Info,TeamAbbrev,FPPG,Rank,LineupNum,Prediction
0,C,Evgeni Malkin,86478-8595,7400,SJ@PIT,PIT,14.745833,5.0,5,16.92666
1,C,Mason McTavish,86478-159629,5100,ARI@ANH,ANA,10.08125,35.0,5,10.568538
2,C,Brayden Schenn,86478-9266,5200,STL@COL,STL,10.410204,38.0,5,10.390733
3,W,Drake Batherson,86478-82190,7200,MON@OTT,OTT,13.702084,11.0,5,16.298494
4,W,Frank Vatrano,86478-61549,6000,ARI@ANH,ANA,10.691837,15.0,5,15.971384
5,W,Oliver Bjorkstrand,86478-42773,5600,CBJ@SEA,SEA,10.047916,29.0,5,12.427158
6,D,Aaron Ekblad,86478-55607,6700,BOS@FLA,FLA,12.08,1.0,5,14.222054
7,D,Samuel Girard,86478-73909,4400,STL@COL,COL,7.970731,36.0,5,8.317428
8,G,Logan Thompson,86478-97093,7400,VGK@NYI,VEG,18.057143,5.0,5,14.369261


salary sum: 55000 
lineup FPPG mean: 11.976333218100121 
lineup Rank mean: 19.444444444444443 
lineup Predicted FP: 119.49171141126574


In [256]:
filter_df[['Nickname','Salary', 'FPPG', 'Prediction_LGBM']]

Unnamed: 0,Nickname,Salary,FPPG,Prediction_LGBM
0,Connor McDavid,10600,25.771430,20.503430
1,Nathan MacKinnon,10300,22.730555,19.098275
2,Steven Stamkos,8900,18.514894,17.924714
3,Evgeni Malkin,7400,14.745833,16.926660
4,Ryan Nugent-Hopkins,8400,16.028572,16.465592
...,...,...,...,...
464,Anthony Stolarz,7600,8.800000,5.470033
465,Filip Gustavsson,8000,16.872728,5.269154
466,Kaapo Kahkonen,6800,7.733333,4.772807
467,Adin Hill,7400,13.790476,4.610524


## Single Game Lineups

In [296]:
import os
import pickle
import numpy as np
import pandas as pd

pred_dir = 'Predictions/'
roster_file = 'rosters/BOS_FLA.csv'

In [297]:
# read in model predictions
c_preds = pd.read_csv(pred_dir + 'C_Predictions_LGBM_'+str(curr_date)+'.csv')
w_preds = pd.read_csv(pred_dir + 'W_Predictions_LGBM_'+str(curr_date)+'.csv')
d_preds = pd.read_csv(pred_dir + 'D_Predictions_LGBM_'+str(curr_date)+'.csv')
g_preds = pd.read_csv(pred_dir + 'G_Predictions_LGBM_'+str(curr_date)+'.csv')

# concat predictions to a single dataframe
all_preds = pd.concat([c_preds, w_preds, d_preds, g_preds]).reset_index(drop = True)
all_preds = all_preds.drop(['Unnamed: 0'], axis = 1)

In [298]:
all_preds = all_preds.rename(columns = {'Name' : 'Nickname'})

# fix names with accent marks/suffixes
import unidecode

for i in range(len(all_preds)):
    # accent mark fix
    all_preds.iloc[i, 2] = unidecode.unidecode(all_preds.iloc[i, 2])
    # drop Jrs.
    if all_preds.iloc[i, 2][-3:] == 'Jr.' or all_preds.iloc[i, 2][-2:] == 'IV':
        all_preds.iloc[i, 2] = all_preds.iloc[i, 2][:-3]

# read in fanduel salaries
fd_df = pd.read_csv(roster_file)

In [299]:
# merge predictions and FD df so salaries are available
all_preds_merged = pd.merge(all_preds, fd_df, on='Nickname')

# drop players w/ 0 FPPG
all_preds_merged = all_preds_merged[(all_preds_merged['FPPG'] != 0)].reset_index(drop = True)

# keep players w/ > 2.1 FPPG
all_preds_merged = all_preds_merged[(all_preds_merged['FPPG'] >= 2.1)].reset_index(drop = True)

# drop injured players
all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'O')].reset_index(drop = True)
all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'IR')].reset_index(drop = True)
# all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'NA')].reset_index(drop = True)
# all_preds_merged = all_preds_merged[(all_preds_merged['Injury Indicator'] != 'GTD')].reset_index(drop = True)

# drop players based on how many games they've played
# makes sure that players who don't play aren't considered
all_preds_merged = all_preds_merged[(all_preds_merged.Played >= all_preds_merged.Played.quantile(.2))].reset_index(drop = True)

# drop players based on salary cutoffs
# makes sure that bad players are excluded for each position
all_preds_merged = all_preds_merged[(all_preds_merged.Salary >= 6500)].reset_index(drop = True)

# drop unnamed columns originating from fd_df
cols = all_preds_merged.columns
for col in cols:
    if 'Unnamed'in col:
        all_preds_merged.drop(col, inplace = True, axis = 1)

In [300]:
all_preds_merged

Unnamed: 0,Season,Date,Nickname,Team_x,Opponent_x,Prediction_LGBM,PredictRank_LGBM,Id,Position,First Name,...,FPPG,Played,Salary,Game,Team_y,Opponent_y,Injury Indicator,Injury Details,Tier,Roster Position
0,2022,20230128,Sam Reinhart,FLA,BOS,13.298571,16.0,86482-55375,W,Sam,...,12.354901,51.0,8500,BOS@FLA,FLA,BOS,,,,Captain - 1.5x Pts/UTIL
1,2022,20230128,Aleksander Barkov,FLA,BOS,12.985959,18.0,86482-41393,C,Aleksander,...,15.973171,41.0,12500,BOS@FLA,FLA,BOS,,,,Captain - 1.5x Pts/UTIL
2,2022,20230128,Patrice Bergeron,BOS,FLA,12.74994,19.0,86482-8175,C,Patrice,...,14.802083,48.0,14000,BOS@FLA,BOS,FLA,,,,Captain - 1.5x Pts/UTIL
3,2022,20230128,Carter Verhaeghe,FLA,BOS,12.092941,23.0,86482-43028,W,Carter,...,14.65,50.0,11500,BOS@FLA,FLA,BOS,,,,Captain - 1.5x Pts/UTIL
4,2022,20230128,David Krejci,BOS,FLA,11.769493,27.0,86482-8176,C,David,...,11.931818,44.0,9000,BOS@FLA,BOS,FLA,,,,Captain - 1.5x Pts/UTIL
5,2022,20230128,Pavel Zacha,BOS,FLA,9.436556,44.0,86482-65970,C,Pavel,...,9.15625,48.0,7500,BOS@FLA,BOS,FLA,,,,Captain - 1.5x Pts/UTIL
6,2022,20230128,Charlie Coyle,BOS,FLA,7.872801,57.0,86482-16969,C,Charlie,...,9.316667,48.0,8000,BOS@FLA,BOS,FLA,,,,Captain - 1.5x Pts/UTIL
7,2022,20230128,Anton Lundell,FLA,BOS,7.780734,59.0,86482-152882,C,Anton,...,9.507143,42.0,8000,BOS@FLA,FLA,BOS,,,,Captain - 1.5x Pts/UTIL
8,2022,20230128,Trent Frederic,BOS,FLA,6.841376,78.0,86482-70927,C,Trent,...,7.288889,45.0,7500,BOS@FLA,BOS,FLA,,,,Captain - 1.5x Pts/UTIL
9,2022,20230128,Sam Bennett,FLA,BOS,6.789653,79.0,86482-58332,C,Sam,...,11.52,50.0,8500,BOS@FLA,FLA,BOS,,,,Captain - 1.5x Pts/UTIL


In [301]:
# players by position
all_preds_merged['Position'].value_counts()

D    12
C    10
W     9
Name: Position, dtype: int64

In [302]:
# filter_df for pulp
filter_df = all_preds_merged.copy()
filter_df['MaxUsed'] = 0
filter_df['Pos_C'] = np.where(filter_df['Position']=='C', 1, 0)
filter_df['Pos_W'] = np.where(filter_df['Position']=='W', 1, 0)
filter_df['Pos_D'] = np.where(filter_df['Position']=='D', 1, 0)
filter_df['Pos_G'] = np.where(filter_df['Position']=='G', 1, 0)
filter_df['Pos_All'] = 1

In [303]:
# ensure players are from different teams (at least 1 player from each team)
teams = filter_df['Team_x'].value_counts().index.tolist()
filter_df['Team_1'] = np.where((filter_df['Team_x']==teams[0]), 1, 0)
filter_df['Team_2'] = np.where((filter_df['Team_x']==teams[1]), 1, 0)

In [304]:
encoded = pd.get_dummies(filter_df['Nickname']) #<-- One-Hot Encoding 

In [305]:
filter_df = filter_df.join(encoded) #<-- joining it to the raw_data table

In [306]:
encoded.columns.tolist()

['Aaron Ekblad',
 'Aleksander Barkov',
 'Anton Lundell',
 'Brad Marchand',
 'Brandon Carlo',
 'Brandon Montour',
 'Carter Verhaeghe',
 'Charlie Coyle',
 'Charlie McAvoy',
 'Colin White',
 'Connor Clifton',
 'David Krejci',
 'David Pastrnak',
 'Derek Forbort',
 'Eetu Luostarinen',
 'Eric Staal',
 'Gustav Forsling',
 'Hampus Lindholm',
 'Josh Mahura',
 'Marc Staal',
 'Matt Grzelcyk',
 'Matthew Tkachuk',
 'Nick Cousins',
 'Patrice Bergeron',
 'Pavel Zacha',
 'Radko Gudas',
 'Ryan Lomberg',
 'Sam Bennett',
 'Sam Reinhart',
 'Taylor Hall',
 'Trent Frederic']

In [311]:
import pulp

def pred_lineup_singlegame(df, lineups, overlap, player_names = [0], nonplayer_names = [0]):
    #Initialize our PuLP problem, we choose pulp.LpMinimize because we are going to want the lowest ranked players numerically. (1st is better than 2nd, technically smaller though)
    prob = pulp.LpProblem('NHL_DK', pulp.LpMaximize)
    
    #Count How Many Different Players We Have (Including Team D)
    num_all = df.shape[0]
    
    #Create Salary Cap Variable
    salary_cap = 55000
    
    #Create List Of PuLP player variables
    players_lineup = [pulp.LpVariable("Player_{}".format(i+1), cat="Binary") for i in range(num_all)]
    
    #Tell our PuLP solver that we want our lineup to have our prediction field to be maximized
    prob += pulp.lpSum((pulp.lpSum(df.loc[i, 'Prediction_LGBM'] * players_lineup[i] for i in range(num_all))))
    
    # Make sure only no goalies are selected
    prob += ((pulp.lpSum(df.loc[i, 'Pos_G'] * players_lineup[i] for i in range(num_all)))  == 0)
    
    #Make Sure To Select 9 Players Total
    prob += ((pulp.lpSum(df.loc[i, 'Pos_All'] * players_lineup[i] for i in range(num_all))) == 5)
    
    #add the salary constraint
    prob += ((pulp.lpSum(df.loc[i, 'Salary'] * players_lineup[i] for i in range(num_all))) <= salary_cap)
    
    #Add Our MaxUsed Constraint
    prob += ((pulp.lpSum(df.loc[i, 'MaxUsed'] * players_lineup[i] for i in range(num_all))) == 0)
    
    #Add constraints for teams so at least 1 player from each team is selected
    prob += ((pulp.lpSum(df.loc[i, 'Team_1'] * players_lineup[i] for i in range(num_all))) <= 4)
    prob += ((pulp.lpSum(df.loc[i, 'Team_2'] * players_lineup[i] for i in range(num_all))) <= 4)
    
    #select specific player, if indicated
    if player_names != [0]:
        for player_name in player_names:
            prob += ((pulp.lpSum(df.loc[i, player_name] * players_lineup[i] for i in range(num_all)))  == 1)
            
    #remove specific player from consideration, if indicated
    if nonplayer_names != [0]:
        for nonplayer_name in nonplayer_names:
            prob += ((pulp.lpSum(df.loc[i, nonplayer_name] * players_lineup[i] for i in range(num_all)))  == 0)
    
    # add constraint to select only 1 of each player
    for player in encoded.columns.tolist():
        prob += ((pulp.lpSum(df.loc[i, player] * players_lineup[i] for i in range(num_all))) <= 1)
    
    #variance constraints - each lineup can't have more than the num overlap of any combination of players in any previous lineups
    for i in range(len(lineups)):
        prob += ((pulp.lpSum(lineups[i][k] * players_lineup[k] for k in range(num_all))) <= overlap)
        
    #solve the problem
    status = prob.solve()
    
    #check if the optimizer found an optimal solution
    if status != pulp.LpStatusOptimal:
        print('Only {} feasible lineups produced'.format(len(lineups)), '\n')
        return None
    lineup_copy = []
    for i in range(num_all):
        if players_lineup[i].varValue >= 0.9 and players_lineup[i].varValue <= 1.1:
            lineup_copy.append(1)
        else:
            lineup_copy.append(0)

    return lineup_copy

In [312]:
# function to tranform lineups resulting from PULP optimization
def transform_lineup(lineup, lineup_num):
    positions, names, ids, salaries = [], [], [], []
    game_infos, teams, ppgs, ranks = [], [], [], []
    predictions = []
    pos_C = []
    pos_W = []
    pos_D = []
    pos_G = []

    players_lineup = lineup[:len(lineup)]
    for num, player in enumerate(players_lineup):
        if player == 1:
            positions.append(filter_df.loc[num, 'Position'])
            names.append(filter_df.loc[num, 'Nickname'])
            ids.append(filter_df.loc[num, 'Id'])
            salaries.append(filter_df.loc[num, 'Salary'])
            game_infos.append(filter_df.loc[num, 'Game'])
            teams.append(filter_df.loc[num, 'Team_x'])
            ppgs.append(filter_df.loc[num, 'FPPG'])
            ranks.append(filter_df.loc[num, 'PredictRank_LGBM'])
            predictions.append(filter_df.loc[num, 'Prediction_LGBM'])
            pos_C.append(filter_df.loc[num, 'Pos_C']),
            pos_W.append(filter_df.loc[num, 'Pos_W']),
            pos_D.append(filter_df.loc[num, 'Pos_D']),
            pos_G.append(filter_df.loc[num, 'Pos_G']),
            
    df = pd.DataFrame({'Position':positions,'Name':names, 'ID':ids, 'Salary':salaries, 
                       'Game Info':game_infos,'TeamAbbrev':teams,'FPPG':ppgs,
                       'Rank':ranks,'LineupNum':lineup_num, 'Prediction':predictions,
                       'Pos_C':pos_C, 'Pos_W':pos_W, 'Pos_D':pos_D, 'Pos_G':pos_G,})
    return df

In [317]:
csv_dfs = []
# function to display transformed lineups optimized by PULP
def gen_lineups_singlegame(df, num_lineups, overlap, player_names = [0], nonplayer_names = [0]):
    lineup_list = []
    for i in range(num_lineups):
        lineup_list.append(pred_lineup_singlegame(df, lineup_list, overlap, player_names, nonplayer_names))
    print('Number of generated lineups:', len(lineup_list))
    
    for i in range(len(lineup_list)):
        lineup_df = transform_lineup(lineup_list[i], i+1)
        lineup_df['FD_ID'] = lineup_df['ID'] + ':' + lineup_df['Name']
        
        # custom sort for lineup
        sort_dict = {'C': 0, 'W': 1, 'D': 2, 'G': 3} 
        display(lineup_df[['Name', 'Salary', 'FPPG', 'TeamAbbrev', 'Prediction']].sort_values(by=['Prediction'], ascending = False))
        
        print('salary sum:', lineup_df['Salary'].sum(), '\nlineup FPPG mean:', lineup_df['FPPG'].mean(), '\nlineup Rank mean:', lineup_df['Rank'].mean(), '\nlineup Predicted FP:', lineup_df['Prediction'].sum())
        v_counts = lineup_df['Position'].value_counts()
        values = lineup_df['Position'].value_counts().keys().tolist()
        counts = lineup_df['Position'].value_counts().tolist()

In [318]:
# lineup generation (don't enter player name as 4th arg. (which should be a list) unless specific player is desired)
gen_lineups_singlegame(filter_df, 5, 3, [],[])

Number of generated lineups: 5


Unnamed: 0,Name,Salary,FPPG,TeamAbbrev,Prediction
2,Matthew Tkachuk,14500,19.808333,FLA,17.784324
3,Brad Marchand,13500,15.539024,BOS,14.713951
4,Aaron Ekblad,9000,12.08,FLA,14.222054
0,Sam Reinhart,8500,12.354901,FLA,13.298571
1,David Krejci,9000,11.931818,BOS,11.769493


salary sum: 54500 
lineup FPPG mean: 14.342815368574062 
lineup Rank mean: 13.8 
lineup Predicted FP: 71.78839282186675


Unnamed: 0,Name,Salary,FPPG,TeamAbbrev,Prediction
2,Matthew Tkachuk,14500,19.808333,FLA,17.784324
3,David Pastrnak,15500,23.341667,BOS,16.175625
4,Aaron Ekblad,9000,12.08,FLA,14.222054
0,Sam Reinhart,8500,12.354901,FLA,13.298571
1,Pavel Zacha,7500,9.15625,BOS,9.436556


salary sum: 55000 
lineup FPPG mean: 15.348230408313231 
lineup Rank mean: 16.0 
lineup Predicted FP: 70.9171294213786


Unnamed: 0,Name,Salary,FPPG,TeamAbbrev,Prediction
3,David Pastrnak,15500,23.341667,BOS,16.175625
4,Aaron Ekblad,9000,12.08,FLA,14.222054
0,Sam Reinhart,8500,12.354901,FLA,13.298571
1,Aleksander Barkov,12500,15.973171,FLA,12.985959
2,David Krejci,9000,11.931818,BOS,11.769493


salary sum: 54500 
lineup FPPG mean: 15.136311694307278 
lineup Rank mean: 15.0 
lineup Predicted FP: 68.45170122924553


Unnamed: 0,Name,Salary,FPPG,TeamAbbrev,Prediction
1,Matthew Tkachuk,14500,19.808333,FLA,17.784324
2,Brad Marchand,13500,15.539024,BOS,14.713951
3,Aaron Ekblad,9000,12.08,FLA,14.222054
4,Brandon Montour,10500,14.044898,FLA,12.13474
0,Pavel Zacha,7500,9.15625,BOS,9.436556


salary sum: 55000 
lineup FPPG mean: 14.125701077456338 
lineup Rank mean: 15.4 
lineup Predicted FP: 68.29162423005475


Unnamed: 0,Name,Salary,FPPG,TeamAbbrev,Prediction
2,Matthew Tkachuk,14500,19.808333,FLA,17.784324
3,Aaron Ekblad,9000,12.08,FLA,14.222054
0,Sam Reinhart,8500,12.354901,FLA,13.298571
1,Aleksander Barkov,12500,15.973171,FLA,12.985959
4,Hampus Lindholm,9500,11.293749,BOS,9.9119


salary sum: 54000 
lineup FPPG mean: 14.302031136963825 
lineup Rank mean: 12.0 
lineup Predicted FP: 68.20280758991744


In [256]:
filter_df[['Nickname','Salary', 'FPPG', 'Prediction_LGBM']]

Unnamed: 0,Nickname,Salary,FPPG,Prediction_LGBM
0,Connor McDavid,10600,25.771430,20.503430
1,Nathan MacKinnon,10300,22.730555,19.098275
2,Steven Stamkos,8900,18.514894,17.924714
3,Evgeni Malkin,7400,14.745833,16.926660
4,Ryan Nugent-Hopkins,8400,16.028572,16.465592
...,...,...,...,...
464,Anthony Stolarz,7600,8.800000,5.470033
465,Filip Gustavsson,8000,16.872728,5.269154
466,Kaapo Kahkonen,6800,7.733333,4.772807
467,Adin Hill,7400,13.790476,4.610524
