In [1]:
#Import block
import os
import sys
import numpy as np
import pandas as pd
import my_module as mx
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error



In [2]:
#The current working directory is the main repository directory; these lines set the path to where the data is
path = os.getcwd()
data_path = path + '\\data'

#This array makes it easier to format the rotowire data
rw_labels = ["name", "Team", "Pos", "Games", "Goals", "Assists", "Pts", "+/-", "PIM", "SOG", "GWG", "PP_Goals", "PP_Assists", "SH_Goals", "SH_Assists", "Hits", "Blocked_Shots"]

#This is the breakdown of how many fantasy points a player gets for each category
points_dict = {"Goals":5, "Assists":3, "+/-":1.5, "PIM":-0.25, "PP_Goals":4, "PP_Assists":2, "SH_Goals":6, "SH_Assists":4, "Faceoffs_Won":0.25, "Faceoffs_Lost":-0.15, "Hits":0.5, "Blocked_Shots":0.75 }


In [3]:
#I have data from the 2010-2011 season through the 2021-2022 season.
#By the end of this block, there will be 12 seasons-worth of data in the "data" variable
yearly_player_data = []

for i in range(2010, 2022):
    new_data = []
    
    #Imports the rotowire and moneypuck datasets from the selected year into rdf and mdf
    rdf = pd.read_csv(data_path + '\\rotowire_data\\rotowire{}.csv'.format(str(i)))
    mdf = pd.read_csv(data_path + '\\moneypuck_data\\moneypuck{}.csv'.format(str(i)))
    
    #Formats the rotowire data
    rdf.set_axis(rw_labels, axis=1, inplace=True)
    rdf.drop(index=rdf.index[0], axis=0, inplace=True)
    
    #The Moneypuck data has information about 5-on-5, 5-on-4, 4-on-5, other, and all.
    #For this project I'm just focused on "all" since I suspect it'll give me the best results.
    mdf = mdf[mdf["situation"] == "all"]
    
    #Merges the rotowire and moneypuck dataframes
    new_data = pd.merge(rdf, mdf, on="name")
    
    #Changes the name of a few columns in the new dataframe
    new_data = new_data.rename(columns={"name":"Name","faceoffsWon":"Faceoffs_Won","faceoffsLost":"Faceoffs_Lost"})
    
    #This section calculates each player's total fantasy output for that year
    cols = new_data.columns
    fant_points = [0 for i in range(len(new_data))]
    for i in range(len(new_data)):
        for j in range(len(new_data.iloc[i])):
            mult = points_dict.get(cols[j], 0)
            if mult != 0:
                fant_points[i] += mult*int(new_data.iloc[i, j])
    
    #Adds the players' fantasy points to the new_data dataframe
    new_data["Fantasy_Points"] = fant_points
    
    #Adds new_data to the "data" array
    yearly_player_data.append(new_data)
                
    



In [4]:
ml_data_one_year = pd.DataFrame()
ml_data_two_year = pd.DataFrame()
ml_data_three_year = pd.DataFrame()
for i in range(2011, 2022):
    arr = [yearly_player_data[i-2011]]
    points_df = yearly_player_data[i-2010]
    temp = mx.merge_dataframes(arr, points_df)
    ml_data_one_year = pd.concat([ml_data_one_year, temp], ignore_index=True)
    
for i in range(2012, 2022):
    arr = [yearly_player_data[i-2012], yearly_player_data[i-2011]]
    points_df = yearly_player_data[i-2010]
    temp = mx.merge_dataframes(arr, points_df)
    ml_data_two_year = pd.concat([ml_data_two_year, temp], ignore_index=True)
    
for i in range(2013, 2022):
    arr = [yearly_player_data[i-2013], yearly_player_data[i-2012], yearly_player_data[i-2011]]
    points_df = yearly_player_data[i-2010]
    temp = mx.merge_dataframes(arr, points_df)
    ml_data_three_year = pd.concat([ml_data_three_year, temp], ignore_index=True)



ONE YEAR:

In [5]:
arr = mx.separate_fantasy_points(ml_data_one_year)
X = mx.reformat_df(arr[0])
y = arr[1]

X_train_one, X_test_one, y_train_one, y_test_one = train_test_split(X, y, random_state=1)

one_year_regr = MLPRegressor().fit(X_train_one, y_train_one)


TWO YEAR:

In [6]:
arr = mx.separate_fantasy_points(ml_data_two_year)
X = mx.reformat_df(arr[0])
y = arr[1]

X_train_two, X_test_two, y_train_two, y_test_two = train_test_split(X, y, random_state=1)

two_year_regr = MLPRegressor(max_iter=500).fit(X_train_two, y_train_two)



THREE YEAR:

In [7]:
arr = mx.separate_fantasy_points(ml_data_three_year)
X = mx.reformat_df(arr[0])
y = arr[1]

X_train_three, X_test_three, y_train_three, y_test_three = train_test_split(X, y, random_state=1)

three_year_regr = MLPRegressor(max_iter=500).fit(X_train_three, y_train_three)



ANALYSIS:

In [8]:
y_pred = one_year_regr.predict(X_test_one)

print(mean_absolute_error(y_test_one, y_pred))

y_pred = two_year_regr.predict(X_test_two)

print(mean_absolute_error(y_test_two, y_pred))

y_pred = three_year_regr.predict(X_test_three)

print(mean_absolute_error(y_test_three, y_pred))

65.38579285265222
71.29476648261131
69.35447554898951


PREDICTIONS:

In [9]:
one_year_pred = yearly_player_data[11].copy()
one_year_pred.drop(columns=["Fantasy_Points"], inplace=True)
one_year_df = mx.get_name_predictions(one_year_regr, one_year_pred)
one_year_df.sort_values(by="Prediction", ascending=False, inplace=True)
#pd.options.display.max_rows = 978
display(one_year_df)


Unnamed: 0,Name,Prediction
65,Auston Matthews,456.198174
4,Cale Makar,440.052128
86,Leon Draisaitl,439.003532
53,Nathan MacKinnon,436.302886
100,J.T. Miller,429.353726
...,...,...
407,Walker Duehr,-7.261526
497,Brayden Tracey,-12.897283
437,Cameron Hillis,-13.887496
410,Bryce Kindopp,-17.336787


In [10]:
two_year_pred = [yearly_player_data[i] for i in [10,11]]
two_year_pred = mx.merge_dataframes(two_year_pred, yearly_player_data[11])
two_year_pred.drop(columns=["Predicted_Fantasy_Points"], inplace=True)
two_year_df = mx.get_name_predictions(two_year_regr, two_year_pred)
two_year_df.sort_values(by="Prediction", ascending=False, inplace=True)
#pd.options.display.max_rows = 978
display(two_year_df)

Unnamed: 0,Name,Prediction
304,Cale Makar,356.103463
256,Pierre-Luc Dubois,345.509333
166,Roope Hintz,336.640401
105,Elias Lindholm,331.776961
0,Auston Matthews,323.692259
...,...,...
769,Tyce Thompson,-4.730549
822,Jacob Bernard-Docker,-6.873640
803,Glenn Gawdin,-7.887080
778,Tanner Laczynski,-13.223707


In [11]:
three_year_pred = [yearly_player_data[i] for i in [9,10,11]]
three_year_pred = mx.merge_dataframes(three_year_pred, yearly_player_data[11])
three_year_pred.drop(columns=["Predicted_Fantasy_Points"], inplace=True)
three_year_df = mx.get_name_predictions(three_year_regr, three_year_pred)
three_year_df.sort_values(by="Prediction", ascending=False, inplace=True)
#pd.options.display.max_rows = 978
display(three_year_df)

Unnamed: 0,Name,Prediction
87,J.T. Miller,360.902401
79,Elias Lindholm,352.146078
2,Auston Matthews,347.781535
4,Mika Zibanejad,338.293757
3,Leon Draisaitl,332.351712
...,...,...
673,Sami Niku,-0.668440
575,Taro Hirose,-1.123760
645,Matthew Peca,-1.365058
705,Rhett Gardner,-3.187181
