# References

In [1]:
# References

# week11 - Demo_async (functional vs sequential API)
# week06 - FFNN_DiabeticRetinopathy (building a FFNN)
# week05 - imbalanced_data (standardization)
# week03 - linear_regression (keras linear regression model)

# Import packages and data

In [2]:
# Standard
import pandas as pd
import numpy as np
import random
import os
import math

# TF and Keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras import models
from keras import layers
from keras import metrics
from keras import losses

# Plots and images
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Image
%matplotlib inline
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost as xgb
pd.set_option('display.max_columns', None)

# Silence TF
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Setting random seeds
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

2023-12-03 22:02:31.950807: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Google Data Import
Run to access your drive from google colab

from google.colab import drive
drive.mount('/content/drive', force_remount=False)

X_train = pd.read_csv('drive/MyDrive/data/X_train.csv')
Y_train = pd.read_csv('drive/MyDrive/data/Y_train.csv')
X_val = pd.read_csv('drive/MyDrive/data/X_val.csv')
Y_val = pd.read_csv('drive/MyDrive/data/Y_val.csv')

## Local Data Import

In [3]:
X_train = pd.read_csv('data/X_train.csv')
Y_train = pd.read_csv('data/Y_train.csv')
X_val = pd.read_csv('data/X_val.csv')
Y_val = pd.read_csv('data/Y_val.csv')
X_test = pd.read_csv('data/X_test.csv')
Y_test = pd.read_csv('data/Y_test.csv')

In [4]:
X_train.drop(columns=['Unnamed: 0'], inplace=True)
Y_train.drop(columns=['Unnamed: 0'], inplace=True)
X_val.drop(columns=['Unnamed: 0'], inplace=True)
Y_val.drop(columns=['Unnamed: 0'], inplace=True)
X_test.drop(columns=['Unnamed: 0'], inplace=True)
Y_test.drop(columns=['Unnamed: 0'], inplace=True)

In [5]:
# Joining in Team Difficulty Index to refactor the player team name
# Importing difficulty data
difficulty_data = pd.read_csv('data/team_difficulty_ind.csv')

X_train = X_train.merge(difficulty_data, how='left', left_on='player_team_name', right_on='team_name').drop(columns = ['team_name'])
X_val = X_val.merge(difficulty_data, how='left', left_on='player_team_name', right_on='team_name').drop(columns = ['team_name'])
X_test = X_test.merge(difficulty_data, how='left', left_on='player_team_name', right_on='team_name').drop(columns = ['team_name'])

# Renaming the opponent difficulty column
X_train.rename(columns={"FDI": "player_team_ind"}, inplace=True)
X_val.rename(columns={"FDI": "player_team_ind"}, inplace=True)
X_test.rename(columns={"FDI": "player_team_ind"}, inplace=True)

In [6]:
X_train.head()

Unnamed: 0,season,player_team_name,opp_team_name,opp_diff_ind,position,GW,was_home,assists_lag_1,bonus_lag_1,bps_lag_1,clean_sheets_lag_1,creativity_lag_1,goals_conceded_lag_1,goals_scored_lag_1,ict_index_lag_1,influence_lag_1,minutes_lag_1,own_goals_lag_1,penalties_missed_lag_1,penalties_saved_lag_1,red_cards_lag_1,saves_lag_1,selected_lag_1,player_team_score_lag_1,opp_team_score_lag_1,threat_lag_1,total_points_lag_1,value_lag_1,yellow_cards_lag_1,assists_lag_3,bonus_lag_3,bps_lag_3,clean_sheets_lag_3,creativity_lag_3,goals_conceded_lag_3,goals_scored_lag_3,ict_index_lag_3,influence_lag_3,minutes_lag_3,own_goals_lag_3,penalties_missed_lag_3,penalties_saved_lag_3,red_cards_lag_3,saves_lag_3,selected_lag_3,player_team_score_lag_3,opp_team_score_lag_3,threat_lag_3,total_points_lag_3,value_lag_3,yellow_cards_lag_3,assists_lag_5,bonus_lag_5,bps_lag_5,clean_sheets_lag_5,creativity_lag_5,goals_conceded_lag_5,goals_scored_lag_5,ict_index_lag_5,influence_lag_5,minutes_lag_5,own_goals_lag_5,penalties_missed_lag_5,penalties_saved_lag_5,red_cards_lag_5,saves_lag_5,selected_lag_5,player_team_score_lag_5,opp_team_score_lag_5,threat_lag_5,total_points_lag_5,value_lag_5,yellow_cards_lag_5,player_team_ind
0,2018-19,West Ham,Liverpool,4,DEF,1,False,-0.184964,-0.215403,-0.606985,-0.337274,-0.43781,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.406561,-1.049287,-1.053934,-0.391042,-0.532634,-3.367246,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.68483,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.408072,-1.599196,-1.61956,-0.494354,-0.694657,-3.37034,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.409936,-1.808301,-1.899736,-0.529878,-0.752585,-3.373454,-0.430582,2
1,2018-19,West Ham,Bournemouth,1,DEF,2,True,-0.184964,-0.215403,-0.606985,-0.337274,-0.43781,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.158255,-1.049287,2.131566,-0.391042,-0.532634,0.335223,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.68483,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.156925,-1.599196,3.264136,-0.494354,-0.694657,0.332269,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.155985,-1.808301,3.816565,-0.529878,-0.752585,0.32949,-0.430582,2
2,2018-19,West Ham,Arsenal,4,DEF,3,False,-0.184964,-0.215403,-0.606985,-0.337274,-0.43781,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.187225,-0.249288,0.538816,-0.391042,-0.532634,0.335223,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.68483,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.171575,-0.99064,2.043212,-0.494354,-0.694657,0.332269,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.170799,-1.121215,2.38749,-0.529878,-0.752585,0.32949,-0.430582,2
3,2018-19,West Ham,Wolves,2,DEF,4,True,-0.184964,-0.215403,-0.606985,-0.337274,-0.43781,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.240123,-0.249288,1.335191,-0.391042,-0.532634,0.267905,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.68483,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.194293,-0.787788,2.043212,-0.494354,-0.694657,0.309829,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.19377,-0.892187,2.38749,-0.529878,-0.752585,0.307048,-0.430582,2
4,2018-19,West Ham,Everton,2,DEF,5,False,-0.184964,-0.215403,0.433488,-0.337274,2.172931,0.540089,-0.188555,0.665147,0.214342,1.437098,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.26803,-1.049287,-0.257559,-0.391042,-0.130098,0.200588,4.315198,-0.286585,-0.332783,-0.322958,-0.495921,0.542159,-0.215361,-0.279485,-0.186392,-0.379283,-0.051107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.231303,-0.787788,0.822288,-0.494354,-0.520153,0.264949,2.019094,-0.333476,-0.387287,-0.458941,-0.566673,0.287966,-0.368606,-0.316723,-0.33049,-0.494888,-0.267931,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.212392,-1.121215,1.672952,-0.529878,-0.611039,0.278995,1.677694,2


In [7]:
cat_columns = ['GW', 'player_team_ind', 'opp_diff_ind','position', 'was_home']
#dropping season, player_team_name, opp_team_name due to categorical encoding limitations (e.g. Season 2018/19 does not exist in Validation set)
dropped_columns = ['season', 'player_team_name', 'opp_team_name']

X_train_encoded = pd.get_dummies(X_train, columns = cat_columns)
X_train_encoded.drop(columns=dropped_columns, inplace=True)

X_val_encoded = pd.get_dummies(X_val, columns = cat_columns)
X_val_encoded.drop(columns=dropped_columns, inplace=True)

X_test_encoded = pd.get_dummies(X_test, columns = cat_columns)
X_test_encoded.drop(columns=dropped_columns, inplace=True)

In [8]:
X_train_encoded

Unnamed: 0,assists_lag_1,bonus_lag_1,bps_lag_1,clean_sheets_lag_1,creativity_lag_1,goals_conceded_lag_1,goals_scored_lag_1,ict_index_lag_1,influence_lag_1,minutes_lag_1,own_goals_lag_1,penalties_missed_lag_1,penalties_saved_lag_1,red_cards_lag_1,saves_lag_1,selected_lag_1,player_team_score_lag_1,opp_team_score_lag_1,threat_lag_1,total_points_lag_1,value_lag_1,yellow_cards_lag_1,assists_lag_3,bonus_lag_3,bps_lag_3,clean_sheets_lag_3,creativity_lag_3,goals_conceded_lag_3,goals_scored_lag_3,ict_index_lag_3,influence_lag_3,minutes_lag_3,own_goals_lag_3,penalties_missed_lag_3,penalties_saved_lag_3,red_cards_lag_3,saves_lag_3,selected_lag_3,player_team_score_lag_3,opp_team_score_lag_3,threat_lag_3,total_points_lag_3,value_lag_3,yellow_cards_lag_3,assists_lag_5,bonus_lag_5,bps_lag_5,clean_sheets_lag_5,creativity_lag_5,goals_conceded_lag_5,goals_scored_lag_5,ict_index_lag_5,influence_lag_5,minutes_lag_5,own_goals_lag_5,penalties_missed_lag_5,penalties_saved_lag_5,red_cards_lag_5,saves_lag_5,selected_lag_5,player_team_score_lag_5,opp_team_score_lag_5,threat_lag_5,total_points_lag_5,value_lag_5,yellow_cards_lag_5,GW_1,GW_2,GW_3,GW_4,GW_5,GW_6,GW_7,GW_8,GW_9,GW_10,GW_11,GW_12,GW_13,GW_14,GW_15,GW_16,GW_17,GW_18,GW_19,GW_20,GW_21,GW_22,GW_23,GW_24,GW_25,GW_26,GW_27,GW_28,GW_29,GW_30,GW_31,GW_32,GW_33,GW_34,GW_35,GW_36,GW_37,GW_38,player_team_ind_1,player_team_ind_2,player_team_ind_3,player_team_ind_4,opp_diff_ind_1,opp_diff_ind_2,opp_diff_ind_3,opp_diff_ind_4,position_DEF,position_FWD,position_GK,position_GKP,position_MID,was_home_False,was_home_True
0,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.406561,-1.049287,-1.053934,-0.391042,-0.532634,-3.367246,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.408072,-1.599196,-1.619560,-0.494354,-0.694657,-3.370340,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.409936,-1.808301,-1.899736,-0.529878,-0.752585,-3.373454,-0.430582,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False
1,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.158255,-1.049287,2.131566,-0.391042,-0.532634,0.335223,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.156925,-1.599196,3.264136,-0.494354,-0.694657,0.332269,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.155985,-1.808301,3.816565,-0.529878,-0.752585,0.329490,-0.430582,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True
2,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.187225,-0.249288,0.538816,-0.391042,-0.532634,0.335223,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.171575,-0.990640,2.043212,-0.494354,-0.694657,0.332269,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.170799,-1.121215,2.387490,-0.529878,-0.752585,0.329490,-0.430582,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,False,False,False,False,True,False
3,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.240123,-0.249288,1.335191,-0.391042,-0.532634,0.267905,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.194293,-0.787788,2.043212,-0.494354,-0.694657,0.309829,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.193770,-0.892187,2.387490,-0.529878,-0.752585,0.307048,-0.430582,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,False,True
4,-0.184964,-0.215403,0.433488,-0.337274,2.172931,0.540089,-0.188555,0.665147,0.214342,1.437098,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,-0.268030,-1.049287,-0.257559,-0.391042,-0.130098,0.200588,4.315198,-0.286585,-0.332783,-0.322958,-0.495921,0.542159,-0.215361,-0.279485,-0.186392,-0.379283,-0.051107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,-0.231303,-0.787788,0.822288,-0.494354,-0.520153,0.264949,2.019094,-0.333476,-0.387287,-0.458941,-0.566673,0.287966,-0.368606,-0.316723,-0.330490,-0.494888,-0.267931,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,-0.212392,-1.121215,1.672952,-0.529878,-0.611039,0.278995,1.677694,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68786,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,0.264949,0.550711,-0.257559,-0.391042,-0.532634,-0.674541,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,0.275431,0.429324,0.415313,-0.494354,-0.694657,-0.677533,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,0.287310,0.665207,0.386784,-0.529878,-0.752585,-0.680404,-0.430582,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,False,False,True
68787,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,0.257406,-0.249288,1.335191,-0.391042,-0.532634,-0.674541,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,0.269612,0.429324,0.822288,-0.494354,-0.694657,-0.677533,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,0.281039,0.115539,0.958415,-0.529878,-0.752585,-0.680404,-0.430582,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False,False,False,False,True,False,False,False,True
68788,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,0.257406,-1.049287,-1.053934,-0.391042,-0.532634,-0.674541,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,0.266037,-0.382084,0.008339,-0.494354,-0.694657,-0.677533,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,0.276369,-0.159296,0.386784,-0.529878,-0.752585,-0.680404,-0.430582,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,True,False,False,False,False,True,False,False,True,False
68789,-0.184964,-0.215403,-0.606985,-0.337274,-0.437810,-0.508046,-0.188555,-0.562091,-0.555264,-0.785924,-0.038724,-0.029791,-0.025034,-0.044671,-0.150989,0.254330,0.550711,1.335191,-0.391042,-0.532634,-0.674541,-0.231739,-0.286585,-0.332783,-0.750849,-0.495921,-0.548162,-0.684023,-0.279485,-0.684830,-0.704265,-0.872107,-0.063972,-0.049956,-0.04048,-0.075565,-0.171572,0.262457,-0.382084,0.822288,-0.494354,-0.694657,-0.677533,-0.364314,-0.333476,-0.387287,-0.800614,-0.566673,-0.586000,-0.754589,-0.316723,-0.727066,-0.756172,-0.907968,-0.079379,-0.061733,-0.048352,-0.093734,-0.177503,0.272210,0.115539,0.672600,-0.529878,-0.752585,-0.680404,-0.430582,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,False


# Tree Algorithm Runs
- Decision Tree Regression
- Gradient Boosting Regression
- Random Forest Boosting Regression
- XGBoosted Regression

In [9]:
# Decision Tree
DTmodel = DecisionTreeRegressor(max_depth=100, random_state=0, criterion='squared_error')
DTmodel.fit(X_train_encoded, Y_train)
DTtrain_mse = mean_squared_error(y_true = Y_train, y_pred = DTmodel.predict(X_train_encoded))
DTval_mse = mean_squared_error(y_true = Y_val, y_pred = DTmodel.predict(X_val_encoded))
print('Decision Tree Train MSE:', DTtrain_mse)
print('Decision Tree Validation MSE:', DTval_mse)

Decision Tree Train MSE: 0.160522325401516
Decision Tree Validation MSE: 8.997440186459333


In [10]:
# Gradient Boosting
GBmodel = GradientBoostingRegressor(max_depth=100, random_state=0, criterion='squared_error')
GBmodel.fit(X_train_encoded, Y_train)
GBtrain_mse = mean_squared_error(y_true = Y_train, y_pred = GBmodel.predict(X_train_encoded))
GBval_mse = mean_squared_error(y_true = Y_val, y_pred = GBmodel.predict(X_val_encoded))
print('Gradient Boosting Train MSE:', GBtrain_mse)
print('Gradient Boosting Validation MSE:', GBval_mse)

  y = column_or_1d(y, warn=True)


Gradient Boosting Train MSE: 0.16052232973499825
Gradient Boosting Validation MSE: 7.985427433881894


In [11]:
# Random Forest Boosting
RFmodel = RandomForestRegressor(max_depth=100, random_state=0, criterion='squared_error')
RFmodel.fit(X_train_encoded, Y_train.values.ravel())
RFtrain_mse = mean_squared_error(y_true = Y_train, y_pred = RFmodel.predict(X_train_encoded))
RFval_mse = mean_squared_error(y_true = Y_val, y_pred = RFmodel.predict(X_val_encoded))
print('Random Forest Train MSE:', RFtrain_mse)
print('Random Forest Validation MSE:', RFval_mse)

Random Forest Train MSE: 0.7839539325030256
Random Forest Validation MSE: 4.562945342550019


In [12]:
# XGBoost Model
xgb_model = xgb.XGBRegressor(eval_metric="rmse", eta = .3, random_state=0)
xgb_model.fit(X_train_encoded, Y_train)
xgbtrain_mse = mean_squared_error(y_true = Y_train, y_pred = xgb_model.predict(X_train_encoded))
xgbval_mse = mean_squared_error(y_true = Y_val, y_pred = xgb_model.predict(X_val_encoded))
print('Extreme Gradient Boosting Train MSE:', xgbtrain_mse)
print('Extreme Gradient Boosting Validation MSE:', xgbval_mse)

  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  elif is_categorical_dtype(dtype) and enable_categorical:
  if is_categorical_dtype(dtype)
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


Extreme Gradient Boosting Train MSE: 2.9762724141484163
Extreme Gradient Boosting Validation MSE: 4.562016953162534


# Making a Baseline

In [13]:
# Creating a baseline with the average value
baseline = Y_train['total_points'].mean()
baseline

1.3593784070590629

In [14]:
true = np.array(Y_train['total_points'])
pred = np.array(pred_vect(Y_train['total_points'],1))

#print(f'RMSE is {round(RMSE(true, pred),3)}')
print(f'MSE is {round(MSE(true, pred),3)}')
print(f'MAE is {round(MAE(true, pred),3)}')

NameError: name 'pred_vect' is not defined