In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.data.data_loader import load_master_team_list
from src.data.data_loader import load_understat_team_stats
from src.features.data_engineering import preprocess_seasons_data, reverse_processing, get_oponent_team_stats, get_merged_seasons_data
from src.features.utils import idx_to_team_name, str_date_days_forward

import xgboost as xgb

In [2]:
# Column names handed to preprocess_seasons_data via `rolling_columns=`.
rolling_columns = [
    'assists',
    'bonus',
    'bps',
    'clean_sheets',
    'creativity',
    'goals_conceded',
    'goals_scored',
    'ict_index',
    'influence',
    'minutes',
    'saves',
    'selected',
    'player_team_score',
    'opponent_team_score',
    'threat',
    'total_points',
    'transfers_in',
    'transfers_out',
    'value',
    'yellow_cards',
]

# Handed to preprocess_seasons_data via `rolling_times=`.
# NOTE(review): presumably rolling-window lengths ('all' = whole history) — confirm.
times = ['all', 6, 3]

# Hold-out definition passed as `test_subset=`: one [season, [gameweeks]] entry
# per season.
test_subset_XL = (
    ['2016-17', [4, 8, 9, 14, 18, 20, 21, 26, 32]],
    ['2018-19', list(range(1, 11))],    # gameweeks 1..10
    ['2021-22', list(range(17, 27))],   # gameweeks 17..26
)

In [3]:
# Columns removed from the merged data before preprocessing.
drop_features = ['own_goals', 'penalties_missed', 'penalties_saved', 'red_cards']

# get_merged_seasons_data is already imported in the notebook's first cell, so
# it is not imported a second time here. Loading and dropping in a single
# expression means `data` is bound exactly once.
data = get_merged_seasons_data().drop(columns=drop_features)

In [4]:
# Build the train/test inputs from the merged data. The helper returns four
# items: the (x, y) training pair, the (x, y) test pair, a pair of *_target
# objects (later passed to reverse_processing) and a scaler object.
# random_split=False together with test_subset selects the hold-out rows from
# test_subset_XL instead of a random split.
# NOTE(review): rolling_columns / rolling_times are passed although
# rolling_features=False — presumably ignored in that case; confirm in
# preprocess_seasons_data.
(
    (x_train, y_train),
    (x_test, y_test),
    (x_train_target, x_test_target),
    x_scaler,
) = preprocess_seasons_data(
    data,
    random_split=False,
    test_subset=test_subset_XL,
    rolling_features=False,
    rolling_columns=rolling_columns,
    rolling_times=times,
    opponent_team_stats=False,
)

In [5]:
# XGBoost regressor with library-default hyperparameters. The seed is pinned
# explicitly (to the same value the fitted model reports) so the run stays
# reproducible even if sampling options such as subsample/colsample are
# introduced later.
model_xgb = xgb.XGBRegressor(random_state=0)

In [6]:
# Train on the training split. The call is deliberately left as the cell's last
# expression: fit() hands back the estimator, so the notebook displays the
# fitted model's parameters.
model_xgb.fit(X=x_train, y=y_train)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [7]:
# Run the test features back through reverse_processing together with the
# scaler and the test-side target object returned by preprocess_seasons_data.
# NOTE(review): presumably this undoes the scaling and re-attaches the
# identifying columns so rows are human-readable — confirm in
# src.features.data_engineering.
x_test_reversed = reverse_processing(
    x_test,
    x_scaler,
    x_test_target,
)

In [8]:
# Wrap the raw model output in a Series that reuses y_test's index, so each
# prediction carries the same row label as its true value, and give it a
# column name for the combined table.
y_pred_xgb = pd.Series(
    data=model_xgb.predict(x_test),
    index=y_test.index,
    name='predicted_total_points_next_gameweek',
)

In [9]:
# Put each prediction beside the reversed feature row with the same index
# label (concatenating along columns aligns the two objects on their index).
predictions_xgb = pd.concat([y_pred_xgb, x_test_reversed], axis='columns')

# Preview: the ten rows with the highest predicted points, best first.
predictions_xgb.sort_values(
    'predicted_total_points_next_gameweek', ascending=False
).head(10)

Unnamed: 0,predicted_total_points_next_gameweek,name,GW,element,value,total_points_next_gameweek,season,assists,bonus,bps,...,saves,selected,player_team_score,opponent_team_score,threat,total_points,transfers_in,transfers_out,yellow_cards,position
130595,13.344357,Mohamed Salah,26,233,131,3.0,2021-22,1.0,2.0,54.0,...,0.0,5419768.0,6.0,0.0,89.0,18.0,1263924.0,7849.0,0.0,MID
125207,12.045697,Mohamed Salah,18,233,131,0.0,2021-22,0.0,0.0,8.0,...,0.0,6483921.0,2.0,2.0,40.0,2.0,29513.0,15369.0,0.0,MID
15488,10.955445,Harry_Kane,26,403,112,13.0,2016-17,1.0,3.0,94.0,...,0.0,890180.0,4.0,0.0,50.0,20.0,96719.0,42698.0,0.0,FWD
130251,10.754548,JoÃ£o Pedro Cavaco Cancelo,26,256,72,9.0,2021-22,0.0,0.0,20.0,...,0.0,3845997.0,2.0,3.0,31.0,1.0,25871.0,199834.0,0.0,DEF
128562,10.50931,Bruno Miguel Borges Fernandes,24,277,117,2.0,2021-22,0.0,0.0,20.0,...,0.0,1805775.0,1.0,1.0,53.0,2.0,132610.0,153784.0,0.0,MID
10181,10.368421,Zlatan_Ibrahimovic,18,272,115,5.0,2016-17,2.0,3.0,55.0,...,0.0,1744404.0,3.0,1.0,65.0,15.0,544685.0,2906.0,0.0,FWD
126629,10.336879,Mohamed Salah,21,233,129,0.0,2021-22,0.0,0.0,25.0,...,0.0,5447666.0,2.0,2.0,65.0,7.0,52331.0,311864.0,0.0,MID
50234,10.295902,Paul_Pogba_302,8,302,81,2.0,2018-19,1.0,0.0,26.0,...,0.0,845475.0,3.0,2.0,75.0,4.0,16808.0,112543.0,1.0,MID
4122,9.428675,Kevin_De Bruyne,8,235,106,1.0,2016-17,0.0,0.0,6.0,...,0.0,329474.0,1.0,1.0,75.0,0.0,17492.0,99251.0,0.0,MID
4269,8.845166,Callum_Wilson,8,49,64,2.0,2016-17,1.0,0.0,35.0,...,0.0,106209.0,6.0,1.0,55.0,9.0,35553.0,3431.0,0.0,FWD
