In [245]:
import nbformat
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold

In [246]:
def kfold_cross_validation(model, splits, X, y):
    mae = []
    mse = []
    r2 = []
    rms = []

    kf = KFold(5, shuffle=True, random_state=42)
    for train_ind, test_ind in kf.split(X):
        X_train, Y_train = X.iloc[train_ind], y.iloc[train_ind]
        X_test, Y_test = X.iloc[test_ind], y.iloc[test_ind]

        model.fit(X_train, Y_train)

        y_pred = model.predict(X_test)

            # Calculate metrics
        mae_r = mean_absolute_error(Y_test, y_pred)
        mse_r = mean_squared_error(Y_test, y_pred)
        rmse_r = np.sqrt(mse)
        r2_r = r2_score(Y_test, y_pred)

        # Output metrics
        mae.append(mae_r)
        mse.append(mse_r)
        rms.append(rmse_r)
        r2.append(r2_r)

    print("MAE = ", mae)
    print("MSE = ", mse)
    print("rms = ", rms)
    print("r2 = ", r2)


In [247]:
%run data_download_from_fpl.ipynb

{ 'chips': [...],
  'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_config': {...},
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 11108257}
{'fixtures': [...], 'history': [...], 'history_past': [...]}


100%|██████████| 709/709 [00:43<00:00, 16.29it/s]


      id_player      web_name  element  fixture  opponent_team  total_points  \
11            1  Fábio Vieira        1      111             16             0   
12            1  Fábio Vieira        1      129             19             0   
3             1  Fábio Vieira        1       39             18             0   
18            1  Fábio Vieira        1      182              4             0   
4             1  Fábio Vieira        1       47             13             0   
...         ...           ...      ...      ...            ...           ...   
1995        707   Rees-Dottin      707      185              9             0   
7486        708  Wilson-Brown      708      187             13             0   
7487        708  Wilson-Brown      708      191              2             0   
8076        709         Danns      709      190             19             0   
8077        709         Danns      709      196             14             0   

      was_home          kickoff_time  t

In [248]:
future_data_2 = future_data
past_data_2 = past_data

In [249]:
X = past_data_2.drop(columns = ['total_points'])
y = past_data_2[['total_points']]

In [250]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [251]:
model = XGBRegressor()

In [252]:
kfold_cross_validation(model, 5, X, y)

MAE =  [0.34246283769607544, 0.3441910445690155, 0.33510273694992065, 0.3427409529685974, 0.3529222905635834]
MSE =  [0.5680287480354309, 0.5409798622131348, 0.5327792763710022, 0.5448369979858398, 0.5612797737121582]
rms =  [array([], dtype=float64), array([0.75367682]), array([0.75367682, 0.73551333]), array([0.75367682, 0.73551333, 0.72991731]), array([0.75367682, 0.73551333, 0.72991731, 0.73813075])]
r2 =  [0.8963351249694824, 0.9111698865890503, 0.8912090063095093, 0.9011116027832031, 0.8971577286720276]


In [253]:
future_data_2['is_home'] = future_data_2['is_home'].astype('bool')

future_data_2['id_player'] = future_data_2['id_player'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_data_2['is_home'] = future_data_2['is_home'].astype('bool')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_data_2['id_player'] = future_data_2['id_player'].astype('int64')


In [254]:
y_future = model.predict(future_data_2.drop(columns = ['total_points', 'event']))

In [255]:
future_points = pd.DataFrame(y_future, columns=(['predicted_points']))

In [256]:
future_data_2 = future_data_2.merge(future_points, left_index=True, right_index=True)
future_data_2.head()

Unnamed: 0,id_player,opponent_team,is_home,cumulative_points,bps,total_points,event,predicted_points
0,1,18,True,0,0,0,21,0.032809
1,1,2,True,0,0,0,22,0.020634
2,1,20,False,0,0,0,23,0.002672
3,1,13,True,0,0,0,24,-0.022418
4,1,11,False,0,0,0,25,-0.020514


In [257]:
# base url for all FPL API endpoints
base_url = 'https://fantasy.premierleague.com/api/'

# get data from bootstrap-static endpoint
r = requests.get(base_url+'bootstrap-static/').json()

# show the top level fields
pprint(r, indent=2, depth=1, compact=True)

{ 'chips': [...],
  'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_config': {...},
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 11108338}


In [258]:
pd.set_option('display.max_columns', None)
# create players dataframe
players = pd.json_normalize(r['elements'])

In [259]:
future_data_2 = future_data_2.merge(players[['id', 'first_name', 'second_name']], left_on= 'id_player', right_on = 'id' )

In [260]:
future_data_2[['id_player', 'first_name', 'second_name', 'opponent_team', 'is_home', 'cumulative_points','bps', 'total_points', 'event', 'predicted_points']]

Unnamed: 0,id_player,first_name,second_name,opponent_team,is_home,cumulative_points,bps,total_points,event,predicted_points
0,1,Fábio,Ferreira Vieira,18,True,0,0,0,21,0.032809
1,1,Fábio,Ferreira Vieira,2,True,0,0,0,22,0.020634
2,1,Fábio,Ferreira Vieira,20,False,0,0,0,23,0.002672
3,1,Fábio,Ferreira Vieira,13,True,0,0,0,24,-0.022418
4,1,Fábio,Ferreira Vieira,11,False,0,0,0,25,-0.020514
...,...,...,...,...,...,...,...,...,...,...
13006,708,Thomas,Wilson-Brown,20,False,0,0,0,34,-0.008219
13007,708,Thomas,Wilson-Brown,17,True,0,0,0,35,-0.008060
13008,708,Thomas,Wilson-Brown,16,False,0,0,0,36,-0.009929
13009,708,Thomas,Wilson-Brown,10,True,0,0,0,37,0.005951


something happening with the event column