In [130]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Evaluate model predictions
### Select best predicted squad and compare gained points with average points from specific gameweek

In [132]:
import pandas as pd
import tensorflow as tf

from src.data.data_loader import load_average_pts
from src.features.data_engineering import preprocess_merged_seasons
from src.features.data_engineering import reverse_processing

from src.modeling.train_model import train_mlp_model
from src.modeling.predictions import merge_reversed_data_with_predictions

In [42]:
average_pts = load_average_pts()

In [43]:
average_pts.head()

Unnamed: 0,GW,AVG_PTS_2016/17,AVG_PTS_2018/19,AVG_PTS_2021/22
0,1,44,53,69
1,2,56,59,56
2,3,40,48,54
3,4,41,43,57
4,5,47,46,55


## Load specific test gameweek data
**NOTE:** To properly evaluate model predictions comparing it with average points from specific gameweek, remember to leave performance sample of every player from specific gameweek in test dataset

In [44]:
test_subset = (['2016-17', [35,36,37]], ['2021-22', [27,28,29]])

(x_train, y_train), (x_test, y_test), (x_train_target, x_test_target), x_scaler = preprocess_merged_seasons(random_split=False, test_subset=test_subset)

In [45]:
# preview unique values in 'season' and 'GW' columns from x_test_target
print(x_test_target.season.unique())
print(x_test_target.GW.unique())

['2016-17' '2021-22']
[35 36 37 27 28 29]


## Train MLP model

In [46]:
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Dense(units=128, activation='relu', input_shape=(x_train.shape[1],)))
model.add(tf.keras.layers.Dense(units=256, activation='relu'))
model.add(tf.keras.layers.Dense(units=256, activation='relu'))
model.add(tf.keras.layers.Dense(units=126, activation='relu'))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=1, activation='linear'))

In [47]:
model = train_mlp_model(model, x_train, y_train)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [49]:
import os
root_dir = os.path.dirname(os.path.abspath(''))

In [50]:
model.save(root_dir + '\\models\\baseline\\mlp_model_06-07-22.h5')

## Make predictions on specific test subset

In [51]:
model = tf.keras.models.load_model(root_dir + '\\models\\baseline\\mlp_model_06-07-22.h5')

In [52]:
x_test_reversed = reverse_processing(x_test, x_scaler, x_test_target)

In [53]:
predictions_merged = merge_reversed_data_with_predictions(model, x_test, y_test, x_test_reversed)

### Evaluate model predictions naively for gameweek 27 in 2021-22 season
At this moment, I do not take price constraints into account.

In [113]:
# get data from predictions_merged only for 2021-22 season and gameweek 27
predictions_merged_27 = predictions_merged[(predictions_merged.season == '2021-22') & (predictions_merged.GW == 27)]
# sort predictions_merged_27 by predicted points in descending order
predictions_merged_27 = predictions_merged_27.sort_values(by='predicted_total_points_next_gameweek', ascending=False)

In [129]:
# get first row from predictions_merged_27 and double 'total_points_next_gameweek' value, because this player would be chosen as a capitan
predictions_merged_27.iloc[0, predictions_merged_27.columns.get_loc('total_points_next_gameweek')] *= 2

In [124]:
# get goalkeepers from predictions_merged_27 (with 1 in 'position_GK' column)
df_gk_27 = predictions_merged_27[predictions_merged_27.position_GK == 1]
df_def_27 = predictions_merged_27[predictions_merged_27.position_DEF == 1]
df_mid_27 = predictions_merged_27[predictions_merged_27.position_MID == 1]
df_fwd_27 = predictions_merged_27[predictions_merged_27.position_FWD == 1]

In [125]:
# get one top row from df_gk, four top from df_def, four top from df_mid, two top from df_fwd and concatenate them into one dataframe
df_top_11_27 = pd.concat([df_gk_27.head(1), df_def_27.head(4), df_mid_27.head(4), df_fwd_27.head(2)])

In [126]:
# get 'name', 'total_points_next_gameweek', 'transfers_balance', 'value' columns from df_top_11
df_top_11_27[['name', 'total_points_next_gameweek', 'transfers_balance', 'value']]

Unnamed: 0,name,total_points_next_gameweek,transfers_balance,value
131336,Nick Pope,2.0,105427.0,54.0
131050,JoÃ£o Pedro Cavaco Cancelo,2.0,35950.0,71.0
130820,RÃºben Santos Gato Alves Dias,0.0,45820.0,63.0
131096,Aymeric Laporte,2.0,10139.0,58.0
130904,Ben Mee,1.0,315689.0,48.0
131057,Kevin De Bruyne,36.0,-24780.0,119.0
131119,Heung-Min Son,7.0,281052.0,108.0
130995,Bruno Miguel Borges Fernandes,2.0,135480.0,117.0
130913,Raheem Sterling,0.0,55949.0,107.0
131348,Harry Kane,13.0,156443.0,122.0


In [127]:
df_top_11_27.total_points_next_gameweek.sum()

65.0

In [128]:
# get number of avg points from avg_pts where season is 2021-22 and gameweek is 27

average_pts.loc[average_pts['GW'] == 27, ['AVG_PTS_2021/22']].values[0][0]

40