In [1]:
import os
import pandas as pd
import joblib
import numpy as np

# Load samples

In [2]:
samples_df = pd.read_csv(os.path.join('data', 'samples.csv'))
display(samples_df)

Unnamed: 0,season,gw,position,player,team,opponent,home,player fpl points 1,player fpl points 3,player fpl points 5,...,opponent ppda att 10,opponent ppda att 38,opponent ppda def 1,opponent ppda def 3,opponent ppda def 5,opponent ppda def 10,opponent ppda def 38,status player availability,status team league rank,status opponent league rank
0,2024-25,38,GK,Alisson Ramses Becker,Liverpool,Crystal Palace,True,4.0,2.666667,3.2,...,299.9,291.894737,21.0,20.0,18.4,18.6,22.394737,1.0,,
1,2024-25,38,DEF,Dean Huijsen,Bournemouth,Leicester,True,0.0,3.0,3.6,...,290.6,278.473684,28.0,20.333333,23.2,21.8,20.763158,1.0,,
2,2024-25,38,MID,Mitoma Kaoru,Brighton,Tottenham,False,6.0,2.333333,3.8,...,232.0,203.131579,12.0,20.0,18.8,22.1,22.210526,1.0,,
3,2024-25,38,FWD,Jean-Philippe Mateta,Crystal Palace,Liverpool,False,1.0,1.333333,2.8,...,200.8,217.815789,18.0,18.0,20.2,24.3,24.526316,1.0,,
4,2024-25,38,AM,Mikel Arteta,Arsenal,Southampton,False,9.0,5.0,6.4,...,270.9,250.342105,14.0,13.666667,15.0,18.4,20.447368,,2.0,20.0


# Load OpenFPL models, scalers and features

In [3]:
# Models
num_cvs = 5
positions = ['GK', 'DEF', 'MID', 'FWD', 'AM']
models = {cv: {position: [] for position in positions} for cv in range(1, num_cvs+1)}
for cv in models.keys():
    for position in models[cv].keys():
        search_dir = os.path.join('models', f'cv{cv}_{position}')
        search_file = open(os.path.join(search_dir, 'search.txt'), 'r')
        search_log = search_file.read()
        search_top_candidates = [x.split(' ')[0] for x in search_log.split('The population is:')[-1].split('Candidate ')[1:]]
        search_file.close()
        for candidate_num in search_top_candidates:
            candidate_dir = os.path.join(search_dir, candidate_num)
            candidate_file = os.listdir(candidate_dir)[0]
            model = joblib.load(os.path.join(candidate_dir, candidate_file))
            models[cv][position].append(model)
       
# Scalers
xscaler = joblib.load(os.path.join('models', 'xscaler.save'))
xscaler_features = xscaler.feature_names_in_
yscaler = joblib.load(os.path.join('models', 'yscaler.save'))

# Features
features = joblib.load(os.path.join('models', 'features.save'))

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


# Predict with OpenFPL

In [4]:
metadata = ['season', 'gw', 'position', 'player', 'team', 'opponent', 'home']
predictions_df = pd.DataFrame(columns=metadata+['prediction'])
for position in positions:
    
    # Preprocess
    position_samples_df = samples_df[samples_df['position'] == position]
    position_data = position_samples_df[xscaler_features].to_numpy()    
    position_data_scaled = np.nan_to_num(xscaler.transform(np.nan_to_num(position_data).astype('float32'))).astype('float32')
    position_feature_indices = [xscaler_features.index(feature) for feature in features[position]]
    position_data_scaled = position_data_scaled[:, position_feature_indices]
    
    # Predict
    position_predictions = []
    for cv in models.keys():
        for model in models[cv][position]:
            model_predictions = model.predict(position_data_scaled)
            model_predictions = yscaler.inverse_transform(model_predictions.reshape(model_predictions.shape[0], 1)).reshape(model_predictions.shape[0])
            position_predictions.append(model_predictions) 
    position_ensemble_predictions = np.median(position_predictions, axis=0)
    position_predictions_df = position_samples_df[metadata]
    position_predictions_df['prediction'] = position_ensemble_predictions
    predictions_df = pd.concat([predictions_df, position_predictions_df], ignore_index=True)

display(predictions_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_predictions_df['prediction'] = position_ensemble_predictions
  predictions_df = pd.concat([predictions_df, position_predictions_df], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_predictions_df['prediction'] = position_ensemble_predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returni

Unnamed: 0,season,gw,position,player,team,opponent,home,prediction
0,2024-25,38,GK,Alisson Ramses Becker,Liverpool,Crystal Palace,True,3.274773
1,2024-25,38,DEF,Dean Huijsen,Bournemouth,Leicester,True,3.522795
2,2024-25,38,MID,Mitoma Kaoru,Brighton,Tottenham,False,4.29901
3,2024-25,38,FWD,Jean-Philippe Mateta,Crystal Palace,Liverpool,False,2.277508
4,2024-25,38,AM,Mikel Arteta,Arsenal,Southampton,False,7.695012


# Store OpenFPL predictions

In [5]:
predictions_df.to_csv(os.path.join('data', 'predictions.csv'), index=False)