In [None]:
# Notebook parameter: projections are built for the gameweeks after this one
# (starting at latest_gameweek + 1), and the output CSV is named after it.
latest_gameweek = 5

In [None]:
import pandas as pd
import numpy as np

import json
import requests
from pathlib import Path
from src.utils import fetch_latest_fpl_data

import catboost

# Raise pandas' display limits so frames of up to 100 rows/columns render untruncated.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)

In [None]:
# Load the FPL player data (the first CSV column is used as the index).
filepath = Path('../data/fpl_df.csv')
fpl_df = pd.read_csv(filepath, index_col=0, low_memory=False)
# Preview the first rows, followed by the (rows, columns) shape.
display(fpl_df.head(), fpl_df.shape)

In [None]:
# Load the FPL fixture list (the first CSV column is used as the index).
filepath = Path('../data/fixtures/fpl_fixtures.csv')
fixtures_fpl = pd.read_csv(filepath, index_col=0)
# Preview the first rows, followed by the (rows, columns) shape.
display(fixtures_fpl.head(), fixtures_fpl.shape)

In [None]:
# Model input columns. The final order of `features` is what gets passed to the
# model, so the pieces below are concatenated in a fixed order.
features_no_shift = ['element_type', 'home']

# Per-player snapshot columns.
_snapshot_features = [
    'corners_and_indirect_freekicks_order', 'creativity_rank',
    'direct_freekicks_order', 'ict_index_rank', 'influence_rank',
    'minutes', 'now_cost', 'penalties_order', 'points_per_game',
    'selected_by_percent', 'threat_rank',
]

# Spans used by every exponentially-weighted moving average column.
_ewm_spans = (5, 10, 20, 40)

# Team and opponent xG / xGA moving averages: side -> metric -> span.
_team_features = [
    f'{side}_{metric}_ewm_{span}'
    for side in ('team', 'opponent')
    for metric in ('xG', 'xGA')
    for span in _ewm_spans
]

# Per-gameweek player statistics that are aggregated over several windows.
_player_stats = [
    'gameweek_assists', 'gameweek_bps', 'gameweek_creativity', 'event_points',
    'gameweek_goals_scored', 'gameweek_goals_conceded', 'gameweek_saves',
    'gameweek_threat', 'gameweek_xG', 'gameweek_xA', 'gameweek_xGA',
    'gameweek_minutes', 'gameweek_xPoints',
]

# Every statistic for each ewm span, then the expanding-window versions.
_window_suffixes = [f'ewm_{span}' for span in _ewm_spans] + ['expanding']
_windowed_features = [
    f'{stat}_{suffix}'
    for suffix in _window_suffixes
    for stat in _player_stats
]

# Per-90 versions of the expanding statistics; there is no per-90 minutes column.
_per90_features = [
    f'{stat}_expanding_per90'
    for stat in _player_stats
    if stat != 'gameweek_minutes'
]

features_shift = (
    _snapshot_features
    + _team_features
    + _windowed_features
    + _per90_features
    + ['xG_overperformance']
)

features = features_no_shift + features_shift

In [None]:
# One row per player name for the 23-24 season, holding the model features plus
# the player's team.
# NOTE: GroupBy.last() returns the last non-null value of each column within the
# group, so a row can combine values from different records if some are missing.
df = (
    fpl_df[fpl_df.season == '23-24']
    .groupby(['name'])[features + ['team_name']]
    .last()
    .reset_index()
)
display(df)

In [None]:
path = Path('../data/team_data.csv')
# Keep only the most recent record per team (teams are keyed by the 'value' column).
team_data = pd.read_csv(path, index_col=0).groupby('value').last()
# The moving-average columns are the ones with 'ewm' in their name.
ewm_cols = [column for column in team_data.columns if 'ewm' in column]
# The same columns, renamed with an 'opponent_' prefix for use as opponent features.
new_cols = ['opponent_' + column for column in ewm_cols]
team_data = (
    team_data[ewm_cols]
    .add_prefix('opponent_')
    .reset_index()
)
display(team_data)

In [None]:
# Build the prediction table: one row per (player, upcoming fixture). Each row is a
# copy of the player's latest feature row with the opponent moving averages, the
# opponent name, the home flag, the kickoff date and the gameweek filled in.
first_gameweek = latest_gameweek + 1
# look at most 10 gameweeks ahead, and never past gameweek 38
last_gameweek = min(latest_gameweek + 10, 38)

# Fixtures and opponent statistics depend only on the team, not on the individual
# player, so they are resolved once up front instead of re-filtering the fixture
# table and team_data for every player and gameweek.
opponent_stats = {}  # opponent name -> that team's opponent_* values from team_data
team_fixtures = {}   # team name -> its upcoming fixtures, in output order
for gameweek in range(first_gameweek, last_gameweek + 1):
    gameweek_fixtures = fixtures_fpl[fixtures_fpl.event == gameweek]
    # Home pass before away pass: within a gameweek, a team's home games are
    # listed before its away games.
    for team_col, opponent_col, is_home in (('home_team', 'away_team', 1),
                                            ('away_team', 'home_team', 0)):
        for _, fixture in gameweek_fixtures.iterrows():
            opponent_name = fixture[opponent_col]
            if opponent_name not in opponent_stats:
                opponent_stats[opponent_name] = team_data.loc[
                    team_data.value == opponent_name, new_cols
                ].squeeze()
            team_fixtures.setdefault(fixture[team_col], []).append({
                'opponent_team': opponent_name,
                'home': is_home,
                'date': fixture['kickoff_time'],
                'gameweek': gameweek,
            })

# Duplicate each player's row once per upcoming fixture of their team and replace
# the fixture-specific fields. Teams with no fixture in the window produce no rows.
prediction_data = []
for _, player in df.iterrows():
    for fixture in team_fixtures.get(player['team_name'], []):
        player_fixture = player.copy()
        player_fixture[new_cols] = opponent_stats[fixture['opponent_team']]
        player_fixture['opponent_team'] = fixture['opponent_team']
        player_fixture['home'] = fixture['home']
        player_fixture['date'] = fixture['date']
        player_fixture['gameweek'] = fixture['gameweek']
        prediction_data.append(player_fixture)

prediction_df = pd.DataFrame(prediction_data).reset_index(drop=True)
display(prediction_df.head())
display(prediction_df.shape)

In [None]:
# Restore the trained CatBoost regressor from its saved model file.
path = Path('../models/catboost_20230809-201635.cbm')
model = catboost.CatBoostRegressor()
model.load_model(path)

In [None]:
# Score every player/fixture row and store the result as 'expected_points'.
X = prediction_df.loc[:, features]
prediction_df = prediction_df.assign(expected_points=model.predict(X))

In [None]:
# Spot-check the projections for players whose name contains 'Haaland'.
haaland_rows = prediction_df['name'].str.contains('Haaland')
preview_columns = ['name', 'team_name', 'opponent_team', 'home', 'date', 'expected_points']
prediction_df.loc[haaland_rows, preview_columns]

In [None]:
# Write the projections to disk; the file is named after latest_gameweek.
path = Path(f'../data/predictions/gameweek{latest_gameweek}.csv')
# to_csv does not create missing folders, so make sure the target directory exists
# (for example on a fresh checkout) before writing.
path.parent.mkdir(parents=True, exist_ok=True)
prediction_df.to_csv(path)