In [None]:
import pandas as pd
import numpy as np

import json
import requests
from pathlib import Path

import catboost

# Show up to 100 columns and 100 rows when rendering DataFrames.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [None]:
# Load the offline FPL data (previous seasons); the first CSV column is the index.
filepath = Path('../../data/modeling/fpl_df.csv')
fpl_offline_df = pd.read_csv(filepath, index_col=0)
# Preview the first rows, then the overall (rows, columns) shape.
display(fpl_offline_df.head(), fpl_offline_df.shape)

In [None]:
# Team names in lookup order: later cells index this list with (team id - 1).
teams = [
    'Arsenal',
    'Aston Villa',
    'Bournemouth',
    'Brentford',
    'Brighton',
    'Burnley',
    'Chelsea',
    'Crystal Palace',
    'Everton',
    'Fulham',
    'Liverpool',
    'Luton',
    'Manchester City',
    'Manchester Utd',
    'Newcastle Utd',
    'Nottingham Forest',
    'Sheffield Utd',
    'Tottenham',
    'West Ham',
    'Wolves',
]

In [None]:
# fetch FPL data online
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into an immediate, readable failure
# instead of a confusing JSON/KeyError further down.
response = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30)
response.raise_for_status()
fpl_online_data = response.json()
fpl_online_df = pd.DataFrame(fpl_online_data['elements'])
# Map the numeric team id to a name; ids are treated as 1-based positions in `teams`.
# NOTE(review): assumes `teams` is ordered exactly like the API's team ids for
# the current season — confirm whenever the season (and thus the ids) changes.
fpl_online_df['team_name'] = [teams[i] for i in fpl_online_df['team']-1]
fpl_online_df

In [None]:
# get FPL fixtures data
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into an immediate, readable failure
# instead of a confusing JSON/KeyError further down.
response = requests.get('https://fantasy.premierleague.com/api/fixtures/', timeout=30)
response.raise_for_status()
fpl_fixtures_data = response.json()
fpl_fixtures = pd.DataFrame(fpl_fixtures_data)
# Translate home/away team ids into names; ids are treated as 1-based positions in `teams`.
# NOTE(review): assumes `teams` is ordered exactly like the API's team ids for
# the current season — confirm whenever the season (and thus the ids) changes.
fpl_fixtures['home_team'] = [teams[i] for i in fpl_fixtures['team_h']-1]
fpl_fixtures['away_team'] = [teams[i] for i in fpl_fixtures['team_a']-1]
fpl_fixtures

In [None]:
# Column names fed to the prediction model, kept in two groups that are
# concatenated into `features` at the bottom of this cell.
# NOTE(review): the "shift" / "no shift" naming presumably reflects whether a
# feature was lagged when the model was trained — confirm against the training code.
# NOTE(review): `features` is used to select the model input columns, so this
# order presumably has to match the order the saved model was trained with — verify.
features_no_shift = ['element_type', 'home']

features_shift = ['corners_and_indirect_freekicks_order', 'creativity_rank', 
       'direct_freekicks_order', 'ict_index_rank', 'influence_rank',
       'minutes', 'now_cost', 'penalties_order', 'points_per_game', 
       'selected_by_percent', 'threat_rank',
       # team-level moving averages (spans 5/10/20/40)
       'team_xG_ewm_5', 'team_xG_ewm_10', 'team_xG_ewm_20',
       'team_xG_ewm_40', 'team_xGA_ewm_5', 'team_xGA_ewm_10',
       'team_xGA_ewm_20', 'team_xGA_ewm_40', 
       # opponent-level moving averages (spans 5/10/20/40)
       'opponent_xG_ewm_5', 'opponent_xG_ewm_10',
       'opponent_xG_ewm_20', 'opponent_xG_ewm_40', 'opponent_xGA_ewm_5',
       'opponent_xGA_ewm_10', 'opponent_xGA_ewm_20',
       'opponent_xGA_ewm_40', 
       # per-player gameweek stats, moving average with span 5
       'gameweek_assists_ewm_5', 'gameweek_bps_ewm_5',
       'gameweek_creativity_ewm_5', 'event_points_ewm_5',
       'gameweek_goals_scored_ewm_5', 'gameweek_goals_conceded_ewm_5',
       'gameweek_saves_ewm_5', 'gameweek_threat_ewm_5',
       'gameweek_xG_ewm_5', 'gameweek_xA_ewm_5', 'gameweek_xGA_ewm_5',
       'gameweek_minutes_ewm_5', 'gameweek_xPoints_ewm_5',
       # per-player gameweek stats, moving average with span 10
       'gameweek_assists_ewm_10', 'gameweek_bps_ewm_10',
       'gameweek_creativity_ewm_10', 'event_points_ewm_10',
       'gameweek_goals_scored_ewm_10', 'gameweek_goals_conceded_ewm_10',
       'gameweek_saves_ewm_10', 'gameweek_threat_ewm_10',
       'gameweek_xG_ewm_10', 'gameweek_xA_ewm_10', 'gameweek_xGA_ewm_10',
       'gameweek_minutes_ewm_10', 'gameweek_xPoints_ewm_10',
       # per-player gameweek stats, moving average with span 20
       'gameweek_assists_ewm_20', 'gameweek_bps_ewm_20',
       'gameweek_creativity_ewm_20', 'event_points_ewm_20',
       'gameweek_goals_scored_ewm_20', 'gameweek_goals_conceded_ewm_20',
       'gameweek_saves_ewm_20', 'gameweek_threat_ewm_20',
       'gameweek_xG_ewm_20', 'gameweek_xA_ewm_20', 'gameweek_xGA_ewm_20',
       'gameweek_minutes_ewm_20', 'gameweek_xPoints_ewm_20',
       # per-player gameweek stats, moving average with span 40
       'gameweek_assists_ewm_40', 'gameweek_bps_ewm_40',
       'gameweek_creativity_ewm_40', 'event_points_ewm_40',
       'gameweek_goals_scored_ewm_40', 'gameweek_goals_conceded_ewm_40',
       'gameweek_saves_ewm_40', 'gameweek_threat_ewm_40',
       'gameweek_xG_ewm_40', 'gameweek_xA_ewm_40', 'gameweek_xGA_ewm_40',
       'gameweek_minutes_ewm_40', 'gameweek_xPoints_ewm_40',
       # per-player gameweek stats, expanding window
       'gameweek_assists_expanding', 'gameweek_bps_expanding',
       'gameweek_creativity_expanding', 'event_points_expanding',
       'gameweek_goals_scored_expanding',
       'gameweek_goals_conceded_expanding', 'gameweek_saves_expanding',
       'gameweek_threat_expanding', 'gameweek_xG_expanding',
       'gameweek_xA_expanding', 'gameweek_xGA_expanding',
       'gameweek_minutes_expanding', 'gameweek_xPoints_expanding',
       # expanding-window stats per 90 (no minutes entry in this group)
       'gameweek_assists_expanding_per90', 'gameweek_bps_expanding_per90',
       'gameweek_creativity_expanding_per90',
       'event_points_expanding_per90',
       'gameweek_goals_scored_expanding_per90',
       'gameweek_goals_conceded_expanding_per90',
       'gameweek_saves_expanding_per90',
       'gameweek_threat_expanding_per90', 'gameweek_xG_expanding_per90',
       'gameweek_xA_expanding_per90', 'gameweek_xGA_expanding_per90',
       'gameweek_xPoints_expanding_per90', 'xG_overperformance'
    ]

# Full feature list: unshifted features first, then the shifted ones.
features = features_no_shift + features_shift

In [None]:
# list of players who appear both on old and new data
#player_list_online = fpl_online_df['web_name'].unique()
#player_list_offline = fpl_offline_df['web_name'].unique()
#player_list = list((set(player_list_online).intersection(set(player_list_offline))))
#len(player_list)

In [None]:
# Work on a copy so the raw online frame stays untouched.
df = fpl_online_df.copy()
# Preview the first rows, then the overall (rows, columns) shape.
display(df.head(), df.shape)

In [None]:
# get necessary features not in df yet
# Walk `features` in order (instead of taking a set difference) so the added
# columns come out in the same order on every run; string-set ordering changes
# between kernel sessions, which would reshuffle the columns of the saved CSV.
existing_columns = set(df.columns)
extra_features = [feature for feature in dict.fromkeys(features)
                  if feature not in existing_columns]
# Take each player's most recent offline values. GroupBy.last() returns the
# last non-null entry per column within each group.
# NOTE(review): relies on fpl_offline_df being in chronological order — confirm.
# NOTE(review): players are matched on web_name, which may not be unique across
# players — confirm, or join on a stable player id if the offline data has one.
extra_data = fpl_offline_df.groupby('web_name').last()[extra_features]
# Left join keeps every online player; players absent offline get NaN features.
df = df.join(extra_data, on='web_name', how='left')
display(df.head())
display(df.shape)

# get necessary features not in df yet
#extra_features = list(set(features).difference(set(df.columns)))
#extra_data = fpl_offline_df[fpl_offline_df.web_name.isin(player_list)].groupby('web_name').last()[extra_features]
#df = df.join(extra_data, on='web_name')
#display(df.head())
#display(df.shape)

In [None]:
# Build the opponent lookup table: one row per team holding its latest
# moving-average ('ewm') columns, renamed with an 'opponent_' prefix.
path = Path('../../data/modeling/team_data.csv')
team_data = pd.read_csv(path, index_col=0)
# get latest moving average info for each team
team_data = team_data.groupby('value').last()
ewm_cols = [col for col in team_data.columns if 'ewm' in col]
team_data = team_data[ewm_cols]
# change col names to have 'opponent' in front
new_cols = ['opponent_' + col for col in ewm_cols]
team_data.columns = new_cols
team_data = team_data.reset_index()
# add data for luton
# Only add the placeholder row when Luton is missing, so a data file that
# already contains Luton does not end up with two rows for the same team.
if 'Luton' not in team_data['value'].values:
    # Build the row from Python values keyed by column name. Packing the name
    # and the numbers into one np.array would coerce every number to a string,
    # and filling by position depends on the column order and count in the CSV.
    # NOTE(review): assumes the placeholder pair is xG = 0.8 and xGA = 2.3 and
    # that every ewm column is either an xG or an xGA column — confirm.
    luton_stats = {col: (2.3 if 'xGA' in col else 0.8) for col in new_cols}
    new_row = pd.DataFrame([{'value': 'Luton', **luton_stats}], columns=team_data.columns)
    team_data = pd.concat([team_data, new_row], ignore_index=True)
display(team_data)


In [None]:
# get prediction data by adding rows for each future game for each player and getting the right opponent data
# Result: one row per (player, fixture) between first_gameweek and
# last_gameweek (inclusive), carrying the opponent's moving averages
# (new_cols), the opponent name, a home flag (1 = home, 0 = away) and the
# kickoff time of the game.
prediction_data = []
first_gameweek = 1
last_gameweek = 10

# Resolve each team's fixtures once instead of re-filtering the fixture table
# for every player. Within a gameweek a team's home games are listed before
# its away games.
team_schedule = {}
for gameweek in range(first_gameweek, last_gameweek + 1):
    gameweek_fixtures = fpl_fixtures[fpl_fixtures.event == gameweek]
    for team_col, opponent_col, is_home in (('home_team', 'away_team', 1),
                                            ('away_team', 'home_team', 0)):
        for _, fixture in gameweek_fixtures.iterrows():
            opponent_name = fixture[opponent_col]
            # get opponents xg data (a Series indexed by new_cols when exactly
            # one team_data row matches the opponent name)
            opponent_stats = team_data.loc[team_data.value == opponent_name, new_cols].squeeze()
            team_schedule.setdefault(fixture[team_col], []).append(
                (opponent_name, is_home, fixture['kickoff_time'], opponent_stats)
            )

for ix, row in df.iterrows():
    for opponent_name, is_home, kickoff_time, opponent_stats in team_schedule.get(row['team_name'], []):
        # Take a fresh copy for every fixture. Appending one shared Series and
        # then mutating it for the next game makes all rows of a double
        # gameweek point at the same object, i.e. show only the last fixture.
        fixture_row = row.copy()
        fixture_row[new_cols] = opponent_stats
        fixture_row['opponent_team'] = opponent_name
        fixture_row['home'] = is_home
        fixture_row['date'] = kickoff_time
        prediction_data.append(fixture_row)

prediction_df = pd.DataFrame(prediction_data).reset_index(drop=True)
display(prediction_df.head())
display(prediction_df.shape)

In [None]:
# Restore the trained CatBoost regressor from its saved model file.
path = Path('../../models/catboost_20230809-201635.cbm')
model = catboost.CatBoostRegressor()
model.load_model(path)

In [None]:
# Score every (player, fixture) row: select the model's feature columns and
# store the prediction as 'expected_points'.
X = prediction_df.loc[:, features]
prediction_df['expected_points'] = model.predict(X)

In [None]:
# Spot-check the projections for a single player.
is_rashford = prediction_df.web_name == 'Rashford'
preview_columns = ['web_name', 'team_name', 'opponent_team', 'home', 'date', 'expected_points']
prediction_df.loc[is_rashford, preview_columns]

In [None]:
# Persist the projections (including the DataFrame index) as CSV.
path = Path('../../data/predictions/gameweek0.csv')
prediction_df.to_csv(path_or_buf=path)