In [None]:
# Gameweek number stamped onto every newly detected player row before it is
# appended to the stored data set; update this by hand before each run.
latest_gameweek = 2

In [None]:
import pandas as pd
import numpy as np

from pathlib import Path
import os
import datetime as dt
import pickle
import json
import requests
from src.utils import fetch_latest_fpl_data

import matplotlib.pyplot as plt
import plotly.express as px

# Raise pandas' display limits so wide frames show up to 500 columns and
# up to 100 rows before being truncated.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 100

# Fetch new data

In [None]:
# teams for season 23-24
# NOTE: order matters -- other cells look a team up as teams[team_id - 1],
# so position k (0-based) must hold the team whose FPL id is k + 1.
teams = [
    'Arsenal',
    'Aston Villa',
    'Bournemouth',
    'Brentford',
    'Brighton',
    'Burnley',
    'Chelsea',
    'Crystal Palace',
    'Everton',
    'Fulham',
    'Liverpool',
    'Luton',
    'Manchester City',
    'Manchester Utd',
    'Newcastle Utd',
    'Nottingham Forest',
    'Sheffield Utd',
    'Tottenham',
    'West Ham',
    'Wolves',
]

## FPL player data

In [None]:
# fetch FPL data online
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of letting it
# surface later as a JSON-decode error or a missing 'elements' key.
fpl_response = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30)
fpl_response.raise_for_status()
fpl_online_data = fpl_response.json()
fpl_online_df = pd.DataFrame(fpl_online_data['elements'])
# `team` is treated as a 1-based position in the `teams` list
fpl_online_df['team_name'] = [teams[i] for i in fpl_online_df['team']-1]
# vectorised string concatenation; unlike a row-wise apply it also works on an empty frame
fpl_online_df['name'] = fpl_online_df['first_name'] + ' ' + fpl_online_df['second_name']
fpl_online_df

In [None]:
# CREATE NEW DATA SET IF THERE IS NEW DATA AVAILABLE AND SAVE TO FILE

# previously stored FPL data, loaded by the project helper
# (presumably the most recent CSV under ../data/fpl/ -- see src.utils)
old_data = fetch_latest_fpl_data()

# only take players who have played, i.e., minutes>0
played = fpl_online_df[fpl_online_df.minutes>0].copy()

# Minutes from each player's LAST stored row, keyed by name. Building this
# lookup once replaces a full scan of old_data for every player.
last_known_minutes = (
    old_data
    .drop_duplicates(subset='name', keep='last')
    .set_index('name')['minutes']
)
# NaN for players that have no stored row yet
previous_minutes = played['name'].map(last_known_minutes)

# players who have now played but had not previously played at all
new_data_1 = played[~played['name'].isin(old_data['name'].unique())].copy()
# players whose minutes are higher now than previously
# (a comparison against NaN is False, so brand-new players are not picked up twice)
new_data_2 = played[played['minutes'] > previous_minutes].copy()

# new players first, then returning players with additional minutes
new_data = pd.concat([new_data_1, new_data_2], ignore_index=True)
display(new_data)

In [None]:
# Append the newly detected rows to the stored data and write the result to a
# fresh, timestamped CSV. Nothing is written when there are no new rows.
has_new_rows = len(new_data) > 0
if has_new_rows:

    # stamp every new row with gameweek, season and retrieval time
    time_now = dt.datetime.now()
    new_data = new_data.assign(
        gameweek=latest_gameweek,
        season='23-24',
        data_retrieved_datetime=time_now,
    )
    display(new_data)

    full_data = pd.concat([old_data, new_data], ignore_index=True)
    print(f'Full data shape: {full_data.shape}')

    # the file name carries the retrieval timestamp
    timestamp = time_now.strftime("%Y%m%d-%H%M%S")
    path = Path('../data/fpl/data_' + timestamp + '.csv')
    full_data.to_csv(path)

## FPL fixtures data

In [None]:
# get FPL fixtures data
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of letting it
# surface later as a JSON-decode error or a missing column.
fixtures_response = requests.get('https://fantasy.premierleague.com/api/fixtures/', timeout=30)
fixtures_response.raise_for_status()
fpl_fixtures_data = fixtures_response.json()
fpl_fixtures = pd.DataFrame(fpl_fixtures_data)
# `team_h` / `team_a` are treated as 1-based positions in the `teams` list
fpl_fixtures['home_team'] = [teams[i] for i in fpl_fixtures['team_h']-1]
fpl_fixtures['away_team'] = [teams[i] for i in fpl_fixtures['team_a']-1]
fpl_fixtures

In [None]:
# Persist the FPL fixtures table; any existing file at this path is overwritten.
filepath = Path('../data/fixtures/fpl_fixtures.csv')
fpl_fixtures.to_csv(path_or_buf=filepath)

## FBRef fixtures

In [None]:
# read_html returns every table found on the page; the first one is used as
# the schedule table.
data = pd.read_html('https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures')
fbref_fixtures = (
    data[0]
    # keep only rows that have an xG value (presumably matches already played)
    .loc[lambda schedule: schedule['xG'].notnull()]
    # the first xG column becomes xG_home, the second ('xG.1') becomes xG_away
    .rename(columns={'xG':'xG_home', 'xG.1':'xG_away'})
)
display(fbref_fixtures)

In [None]:
# Persist the filtered FBRef fixtures table; any existing file at this path is overwritten.
filepath = Path('../data/fixtures/fbref_fixtures.csv')
fbref_fixtures.to_csv(path_or_buf=filepath)