In [None]:
import pandas as pd
import numpy as np
import json
import requests
from pathlib import Path

# Let wide frames show up to 300 columns before pandas truncates the display.
pd.options.display.max_columns = 300

In [None]:
# Team display names. The fetch cell looks names up as `teams[team - 1]`, so
# the position in this list is significant.
# NOTE(review): the order is assumed to follow the FPL API's team ids — confirm
# against the `teams` section of the bootstrap-static response.
teams = [
    'Arsenal',
    'Aston Villa',
    'Bournemouth',
    'Brentford',
    'Brighton',
    'Burnley',
    'Chelsea',
    'Crystal Palace',
    'Everton',
    'Fulham',
    'Leeds',
    'Liverpool',
    'Manchester City',
    'Manchester Utd',
    'Newcastle Utd',
    'Nottingham Forest',
    'Tottenham',
    'Sunderland',
    'West Ham',
    'Wolves',
]

In [None]:
# Fetch the live FPL data from the bootstrap-static endpoint.
# The timeout stops the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() surfaces an HTTP error instead of handing
# an error page to the JSON parser.
fpl_response = requests.get(
    'https://fantasy.premierleague.com/api/bootstrap-static/',
    timeout=30,
)
fpl_response.raise_for_status()
fpl_online_data = fpl_response.json()

# One row per entry of the response's `elements` list.
fpl_online_df = pd.DataFrame(fpl_online_data['elements'])

# `team` is looked up as position `team - 1` in `teams`, i.e. ids start at 1.
# Going through a dict instead of list indexing means an id of 0 can no longer
# wrap around to the last list entry, and unknown ids are reported explicitly.
# NOTE(review): this still assumes the order of `teams` matches the API's team
# ids — confirm against the response's own team list.
team_name_by_id = dict(enumerate(teams, start=1))
fpl_online_df['team_name'] = fpl_online_df['team'].map(team_name_by_id)
unmapped_team_ids = fpl_online_df.loc[fpl_online_df['team_name'].isna(), 'team'].unique()
if len(unmapped_team_ids):
    raise ValueError(
        f'team ids without an entry in `teams`: {sorted(unmapped_team_ids.tolist())}'
    )

# Full name as "<first_name> <second_name>", built column-wise rather than
# with a row-by-row apply.
fpl_online_df['name'] = fpl_online_df['first_name'] + ' ' + fpl_online_df['second_name']
fpl_online_df['season'] = '25-26'
fpl_online_df

In [None]:
# Rows of the online table whose team_name is 'Liverpool'.
fpl_online_df.loc[fpl_online_df['team_name'].eq('Liverpool')]

In [None]:
# Names of the online-table columns that contain 'defensive'.
fpl_online_df.filter(like='defensive').columns.tolist()

In [None]:
# Rows of the online table whose second_name contains 'Szobo'.
fpl_online_df.loc[fpl_online_df['second_name'].str.contains('Szobo')]

In [None]:
# Statshead data: the CSV is read with a two-row header, giving two-level
# column labels.
data1 = pd.read_csv('../../../misc/pl24-25_data1.csv', header=[0, 1])

# Labels whose main (first-level) header contains 'Unnamed' are regrouped
# under 'General'; every other label is kept as it is.
new_columns = [
    ('General', sub_header) if 'Unnamed' in main_header else (main_header, sub_header)
    for main_header, sub_header in data1.columns
]
data1.columns = pd.MultiIndex.from_tuples(new_columns)

# Where several columns now carry the same label, retain only the last one.
repeated_label = data1.columns.duplicated(keep='last')
data1 = data1.loc[:, ~repeated_label]

data1

In [None]:
# Statshead data: the CSV is read with a two-row header, giving two-level
# column labels.
data2 = pd.read_csv('../../../misc/pl24-25_data2.csv', header=[0, 1])

# Labels whose main (first-level) header contains 'Unnamed' are regrouped
# under 'General'; every other label is kept as it is.
new_columns = [
    ('General', sub_header) if 'Unnamed' in main_header else (main_header, sub_header)
    for main_header, sub_header in data2.columns
]
data2.columns = pd.MultiIndex.from_tuples(new_columns)

# Where several columns now carry the same label, retain only the last one.
repeated_label = data2.columns.duplicated(keep='last')
data2 = data2.loc[:, ~repeated_label]

data2

In [None]:
# Stack the two Statshead frames on top of each other with a fresh 0..n-1 index.
data_combined = pd.concat([data1, data2], axis=0, ignore_index=True)

# Rows sharing the same (Player, Date) pair count as repeats; only the first
# occurrence is kept.
is_repeated_row = data_combined.duplicated(
    subset=[('General', 'Player'), ('General', 'Date')],
    keep='first',
)
# .copy() makes the result an independent frame. Without it the boolean-mask
# selection is a slice of the concatenated frame, and adding a column to it
# later can raise SettingWithCopyWarning on pandas without Copy-on-Write.
data_combined = data_combined.loc[~is_repeated_row].copy()
data_combined

In [None]:
# CBIT per row = 'Tkl+Int' + 'Blocks' + 'Clr'
# (presumably tackles+interceptions, blocks and clearances — confirm against
# the Statshead column glossary).
data_combined['CBIT'] = (
    data_combined[('General', 'Tkl+Int')]
    .add(data_combined[('Blocks', 'Blocks')])
    .add(data_combined[('General', 'Clr')])
)

In [None]:
# Rows with a CBIT of at least 10 whose position is exactly 'DF'.
df = data_combined.loc[
    data_combined['CBIT'].ge(10)
    & data_combined[('General', 'Pos.')].eq('DF')
]
df

In [None]:
# Number of rows in `df` for the player 'Murillo'.
int(df[('General', 'Player')].eq('Murillo').sum())

In [None]:
# Number of rows in `df` for the player 'James Tarkowski'.
int(df[('General', 'Player')].eq('James Tarkowski').sum())

In [None]:
# Number of rows in `df` for the player 'Jan Bednarek'.
int(df[('General', 'Player')].eq('Jan Bednarek').sum())

In [None]:
# Number of rows in `df` for the player 'Wout Faes'.
int(df[('General', 'Player')].eq('Wout Faes').sum())

In [None]:
# Number of rows in the full combined frame for the player 'Wout Faes'.
int(data_combined[('General', 'Player')].eq('Wout Faes').sum())

In [None]:
# Number of rows in the full combined frame for the player 'Murillo'.
int(data_combined[('General', 'Player')].eq('Murillo').sum())

In [None]:
# Number of rows in the full combined frame for the player 'Murillo'.
int(data_combined[('General', 'Player')].eq('Murillo').sum())

## fpl_df

In [None]:
# Load the saved FPL table; the first CSV column becomes the row index.
fpl_df = pd.read_csv(
    filepath_or_buffer='../data/fpl_df.csv',
    index_col=0,
)
fpl_df

In [None]:
# Select from fpl_df the columns that the *online* table names with
# 'defensive' (raises KeyError if fpl_df lacks any of them).
fpl_df[fpl_online_df.filter(like='defensive').columns.tolist()]

In [None]:
# Rows of fpl_df whose name contains 'Szobo'. `na=False` treats missing names
# as non-matches directly in str.contains, which yields a proper boolean mask
# and avoids filling an object-dtype result afterwards. The column is accessed
# by key because attribute access (`fpl_df.name`) is easy to confuse with a
# frame attribute.
fpl_df.loc[fpl_df['name'].str.contains('Szobo', na=False)]

In [None]:
# Column under inspection; later cells reuse `col`.
col = 'defensive_contribution'

# Row-to-row difference of `col` within each (first_name, second_name, season)
# group; the first row of every group has no predecessor and comes out as NaN.
# NOTE(review): the result depends on the row order inside each group —
# presumably chronological; confirm.
aux = (
    fpl_df
    .groupby(['first_name', 'second_name', 'season'])[col]
    .diff()
)

# How many differences could actually be computed (non-null entries).
aux.notna().sum()

In [None]:
# Mean of `col` per (first_name, second_name, season), with the group keys as
# ordinary columns; show the last 50 groups.
(
    fpl_df
    .groupby(['first_name', 'second_name', 'season'], as_index=False)[col]
    .mean()
    .tail(50)
)

In [None]:
def my_fill_na(x, gameweek_col, diff_col):
    '''Return the ``gameweek_col`` value of one row, falling back to ``diff_col``.

    Intended for the first item of each group after a grouped ``diff``, where
    the per-gameweek value is missing. The fallback is only used while
    ``minutes`` is at most 90; per the original author's note this is what
    keeps the rows of season 22-23, where several weeks at the start of the
    season are missing, unfilled. (Presumably ``minutes`` is cumulative playing
    time, so <= 90 means at most one match so far — TODO confirm.)

    Parameters
    ----------
    x : mapping-like row (e.g. a ``pd.Series`` passed by ``DataFrame.apply``
        with ``axis=1``); must provide ``gameweek_col`` and, when that value is
        missing, ``'minutes'`` and ``diff_col``.
    gameweek_col : key of the value to return by default.
    diff_col : key of the value returned instead when the default is missing
        and ``minutes <= 90``.

    Returns
    -------
    ``x[diff_col]`` if ``x[gameweek_col]`` is missing and ``x['minutes'] <= 90``,
    otherwise ``x[gameweek_col]`` unchanged (which may itself be missing).
    '''
    gameweek_value = x[gameweek_col]
    # pd.isna also recognises None / pd.NA, for which np.isnan raises TypeError.
    if not pd.isna(gameweek_value):
        return gameweek_value

    minutes = x['minutes']
    # Unknown minutes never trigger the fallback (a NaN comparison is False
    # anyway; the notna guard extends that to None / pd.NA).
    if pd.notna(minutes) and minutes <= 90:
        return x[diff_col]
    return gameweek_value

In [None]:
# Show the per-gameweek counterpart of `col` (column named 'gameweek_<col>').
fpl_df.loc[:, f'gameweek_{col}']

In [None]:
# All fpl_df columns whose name contains 'defensive'.
fpl_df.filter(like='defensive')