This notebook is used to create a csv file that includes my point projections in a format similar to fplreview.com. This csv file can then be used for making advanced team optimization plans using Sertalp B. Cay's repo "fpl-optimization".

In [1]:
latest_gameweek = 34

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import json

# Save team data

In [3]:
# get login credentials for fetching team data
file_path = Path('../../login_credentials/fpl_login.json')
login_credentials = pd.read_json(file_path, typ='series')

In [4]:
# utility function for fetching team data
from src.utils import fetch_my_team

In [5]:
# fetch my team data
my_team = fetch_my_team(login_credentials.user_name, login_credentials.password, login_credentials.team_id)

In [6]:
# save team data to fpl-optimization repo
file_path = Path('../../../repos/fpl-optimization/data/team.json')
with open(file_path, 'w') as f:
    json.dump(my_team, f)

# Edit projections into fplreview format 
### (to be used with fpl-optimization repo solver)

In [7]:
filepath = Path(f'../data/predictions/gameweek{latest_gameweek}.csv')
projections = pd.read_csv(filepath, index_col=0)
display(projections.head())
display(projections.shape)

Unnamed: 0,name,element_type,home,corners_and_indirect_freekicks_order,creativity_rank,direct_freekicks_order,ict_index_rank,influence_rank,minutes,now_cost,...,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,team_name,opponent_team,date,gameweek,expected_points
0,Aaron Cresswell,2,1,3.0,338,,446,449,321,42,...,0.021506,0.14173,1.331404,3.62778,1.950078,West Ham,Liverpool,2024-04-27T11:30:00Z,35,1.588187
1,Aaron Cresswell,2,0,3.0,338,,446,449,321,42,...,0.021506,0.14173,1.331404,3.62778,1.950078,West Ham,Chelsea,2024-05-05T13:00:00Z,36,1.668047
2,Aaron Cresswell,2,1,3.0,338,,446,449,321,42,...,0.021506,0.14173,1.331404,3.62778,1.950078,West Ham,Luton,2024-05-11T14:00:00Z,37,2.597269
3,Aaron Cresswell,2,0,3.0,338,,446,449,321,42,...,0.021506,0.14173,1.331404,3.62778,1.950078,West Ham,Manchester City,2024-05-19T15:00:00Z,38,1.419396
4,Aaron Hickey,2,0,,272,,283,256,713,45,...,0.028356,0.020959,1.229178,3.259115,0.0,Brentford,Everton,2024-04-27T16:30:00Z,35,2.229651


(2460, 113)

In [8]:
filepath = Path('../data/fpl_df.csv')
fpl_df = pd.read_csv(filepath, index_col=0, low_memory=False)
fpl_df = fpl_df[fpl_df.season=='23-24']
display(fpl_df.head())
display(fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,...,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
18839,0,0,2,0,,0.0,554,210,,0,...,218.0,77.0,218.0,77.0,7.0,3.0,0.0,0.0,Gabriel dos Santos Magalhães,2023-08-14 21:41:02.445217
18840,0,0,11,0,,30.3,22,16,,0,...,156.0,73.0,156.0,73.0,76.0,25.0,1.0,0.0,Kai Havertz,2023-08-14 21:41:02.445217
18841,1,0,23,0,4.0,42.5,11,7,3.0,0,...,51.0,16.0,51.0,16.0,29.0,8.0,1.06,0.0,Gabriel Martinelli Silva,2023-08-14 21:41:02.445217
18842,0,2,26,1,,5.9,110,10,,0,...,17.0,5.0,17.0,5.0,88.0,16.0,1.25,1.25,Eddie Nketiah,2023-08-14 21:41:02.445217
18843,0,0,11,0,3.0,23.4,39,27,1.0,0,...,158.0,74.0,158.0,74.0,14.0,6.0,1.0,0.0,Martin Ødegaard,2023-08-14 21:41:02.445217


(9780, 209)

In [9]:
df = fpl_df.groupby('name').last().reset_index()[['id', 'name','points_per_game', 'total_points',]]
#df['id'] = df.id.astype(int)
df['games_played'] = np.round(np.where(df['points_per_game']!=0, df['total_points'] / df['points_per_game'], 0),0)
display(df.head())
display(df.shape)

Unnamed: 0,id,name,points_per_game,total_points,games_played
0,530.0,Aaron Cresswell,0.9,7,8.0
1,104.0,Aaron Hickey,1.9,17,9.0
2,17.0,Aaron Ramsdale,3.3,20,6.0
3,675.0,Aaron Ramsey,1.3,18,14.0
4,401.0,Aaron Wan-Bissaka,2.2,35,16.0


(558, 5)

In [10]:
# drop duplicate players (some players get new spelling for their name during the season causing duplicates)
duplicate_ids = df.loc[df.id.duplicated(), 'id'].unique()
for id in duplicate_ids:
    ix = df.loc[df.id==id, 'games_played'].idxmin()
    df = df.drop(ix)
display(df.head())
display(df.shape)

Unnamed: 0,id,name,points_per_game,total_points,games_played
0,530.0,Aaron Cresswell,0.9,7,8.0
1,104.0,Aaron Hickey,1.9,17,9.0
2,17.0,Aaron Ramsdale,3.3,20,6.0
3,675.0,Aaron Ramsey,1.3,18,14.0
4,401.0,Aaron Wan-Bissaka,2.2,35,16.0


(555, 5)

In [11]:
# drop unneccesary columns
df = df.drop(['points_per_game', 'total_points','games_played'], axis=1)
display(df.head())
display(df.shape)

Unnamed: 0,id,name
0,530.0,Aaron Cresswell
1,104.0,Aaron Hickey
2,17.0,Aaron Ramsdale
3,675.0,Aaron Ramsey
4,401.0,Aaron Wan-Bissaka


(555, 2)

In [12]:
# merge id info to projections
projections = projections.merge(df, on='name', how='left')
#projections.head()

In [13]:
# add xMins variable that is needed later
projections['xMins'] = 90

In [14]:
projections[['id', 'expected_points', 'xMins', 'gameweek']]

Unnamed: 0,id,expected_points,xMins,gameweek
0,530.0,1.588187,90,35
1,530.0,1.668047,90,36
2,530.0,2.597269,90,37
3,530.0,1.419396,90,38
4,104.0,2.229651,90,35
...,...,...,...,...
2455,687.0,2.782655,90,35
2456,687.0,3.341578,90,36
2457,687.0,3.008492,90,37
2458,687.0,2.813614,90,37


In [15]:
projections_pivot = (
    projections
    .pivot_table(
    columns=['gameweek',],
    index='id', 
    values=['expected_points','xMins'], 
    aggfunc='sum'
    )
)

projections_pivot.head()

Unnamed: 0_level_0,expected_points,expected_points,expected_points,expected_points,xMins,xMins,xMins,xMins
gameweek,35,36,37,38,35,36,37,38
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2.0,1.959224,2.340624,2.539836,2.409091,90,90,90,90
3.0,1.905912,2.1482,2.365622,2.19372,90,90,90,90
4.0,2.256864,2.573502,2.775642,2.62015,90,90,90,90
5.0,3.76872,4.246051,4.458658,4.357558,90,90,90,90
6.0,4.399917,5.001973,5.560245,5.089595,90,90,90,90


In [16]:
new_cols = []
for col in projections_pivot.columns:
    if col[0] == 'expected_points':
        new_col = str(col[1]) + '_Pts'
        new_cols.append(new_col)
    elif col[0] == 'xMins':
        new_col = str(col[1]) + '_xMins'
        new_cols.append(new_col)

projections_pivot.columns = new_cols
projections_pivot = projections_pivot.reset_index()
projections_pivot = projections_pivot.rename(columns={'id':'ID'})

projections_pivot.head()

Unnamed: 0,ID,35_Pts,36_Pts,37_Pts,38_Pts,35_xMins,36_xMins,37_xMins,38_xMins
0,2.0,1.959224,2.340624,2.539836,2.409091,90,90,90,90
1,3.0,1.905912,2.1482,2.365622,2.19372,90,90,90,90
2,4.0,2.256864,2.573502,2.775642,2.62015,90,90,90,90
3,5.0,3.76872,4.246051,4.458658,4.357558,90,90,90,90
4,6.0,4.399917,5.001973,5.560245,5.089595,90,90,90,90


In [17]:
filepath = Path('../../../repos/fpl-optimization/data/fplreview.csv')
projections_pivot.to_csv(filepath)

### Option to make manual changes, e.g., to xPts (based on injuries)

### Bradley

In [18]:
projections.loc[projections['name'].str.contains('Bradley'), 'id'].unique()

array([757.])

In [19]:
player_id = 757
display(projections.loc[projections.id==player_id, 'name'].unique())
display(projections_pivot[projections_pivot.ID==player_id])

array(['Conor Bradley'], dtype=object)

Unnamed: 0,ID,35_Pts,36_Pts,37_Pts,38_Pts,35_xMins,36_xMins,37_xMins,38_xMins
526,757.0,3.383682,3.135522,2.973232,3.902185,90,90,90,90


In [20]:
#player_id = 353
gameweeks = np.arange(latest_gameweek+1,39)
weights = [0, 0, 0.25, 0.25, 0.25]

for i in range(len(gameweeks)):
    projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts'] = \
        weights[i]*projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts']

display(projections_pivot[projections_pivot.ID==player_id])

Unnamed: 0,ID,35_Pts,36_Pts,37_Pts,38_Pts,35_xMins,36_xMins,37_xMins,38_xMins
526,757.0,0.0,0.0,0.743308,0.975546,90,90,90,90


### Trent

In [None]:
projections.loc[projections['name'].str.contains('Trent'), 'id'].unique()

In [None]:
player_id = 290
display(projections.loc[projections.id==player_id, 'name'].unique())
display(projections_pivot[projections_pivot.ID==player_id])

In [None]:
gameweeks = [34,35,36]
weights = [0.75,0.9,0.9]

for i in range(len(gameweeks)):
    projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts'] = \
        weights[i]*projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts']

display(projections_pivot[projections_pivot.ID==player_id])

### Turner

In [None]:
projections.loc[projections['name'].str.contains('Turner'), 'id'].unique()

In [None]:
projections_pivot[projections_pivot.ID==28]

In [None]:
gameweeks = np.arange(latest_gameweek+1,39)
np.zeros(len(gameweeks))

In [None]:
player_id = 28
gameweeks = np.arange(latest_gameweek+1,39)
weights = np.zeros(len(gameweeks))

for i in range(len(gameweeks)):
    projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts'] = \
        weights[i]*projections_pivot.loc[projections_pivot.ID==player_id, f'{gameweeks[i]}_Pts']

display(projections_pivot[projections_pivot.ID==player_id])

In [21]:
# save data
filepath = Path('../../../repos/fpl-optimization/data/fplreview.csv')
projections_pivot.to_csv(filepath)

In [46]:
# check ids
projections.loc[projections['name'].str.contains('Verb'), ['id','name']]

Unnamed: 0,id,name
254,152.0,Bart Verbruggen
255,152.0,Bart Verbruggen
256,152.0,Bart Verbruggen
257,152.0,Bart Verbruggen
258,152.0,Bart Verbruggen


In [41]:
# banned players from solver 
projections.loc[projections.id.isin([77,343,31,301,10,230,590,546,145,687,430,424,368,369,216,148,152]), 'name'].unique()
#projections.loc[projections.id.isin([77,290,343,31,301,10,230,590,546]), 'name'].unique()

array(['Caoimhin Kelleher', 'Daniel Bentley', 'Jakub Kiwior',
       'Julián Álvarez', 'Kieran Trippier',
       'Matheus Santos Carneiro Da Cunha', 'Nick Pope',
       'Norberto Murara Neto', 'Oleksandr Zinchenko', 'Robert Sánchez',
       'Sam Johnstone', 'Đorđe Petrović'], dtype=object)