# Data Update (Run For Each GW)

This notebook updates the training data for the gameweek (GW) points prediction model and should be run once per gameweek. It takes the new Premier League (PL) player stats for the selected gameweek and appends them as new rows to the original training dataset.

In [312]:
#Import relevant libraries and packages
import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path
import json
from ipynb.fs.full.Helpers import *

#paths
path = Path('fpl_model/Data')
path_22_23 = path/'2022-23'  # same location as before, derived from `path` so the root is written once

#read in data sets
# 'season' is read as text in BOTH frames. training_data already forces it to str;
# if the fixture list were left to pandas' inference it would come back as an
# integer, and once the two frames are concatenated later on the column would mix
# str and int values, so drop_duplicates() could not recognise rows that were
# already appended for this gameweek.
training_data = pd.read_csv(path/'training_data.csv', index_col=0,
                            dtype={'season': str,
                                   'comp': str,
                                   'squad': str})
season_gws = pd.read_csv(path/'remaining_season.csv', index_col=0,
                         dtype={'season': str})
# Per-gameweek player stats for the 2022-23 season (one file covering every GW).
player_stats = pd.read_csv(path_22_23/'gws/merged_gw.csv')

# CHANGE GAMEWEEK HERE:

In [313]:
#REMINDER TO CHANGE GW WEEKLY
gameweek = 1

# Stat columns taken from the merged gameweek file; 'name' is renamed to 'player' below.
relevant_columns = [
    'name', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
    'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
    'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
    'threat', 'yellow_cards',
]

# NOTE: this overwrites player_stats with the filtered rows, so the cell that
# loads the CSV has to be re-run before selecting a different gameweek.
player_stats = (
    player_stats
    .loc[player_stats['GW'] == gameweek, relevant_columns]
    .rename(columns={'name': 'player'})
)
player_stats

Unnamed: 0,player,minutes,total_points,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,penalties_saved,red_cards,saves,threat,yellow_cards
0,Nathan Redmond,1,1,0,0,3,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0
1,Junior Stanislas,1,1,0,0,3,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0
2,Armando Broja,15,1,0,0,3,0,0.3,0,0,2.5,5.2,0,0,0,19.0,0
3,Fabian Schär,90,15,0,3,43,1,14.6,0,1,10.6,66.0,0,0,0,25.0,0
4,Jonny Evans,90,1,0,0,15,0,1.3,2,0,1.5,14.0,0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
568,Oliver Skipp,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0
569,Ryan Sessegnon,65,7,0,0,19,0,3.0,1,1,9.4,38.4,0,0,0,53.0,1
570,Ashley Young,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0
571,Jeremy Sarmiento Morante,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0


# Select the fixture rows for the chosen gameweek (uses the `gameweek` value set above):

In [314]:
# Fixture/player rows for the gameweek held in `gameweek` (nothing to edit in this cell).
# Despite its name, season_gw1 holds whichever gameweek is currently selected.
season_gw1 = season_gws.loc[
    season_gws['gw'] == gameweek,
    ['player', 'position', 'gw', 'team', 'opponent_team', 'was_home', 'season'],
]
season_gw1

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season
0,Vicente Guaita,1,1,Crystal Palace,Arsenal,True,2223
1,James Tomkins,2,1,Crystal Palace,Arsenal,True,2223
2,James McArthur,3,1,Crystal Palace,Arsenal,True,2223
3,Christian Benteke,4,1,Crystal Palace,Arsenal,True,2223
4,Joel Ward,2,1,Crystal Palace,Arsenal,True,2223
...,...,...,...,...,...,...,...
10452,Josh Wilson-Esbrand,2,1,Manchester City,West Ham United,False,2223
10453,Liam Delap,4,1,Manchester City,West Ham United,False,2223
10454,Stefan Ortega Moreno,1,1,Manchester City,West Ham United,False,2223
10455,Kalvin Phillips,3,1,Manchester City,West Ham United,False,2223


In [315]:
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

def fuzzy_merge(df_1, df_2, key1, key2, threshold=90, limit=2):
    """
    Annotate the left table with the fuzzy matches of its key found in the right table.

    Despite the name, no join is performed here: the result is the left table
    plus a ``matches`` column, which the caller can then use as a join key.

    :param df_1: the left table to join (left untouched; a copy is returned)
    :param df_2: the right table to join
    :param key1: key column of the left table
    :param key2: key column of the right table
    :param threshold: minimum fuzzywuzzy score a candidate needs to be kept
    :param limit: the amount of candidates requested per key, sorted high to low
    :return: copy of ``df_1`` with an extra ``matches`` column holding the kept
             candidates joined by ``', '`` (empty string when none reach the threshold)
    """
    choices = df_2[key2].tolist()

    def _matches_for(value):
        # process.extract yields (choice, score) pairs for a list of choices.
        candidates = process.extract(value, choices, limit=limit)
        return ', '.join(c[0] for c in candidates if c[1] >= threshold)

    # Work on a copy: df_1 is often a filtered slice of a larger frame, and
    # writing a column into it would mutate the caller's data and trigger
    # pandas' chained-assignment warning.
    result = df_1.copy()
    result['matches'] = result[key1].apply(_matches_for)

    return result

# Tag every fixture row with the player_stats name(s) scoring at least 91.
# With the default limit of 2, a row with two candidates above the threshold gets
# both names comma-joined in 'matches'.
season_gw1 = fuzzy_merge(
    df_1=season_gw1,
    df_2=player_stats,
    key1='player',
    key2='player',
    threshold=91,
)
season_gw1

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,matches
0,Vicente Guaita,1,1,Crystal Palace,Arsenal,True,2223,Vicente Guaita
1,James Tomkins,2,1,Crystal Palace,Arsenal,True,2223,James Tomkins
2,James McArthur,3,1,Crystal Palace,Arsenal,True,2223,James McArthur
3,Christian Benteke,4,1,Crystal Palace,Arsenal,True,2223,Christian Benteke
4,Joel Ward,2,1,Crystal Palace,Arsenal,True,2223,Joel Ward
...,...,...,...,...,...,...,...,...
10452,Josh Wilson-Esbrand,2,1,Manchester City,West Ham United,False,2223,Josh Wilson-Esbrand
10453,Liam Delap,4,1,Manchester City,West Ham United,False,2223,Liam Delap
10454,Stefan Ortega Moreno,1,1,Manchester City,West Ham United,False,2223,Stefan Ortega Moreno
10455,Kalvin Phillips,3,1,Manchester City,West Ham United,False,2223,Kalvin Phillips


In [316]:
# Attach the gameweek stats to each fixture row through the fuzzy-matched name.
# merge() defaults to an inner join, so rows whose 'matches' value equals no
# player_stats name are left out of the result.
season_gw1_stats = (
    season_gw1
    .merge(player_stats, left_on='matches', right_on='player')
    .drop(columns=['player_x', 'matches'])
    # keep the name as spelled in player_stats
    .rename(columns={'player_y': 'player'})
    .loc[:, [
        'player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
        'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
        'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
        'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
        'threat', 'yellow_cards',
    ]]
)
season_gw1_stats

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,creativity,goals_conceded,goals_scored,ict_index,influence,penalties_saved,red_cards,saves,threat,yellow_cards
0,Vicente Guaita,1,1,Crystal Palace,Arsenal,True,2223,90,1,0,...,0.0,2,0,1.4,13.8,0,0,1,0.0,0
1,James Tomkins,2,1,Crystal Palace,Arsenal,True,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
2,James McArthur,3,1,Crystal Palace,Arsenal,True,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
3,Christian Benteke,4,1,Crystal Palace,Arsenal,True,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
4,Joel Ward,2,1,Crystal Palace,Arsenal,True,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
529,Josh Wilson-Esbrand,2,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
530,Liam Delap,4,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
531,Stefan Ortega Moreno,1,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
532,Kalvin Phillips,3,1,Manchester City,West Ham United,False,2223,1,1,0,...,0.5,0,0,0.1,0.6,0,0,0,0.0,0


In [317]:
# Columns kept in the training set, in the order they are written out.
keep = [
    'player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
    'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
    'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
    'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
    'threat', 'yellow_cards',
]

# Append the new gameweek rows, drop exact duplicate rows and renumber the index from 0.
training_data = (
    pd.concat([training_data[keep], season_gw1_stats])
    .drop_duplicates()
    .reset_index(drop=True)
)

# Written to a separate file; training_data.csv itself is not touched here.
training_data.to_csv(path/'training_data_updated.csv', index=False)

In [318]:
# Display the combined training data (existing rows plus the new gameweek) as the cell output.
training_data

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,creativity,goals_conceded,goals_scored,ict_index,influence,penalties_saved,red_cards,saves,threat,yellow_cards
0,Aaron Cresswell,2,1,West Ham United,Chelsea,False,1617,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
1,Aaron Lennon,3,1,Everton,Tottenham Hotspur,True,1617,15,1,0,...,0.3,0,0,0.9,8.2,0,0,0,0.0,0
2,Aaron Ramsey,3,1,Arsenal,Liverpool,True,1617,60,2,0,...,4.9,3,0,3.0,2.2,0,0,0,23.0,0
3,Abdoulaye Doucouré,3,1,Watford,Southampton,False,1617,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
4,Abdul Rahman Baba,2,1,Chelsea,West Ham United,True,1617,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140040,Josh Wilson-Esbrand,2,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
140041,Liam Delap,4,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
140042,Stefan Ortega Moreno,1,1,Manchester City,West Ham United,False,2223,0,0,0,...,0.0,0,0,0.0,0.0,0,0,0,0.0,0
140043,Kalvin Phillips,3,1,Manchester City,West Ham United,False,2223,1,1,0,...,0.5,0,0,0.1,0.6,0,0,0,0.0,0
