# Data Update (Run For Each GW)

This notebook updates the training data weekly for the gameweek points prediction model. The notebook takes in new PL stats per gameweek and adds them as new rows to the original training dataset.

### REMINDERS BEFORE RUNNING:
- Download updated player_stats_2223 (https://github.com/vaastav/Fantasy-Premier-League/tree/master/data/2022-23)
- Download updated team_standard_stats_2223 & player_standard_stats_2223 (https://fbref.com/en/comps/9/stats/Premier-League-Stats)

In [1]:
#Import relevant libraries and packages
import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path
import json
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

#Paths
path = Path('Data')
path_20_21 = Path('Data/2020-21')
path_21_22 = Path('Data/2021-22')
path_22_23 = Path('Data/2022-23')

#Import data sets
#Training data
training_data = pd.read_csv(path/'training_data_updated.csv', index_col=0, 
                       dtype={'season':str,
                              'comp':str,
                              'squad':str})
training_data = training_data.reset_index()

#22-23 season gameweeks
season_gws = pd.read_csv(path/'remaining_season.csv', index_col=0)

#Player info 21-22 and 22-23
player_stats_2122 = pd.read_csv(path_21_22/'gws/merged_gw.csv')
player_stats_2223 = pd.read_csv(path_22_23/'gws/merged_gw.csv')

#Team stats 20-21, 21-22 and 22-23
team_standard_stats_2021 = pd.read_csv(path_20_21/'team_standard_stats_2021.csv')
team_standard_stats_2122 = pd.read_csv(path_21_22/'team_standard_stats_2122.csv')
team_standard_stats_2223 = pd.read_csv(path_22_23/'team_standard_stats_2223.csv')

#Player stats 21-22 and 22-23
player_standard_stats_2122 = pd.read_csv(path_21_22/'player_standard_stats_2122.csv')
player_standard_stats_2223 = pd.read_csv(path_22_23/'player_standard_stats_2223.csv')

#Player's raw data 22-23
players_raw = pd.read_csv(path_22_23/'players_raw.csv')

#22-23 Teams (Teams & IDs)
teams = pd.read_csv(path_22_23/'teams.csv')

#Cleaned player data 22-23
cleaned_players = pd.read_csv(path_22_23/'cleaned_players.csv')

In [2]:
#Function to merge dfs by partial match (substring)
def fuzzy_merge(df_1, df_2, key1, key2, threshold=90, limit=2):
    """
    :param df_1: the left table to join
    :param df_2: the right table to join
    :param key1: key column of the left table
    :param key2: key column of the right table
    :param threshold: how close the matches should be to return a match, based on Levenshtein distance
    :param limit: the amount of matches that will get returned, these are sorted high to low
    :return: dataframe with boths keys and matches
    """
    s = df_2[key2].tolist()
    
    m = df_1[key1].apply(lambda x: process.extract(x, s, limit=limit))    
    df_1['matches'] = m
    
    m2 = df_1['matches'].apply(lambda x: ', '.join([i[0] for i in x if i[1] >= threshold]))
    df_1['matches'] = m2
    
    return df_1

We will create a new training data frame with the original training data from the 20-21 season, the 21-22 season (adding gw 38 data), and the 22-23 season (adding each new gw weekly):

## 20-21 Training Data:

In [3]:
# original_training_data_2021 = original_training_data[original_training_data['season'] == '2021'][['player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
#        'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
#        'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
#        'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
#        'threat', 'yellow_cards', 'team_a_score', 'team_h_score', 'xg', 'xa', 'npxg']]

# original_training_data_2021['team'] = original_training_data_2021['team'].replace({'Manchester United': 'Manchester Utd', 
#                                           'Newcastle United': 'Newcastle Utd',
#                                           'West Ham United': 'West Ham',
#                                           'Tottenham Hotspur': 'Tottenham',
#                                           'Brighton and Hove Albion': 'Brighton',
#                                           'Wolverhampton Wanderers': 'Wolves',
#                                           'Sheffield United': 'Sheffield Utd',
#                                           'West Bromwich Albion': 'West Brom'})

# original_training_data_2021['opponent_team'] = original_training_data_2021['opponent_team'].replace({'Manchester United': 'Manchester Utd', 
#                                           'Newcastle United': 'Newcastle Utd',
#                                           'West Ham United': 'West Ham',
#                                           'Tottenham Hotspur': 'Tottenham',
#                                           'Brighton and Hove Albion': 'Brighton',
#                                           'Wolverhampton Wanderers': 'Wolves',
#                                           'Sheffield United': 'Sheffield Utd',
#                                           'West Bromwich Albion': 'West Brom'})

# training_data_2021 = original_training_data_2021.merge(team_standard_stats_2021, on='team')
# training_data_2021 = training_data_2021.sort_values(['gw','team'])
# training_data_2021 = training_data_2021.reset_index()
# training_data_2021 = training_data_2021.drop('index', axis=1)
# training_data_2021

## 21-22 Training Data:

In [4]:
# original_training_data_2122 = original_training_data[original_training_data['season'] == '2122'][['player', 'position', 
#        'gw', 'team', 'opponent_team', 'was_home',
#        'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
#        'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
#        'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
#        'threat', 'yellow_cards', 'team_a_score', 'team_h_score', 'xg', 'xa', 'npxg']]
                                                                                                 
# original_training_data_2122['team'] = original_training_data_2122['team'].replace({'Manchester United': 'Manchester Utd', 
#                                           'Newcastle United': 'Newcastle Utd',
#                                           'West Ham United': 'West Ham',
#                                           'Tottenham Hotspur': 'Tottenham',
#                                           'Brighton and Hove Albion': 'Brighton',
#                                           'Wolverhampton Wanderers': 'Wolves',
#                                           'Sheffield United': 'Sheffield Utd',
#                                           'West Bromwich Albion': 'West Brom'})

# original_training_data_2122['opponent_team'] = original_training_data_2122['opponent_team'].replace({'Manchester United': 'Manchester Utd', 
#                                           'Newcastle United': 'Newcastle Utd',
#                                           'West Ham United': 'West Ham',
#                                           'Tottenham Hotspur': 'Tottenham',
#                                           'Brighton and Hove Albion': 'Brighton',
#                                           'Wolverhampton Wanderers': 'Wolves',
#                                           'Sheffield United': 'Sheffield Utd',
#                                           'West Bromwich Albion': 'West Brom'})

# training_data_2122 = original_training_data_2122.merge(team_standard_stats_2122, on='team')
# training_data_2122 = training_data_2122.sort_values(['gw','team'])
# training_data_2122 = training_data_2122.reset_index()
# training_data_2122 = training_data_2122.drop('index', axis=1)
# training_data_2122

#### Add Gameweek 38 data:

In [5]:
# #Last season's gameweek 38 data (RUN JUST ONCE) 
# player_stats_2122 = player_stats_2122[player_stats_2122['GW'] == 38]
# player_stats_2122['season'] = '2122'
# relevant_columns = ['name', 'position', 'GW', 'team', 'opponent_team', 'was_home', 'season', 'minutes','total_points',
#                     'assists', 'bonus', 'bps', 'clean_sheets',
#                     'creativity', 'goals_conceded', 'goals_scored', 'ict_index',
#                     'influence', 'penalties_saved', 'red_cards', 'saves', 'threat',
#                     'yellow_cards', 'team_a_score', 'team_h_score']
# player_stats_2122 = player_stats_2122[relevant_columns]
# player_stats_2122 = player_stats_2122.rename(columns={'name': 'player', 'GW':'gw'})

# #Change position from string to int
# def position_assignment(data):
#     if data['position'] == 'GK':
#         return 1
#     if data['position'] == 'DEF':
#         return 2
#     if data['position'] == 'MID':
#         return 3
#     if data['position'] == 'FWD':
#         return 4
    
# player_stats_2122['position'] = player_stats_2122.apply(position_assignment, axis = 1)

# #Change opponent_team from int to string
# def team_assignment(data):
#     if data['opponent_team'] == 1:
#         return 'Arsenal'
#     if data['opponent_team'] == 2:
#         return 'Aston Villa'
#     if data['opponent_team'] == 3:
#         return 'Brentford'
#     if data['opponent_team'] == 4:
#         return 'Brighton'
#     if data['opponent_team'] == 5:
#         return 'Burnley'
#     if data['opponent_team'] == 6:
#         return 'Chelsea'
#     if data['opponent_team'] == 7:
#         return 'Crystal Palace'
#     if data['opponent_team'] == 8:
#         return 'Everton'
#     if data['opponent_team'] == 9:
#         return 'Leicester City'
#     if data['opponent_team'] == 10:
#         return 'Leeds'
#     if data['opponent_team'] == 11:
#         return 'Liverpool'
#     if data['opponent_team'] == 12:
#         return 'Manchester City'
#     if data['opponent_team'] == 13:
#         return 'Manchester Utd'
#     if data['opponent_team'] == 14:
#         return 'Newcastle Utd'
#     if data['opponent_team'] == 15:
#         return 'Norwich'
#     if data['opponent_team'] == 16:
#         return 'Southampton'
#     if data['opponent_team'] == 17:
#         return 'Tottenham'
#     if data['opponent_team'] == 18:
#         return 'Watford'
#     if data['opponent_team'] == 19:
#         return 'West Ham'
#     if data['opponent_team'] == 20:
#         return 'Wolves'
    
# player_stats_2122['opponent_team'] = player_stats_2122.apply(team_assignment, axis = 1)
# gw_38 = player_stats_2122
# gw_38 = gw_38.sort_values(['team'])
# gw_38

In [6]:
# training_data_2122 = pd.concat([training_data_2122, gw_38])
# training_data_2122

In [7]:
# #Concatenate 20-21 training data to 21-22 training data
# updated_training_data = pd.concat([training_data_2021, training_data_2122])

## Gameweek 22-23 Data:
#### UPDATE GAMEWEEK EVERY WEEK

In [8]:
#REMINDER TO CHANGE GW WEEKLY TO MOST RECENT GW
gameweek = 16

#Player stats for most recent gameweek
player_stats = player_stats_2223[player_stats_2223['GW'] == gameweek]
relevant_columns = ['name', 'minutes','total_points', 'assists', 'bonus', 'bps', 'clean_sheets',
       'creativity', 'goals_conceded', 'goals_scored', 'ict_index',
       'influence', 'penalties_saved', 'red_cards', 'saves', 'threat',
       'yellow_cards', 'team_a_score', 'team_h_score']
player_stats = player_stats[relevant_columns]
player_stats = player_stats.rename(columns={'name': 'player'})
player_stats

Unnamed: 0,player,minutes,total_points,assists,bonus,bps,clean_sheets,creativity,goals_conceded,goals_scored,ict_index,influence,penalties_saved,red_cards,saves,threat,yellow_cards,team_a_score,team_h_score
8491,Nathan Redmond,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,1,3
8492,Junior Stanislas,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,0,3
8493,Armando Broja,72,2,0,0,2,0,12.3,1,0,3.9,9.2,0,0,0,17.0,0,0,1
8494,Fabian Schär,90,7,0,1,26,1,10.9,0,0,5.3,22.8,0,0,0,19.0,0,0,1
8495,Jonny Evans,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9153,Oliver Skipp,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,3,4
9154,Ryan Sessegnon,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,3,4
9155,Ashley Young,21,0,0,0,1,0,0.0,0,0,0.0,0.0,0,0,0,0.0,1,2,1
9156,Jeremy Sarmiento Morante,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0,0,0,0.0,0,2,1


In [9]:
#Player Raw Data Merged with player's team
players_raw = players_raw[['first_name', 'second_name', 'team_code']]
teams = teams[['code', 'name']]
players_raw = players_raw.merge(teams, left_on = 'team_code', right_on= 'code')
players_raw['player'] = players_raw['first_name'] + ' ' + players_raw['second_name']
players_raw = players_raw.rename(columns={'name': 'team'})
players_raw = players_raw[['player', 'team']]
players_raw['team'] = players_raw['team'].replace({'Man Utd': 'Manchester Utd', 
                                          'Newcastle United': 'Newcastle Utd',
                                          'West Ham United': 'West Ham',
                                          'Tottenham Hotspur': 'Tottenham',
                                          'Brighton and Hove Albion': 'Brighton',
                                          'Wolverhampton Wanderers': 'Wolves',
                                          'Leicester': 'Leicester City',
                                          'Man City': 'Manchester City',
                                          'Newcastle': 'Newcastle Utd',
                                          "Nott'm Forest": 'Nottingham Forest',
                                          'Spurs': 'Tottenham'})
players_raw

Unnamed: 0,player,team
0,Cédric Alves Soares,Arsenal
1,Granit Xhaka,Arsenal
2,Mohamed Elneny,Arsenal
3,Rob Holding,Arsenal
4,Thomas Partey,Arsenal
...,...,...
662,Diego Da Silva Costa,Wolves
663,Boubacar Traoré,Wolves
664,Nathan Fraser,Wolves
665,Harvey Griffiths,Wolves


#### UPDATE GAMEWEEK HERE:

In [10]:
#Gameweek Data
season_gws['opponent_team'] = season_gws['opponent_team'].replace({'Manchester United': 'Manchester Utd', 
                                          'Newcastle United': 'Newcastle Utd',
                                          'West Ham United': 'West Ham',
                                          'Tottenham Hotspur': 'Tottenham',
                                          'Brighton and Hove Albion': 'Brighton',
                                          'Wolverhampton Wanderers': 'Wolves'})

season_gws['team'] = season_gws['team'].replace({'Manchester United': 'Manchester Utd', 
                                          'Newcastle United': 'Newcastle Utd',
                                          'West Ham United': 'West Ham',
                                          'Tottenham Hotspur': 'Tottenham',
                                          'Brighton and Hove Albion': 'Brighton',
                                          'Wolverhampton Wanderers': 'Wolves'})

season_gws = season_gws[['gw','team', 'opponent_team', 'was_home', 'season']]
season_gws = season_gws.drop_duplicates()
season_gws = season_gws.reset_index().drop('index', axis=1)

#CHANGE GAMEWEEK HERE
season_gws = season_gws[season_gws['gw'] == 16]
season_gws

Unnamed: 0,gw,team,opponent_team,was_home,season
150,16,West Ham,Leicester City,True,2223
151,16,Tottenham,Leeds,True,2223
152,16,Nottingham Forest,Crystal Palace,True,2223
153,16,Newcastle Utd,Chelsea,True,2223
154,16,Wolves,Arsenal,True,2223
155,16,Liverpool,Southampton,True,2223
156,16,Fulham,Manchester Utd,True,2223
157,16,Brighton,Aston Villa,True,2223
158,16,Bournemouth,Everton,True,2223
159,16,Manchester City,Brentford,True,2223


In [11]:
#Merge gameweek info with player names
season_player_merge = season_gws.merge(players_raw, on='team')
season_player_merge = season_player_merge[['player', 'gw', 'team', 'opponent_team', 'was_home', 'season']]
season_player_merge

Unnamed: 0,player,gw,team,opponent_team,was_home,season
0,Lukasz Fabianski,16,West Ham,Leicester City,True,2223
1,Angelo Ogbonna,16,West Ham,Leicester City,True,2223
2,Aaron Cresswell,16,West Ham,Leicester City,True,2223
3,Michail Antonio,16,West Ham,Leicester City,True,2223
4,Craig Dawson,16,West Ham,Leicester City,True,2223
...,...,...,...,...,...,...
662,Mathias Jorgensen,16,Brentford,Manchester City,False,2223
663,Ryan Trevitt,16,Brentford,Manchester City,False,2223
664,Matthew Cox,16,Brentford,Manchester City,False,2223
665,Tristan Crama,16,Brentford,Manchester City,False,2223


In [12]:
#Add player's position
cleaned_players['player'] = cleaned_players['first_name'] + ' ' + cleaned_players['second_name']
cleaned_players = cleaned_players[['player', 'element_type']]
cleaned_players = cleaned_players.rename(columns={'element_type': 'position'})
season_player_merge = season_player_merge.merge(cleaned_players, on='player')

#Convert position from string to int
def position_assignment(data):
    if data['position'] == 'GK':
        return 1
    if data['position'] == 'DEF':
        return 2
    if data['position'] == 'MID':
        return 3
    if data['position'] == 'FWD':
        return 4

season_player_merge['position'] = season_player_merge.apply(position_assignment, axis = 1)

#Ordered and clean df with player gw data
season_player_merge = season_player_merge[['player', 'position', 'gw', 'team', 'opponent_team', 'was_home', 'season']]
season_player_merge = season_player_merge.drop_duplicates()
season_player_merge

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season
0,Lukasz Fabianski,1,16,West Ham,Leicester City,True,2223
1,Angelo Ogbonna,2,16,West Ham,Leicester City,True,2223
2,Aaron Cresswell,2,16,West Ham,Leicester City,True,2223
3,Michail Antonio,4,16,West Ham,Leicester City,True,2223
4,Craig Dawson,2,16,West Ham,Leicester City,True,2223
...,...,...,...,...,...,...,...
664,Mathias Jorgensen,2,16,Brentford,Manchester City,False,2223
665,Ryan Trevitt,3,16,Brentford,Manchester City,False,2223
666,Matthew Cox,1,16,Brentford,Manchester City,False,2223
667,Tristan Crama,2,16,Brentford,Manchester City,False,2223


In [13]:
#Cleaning player standard stats
# player_standard_stats_2223 = player_standard_stats_2223[['Unnamed: 1', 'Per 90 Minutes.1', 
#                                                          'Unnamed: 28', 'Unnamed: 30']]
# player_standard_stats_2223.columns = player_standard_stats_2223.iloc[0]
# player_standard_stats_2223 = player_standard_stats_2223.drop(index=0)
# player_standard_stats_2223 = player_standard_stats_2223.rename(columns={'Player': 'player',
#                                                                 'xG': 'xg',
#                                                                 'xAG': 'xa',
#                                                                 'npxG': 'npxg'})
# player_standard_stats_2223.to_csv(path_22_23/'player_standard_stats_2223.csv', index=False)
player_standard_stats_2223

Unnamed: 0,player,xg,xa,npxg
0,Brenden Aaronson,0.14,0.22,0.14
1,Che Adams,0.33,0.14,0.33
2,Tyler Adams,0.00,0.07,0.00
3,Tosin Adarabioyo,0.01,0.00,0.01
4,Nayef Aguerd,0.07,0.06,0.07
...,...,...,...,...
466,Jordan Zemura,0.00,0.10,0.00
467,Oleksandr Zinchenko,0.02,0.07,0.02
468,Hakim Ziyech,0.09,0.07,0.09
469,Kurt Zouma,0.08,0.00,0.08


In [14]:
#Merging last gameweek's player stats with last gameweek's player info and adding players' xg, xa, and npxg
season_gw = fuzzy_merge(season_player_merge, player_stats, 'player', 'player', threshold=91)
season_gw_stats = season_gw.merge(player_stats, left_on = 'matches', right_on = 'player')
season_gw_stats = season_gw_stats.drop(['player_x', 'matches'], axis=1)
season_gw_stats = season_gw_stats.rename(columns={'player_y': 'player'})
season_gw_stats = season_gw_stats[['player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
       'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
       'threat', 'yellow_cards', 'team_a_score', 'team_h_score']]

season_gw_stats = fuzzy_merge(season_gw_stats, player_standard_stats_2223, 'player', 'player', threshold=91)
season_gw_stats['matches'].replace('', np.nan, inplace=True)
season_gw_no_match = season_gw_stats[season_gw_stats['matches'].isna()]
season_gw_no_match[['xg', 'xa', 'npxg']] = 0
season_gw_no_match = season_gw_no_match.drop('matches', axis=1)
season_gw_stats = season_gw_stats.dropna(subset=['matches'])
season_gw_stats = season_gw_stats.merge(player_standard_stats_2223, left_on = 'matches', right_on = 'player')
season_gw_stats = season_gw_stats.drop(['player_x', 'matches'], axis=1)
season_gw_stats = season_gw_stats.rename(columns={'player_y': 'player'})
season_gw_stats = season_gw_stats[['player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
       'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
       'threat', 'yellow_cards', 'team_a_score', 'team_h_score', 'xg', 'xa', 'npxg']]
season_gw_stats = pd.concat([season_gw_stats, season_gw_no_match])
season_gw_stats

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  season_gw_no_match[['xg', 'xa', 'npxg']] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  season_gw_no_match[['xg', 'xa', 'npxg']] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  season_gw_no_match[['xg', 'xa', 'npxg']] = 0


Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,penalties_saved,red_cards,saves,threat,yellow_cards,team_a_score,team_h_score,xg,xa,npxg
0,Angelo Ogbonna,2,16,West Ham,Leicester City,True,2223,0,0,0,...,0,0,0,0.0,0,2,0,0.00,0.39,0.00
1,Aaron Cresswell,2,16,West Ham,Leicester City,True,2223,90,1,0,...,0,0,0,0.0,0,2,0,0.02,0.18,0.02
2,Michail Antonio,4,16,West Ham,Leicester City,True,2223,0,0,0,...,0,0,0,0.0,0,2,0,0.30,0.06,0.30
3,Craig Dawson,2,16,West Ham,Leicester City,True,2223,90,0,0,...,0,0,0,4.0,1,2,0,0.02,0.02,0.02
4,Manuel Lanzini,3,16,West Ham,Leicester City,True,2223,0,0,0,...,0,0,0,0.0,0,2,0,0.04,0.00,0.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,Fin Stevens,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0,0,0,0.0,0,2,1,0.00,0.00,0.00
661,Ryan Trevitt,3,16,Brentford,Manchester City,False,2223,0,0,0,...,0,0,0,0.0,0,2,1,0.00,0.00,0.00
662,Matthew Cox,1,16,Brentford,Manchester City,False,2223,0,0,0,...,0,0,0,0.0,0,2,1,0.00,0.00,0.00
663,Tristan Crama,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0,0,0,0.0,0,2,1,0.00,0.00,0.00


In [15]:
#Cleaning team standard stats
# team_standard_stats_2223 = team_standard_stats_2223[['Unnamed: 0', 'Per 90 Minutes.1', 
#                                                          'Unnamed: 25', 'Unnamed: 27']]
# team_standard_stats_2223.columns = team_standard_stats_2223.iloc[0]
# team_standard_stats_2223 = team_standard_stats_2223.drop(index=0)
# team_standard_stats_2223 = team_standard_stats_2223.rename(columns={'Squad': 'team',
#                                                                 'xG': 'team_xg',
#                                                                 'xAG': 'team_xa',
#                                                                 'npxG': 'team_npxg'})
# team_standard_stats_2223['team'] = team_standard_stats_2223['team'].replace({'Leeds United': 'Leeds',
#                                                                              "Nott'ham Forest": 'Nottingham Forest'})
# team_standard_stats_2223.to_csv(path_22_23/'team_standard_stats_2223.csv', index=False)
team_standard_stats_2223

Unnamed: 0,team,team_xg,team_xa,team_npxg
0,Arsenal,1.87,1.31,1.84
1,Aston Villa,1.18,0.82,1.08
2,Bournemouth,0.77,0.63,0.77
3,Brentford,1.35,0.86,1.15
4,Brighton,1.53,1.12,1.38
5,Chelsea,1.1,0.83,1.0
6,Crystal Palace,1.0,0.77,0.88
7,Everton,1.03,0.74,1.03
8,Fulham,1.31,0.75,1.05
9,Leeds,1.45,0.96,1.34


#### UPDATE GAMEWEEK HERE:

In [16]:
#Add team xg, xa, npxg
gw16 = season_gw_stats 
gw16 = gw16.merge(team_standard_stats_2223, left_on = 'team', right_on ='team')
gw16 = gw16[['player', 'position', 'gw', 'team', 'opponent_team', 'was_home',
       'season', 'minutes', 'total_points', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored',
       'ict_index', 'influence', 'penalties_saved', 'red_cards', 'saves',
       'threat', 'yellow_cards', 'team_a_score', 'team_h_score', 'xg', 'xa', 'npxg',
       'team_xg', 'team_xa', 'team_npxg']]

gw16

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,threat,yellow_cards,team_a_score,team_h_score,xg,xa,npxg,team_xg,team_xa,team_npxg
0,Angelo Ogbonna,2,16,West Ham,Leicester City,True,2223,0,0,0,...,0.0,0,2,0,0.00,0.39,0.00,1.24,0.77,1.08
1,Aaron Cresswell,2,16,West Ham,Leicester City,True,2223,90,1,0,...,0.0,0,2,0,0.02,0.18,0.02,1.24,0.77,1.08
2,Michail Antonio,4,16,West Ham,Leicester City,True,2223,0,0,0,...,0.0,0,2,0,0.30,0.06,0.30,1.24,0.77,1.08
3,Craig Dawson,2,16,West Ham,Leicester City,True,2223,90,0,0,...,4.0,1,2,0,0.02,0.02,0.02,1.24,0.77,1.08
4,Manuel Lanzini,3,16,West Ham,Leicester City,True,2223,0,0,0,...,0.0,0,2,0,0.04,0.00,0.04,1.24,0.77,1.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653,Fin Stevens,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.00,0.00,0.00,1.35,0.86,1.15
654,Ryan Trevitt,3,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.00,0.00,0.00,1.35,0.86,1.15
655,Matthew Cox,1,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.00,0.00,0.00,1.35,0.86,1.15
656,Tristan Crama,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.00,0.00,0.00,1.35,0.86,1.15


In [17]:
#Concatenating new data to training data
updated_training_data = pd.concat([training_data, gw16])
updated_training_data = updated_training_data.reset_index()
updated_training_data = updated_training_data.drop('index', axis=1)
updated_training_data

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,threat,yellow_cards,team_a_score,team_h_score,xg,xa,npxg,team_xg,team_xa,team_npxg
0,Ainsley Maitland-Niles,2,1,Arsenal,Fulham,False,2021,90,6,0,...,4.0,0,3,0,0.0,0.0,0.0,1.41,0.96,1.29
1,Alexandre Lacazette,4,1,Arsenal,Fulham,False,2021,86,7,0,...,48.0,0,3,0,0.4,0.2,0.4,1.41,0.96,1.29
2,Bernd Leno,1,1,Arsenal,Fulham,False,2021,90,7,0,...,0.0,0,3,0,0.0,0.0,0.0,1.41,0.96,1.29
3,Bukayo Saka,3,1,Arsenal,Fulham,False,2021,0,0,0,...,0.0,0,3,0,,,,1.41,0.96,1.29
4,Calum Chambers,2,1,Arsenal,Fulham,False,2021,0,0,0,...,0.0,0,3,0,,,,1.41,0.96,1.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58288,Fin Stevens,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58289,Ryan Trevitt,3,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58290,Matthew Cox,1,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58291,Tristan Crama,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15


In [18]:
#Make sure all team names are the same in entire df
updated_training_data.team.unique()

array(['Arsenal', 'Brighton', 'Chelsea', 'Crystal Palace', 'Everton',
       'Fulham', 'Leeds', 'Leicester City', 'Liverpool', 'Newcastle Utd',
       'Sheffield Utd', 'Southampton', 'Tottenham', 'West Brom',
       'West Ham', 'Wolves', 'Aston Villa', 'Burnley', 'Manchester City',
       'Manchester Utd', 'Brentford', 'Norwich', 'Watford', 'Bournemouth',
       'Nottingham Forest'], dtype=object)

In [19]:
#Make sure all opponent_team names are the same in entire df
updated_training_data.opponent_team.unique()

array(['Fulham', 'Chelsea', 'Brighton', 'Southampton', 'Tottenham',
       'Arsenal', 'Liverpool', 'West Brom', 'Leeds', 'West Ham', 'Wolves',
       'Crystal Palace', 'Everton', 'Leicester City', 'Newcastle Utd',
       'Sheffield Utd', 'Manchester Utd', 'Burnley', 'Aston Villa',
       'Manchester City', 'Brentford', 'Watford', 'Norwich',
       'Nottingham Forest', 'Bournemouth'], dtype=object)

In [20]:
#Save updated training data 
updated_training_data.to_csv(path/'training_data_updated.csv', index=False)
updated_training_data

Unnamed: 0,player,position,gw,team,opponent_team,was_home,season,minutes,total_points,assists,...,threat,yellow_cards,team_a_score,team_h_score,xg,xa,npxg,team_xg,team_xa,team_npxg
0,Ainsley Maitland-Niles,2,1,Arsenal,Fulham,False,2021,90,6,0,...,4.0,0,3,0,0.0,0.0,0.0,1.41,0.96,1.29
1,Alexandre Lacazette,4,1,Arsenal,Fulham,False,2021,86,7,0,...,48.0,0,3,0,0.4,0.2,0.4,1.41,0.96,1.29
2,Bernd Leno,1,1,Arsenal,Fulham,False,2021,90,7,0,...,0.0,0,3,0,0.0,0.0,0.0,1.41,0.96,1.29
3,Bukayo Saka,3,1,Arsenal,Fulham,False,2021,0,0,0,...,0.0,0,3,0,,,,1.41,0.96,1.29
4,Calum Chambers,2,1,Arsenal,Fulham,False,2021,0,0,0,...,0.0,0,3,0,,,,1.41,0.96,1.29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58288,Fin Stevens,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58289,Ryan Trevitt,3,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58290,Matthew Cox,1,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
58291,Tristan Crama,2,16,Brentford,Manchester City,False,2223,0,0,0,...,0.0,0,2,1,0.0,0.0,0.0,1.35,0.86,1.15
