### Obtaining up-to-date data to train ML models
Will be updated to include data up to Dec. 28, 2023 (GW 19 completed in the 2023/2024 season). 
Result: large dataframe containing player data from each game from season 2016/2017 to mid-2023/2024.

In [91]:
import pandas as pd
import numpy as np
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.support import expected_conditions as EC

In [3]:
# reading in initial data - gives player data for each GW of the season from 16/17 to 22/23
data = pd.read_csv("merged_seasons.csv")

  data = pd.read_csv("merged_seasons.csv")


In [4]:
data

Unnamed: 0,season_x,name,position,team_x,assists,bonus,bps,clean_sheets,creativity,element,...,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW
0,2016-17,Aaron Cresswell,DEF,,0,0,0,0,0.0,454,...,2.0,0.0,0,0,0,0,55,False,0,1
1,2016-17,Aaron Lennon,MID,,0,0,6,0,0.3,142,...,1.0,0.0,1,0,0,0,60,True,0,1
2,2016-17,Aaron Ramsey,MID,,0,0,5,0,4.9,16,...,3.0,23.0,2,0,0,0,80,True,0,1
3,2016-17,Abdoulaye Doucouré,MID,,0,0,0,0,0.0,482,...,1.0,0.0,0,0,0,0,50,False,0,1
4,2016-17,Adam Forshaw,MID,,0,0,3,0,1.3,286,...,1.0,0.0,1,0,0,0,45,True,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96164,2022-23,Oliver Skipp,MID,Spurs,0,0,16,0,0.0,441,...,1.0,0.0,2,100,742,642,43,False,0,38
96165,2022-23,Ryan Sessegnon,DEF,Spurs,0,0,0,0,0.0,436,...,1.0,0.0,0,-166,24,190,44,False,0,38
96166,2022-23,Ashley Young,DEF,Aston Villa,0,0,0,0,0.0,538,...,2.0,0.0,0,-1146,1522,2668,43,True,0,38
96167,2022-23,Jeremy Sarmiento Morante,MID,Brighton,0,0,0,0,0.0,119,...,2.0,0.0,0,-17,22,39,45,False,0,38


In [5]:
np.unique(data["season_x"]) # note that the current season is not included in the initial dataset

array(['2016-17', '2017-18', '2020-21', '2021-22', '2022-23'],
      dtype=object)

In [6]:
# reading in the list of player IDs to convert IDs to names
# we want the list of IDs so that we can obtain this season's info per player
player_IDs = pd.read_csv("player_IDs.csv")
IDs = player_IDs["id"]

In [7]:
player_IDs

Unnamed: 0,first_name,second_name,id
0,Folarin,Balogun,1
1,Cédric,Alves Soares,2
2,Mohamed,Elneny,3
3,Fábio,Ferreira Vieira,4
4,Gabriel,dos Santos Magalhães,5
...,...,...,...
753,Nathan,Fraser,704
754,Jean-Ricner,Bellegarde,715
755,Matthew,Whittingham,749
756,Tawanda,Chirewa,750


In [8]:
np.where(player_IDs.duplicated(subset=["second_name"])) # potentially repeated player names - keep in mind

(array([223, 243, 250, 314, 369, 385, 406, 414, 419, 435, 454, 475, 515,
        535, 543, 556, 561, 563, 578, 594, 597, 603, 608, 619, 632, 639,
        642, 646, 649, 650, 654, 659, 675, 676, 689, 707, 748, 753]),)

In [9]:
# obtain each player's data for each all GWs this season
# format: https://fantasy.premierleague.com/api/element-summary/{player_id}/
player_api_link = "https://fantasy.premierleague.com/api/element-summary/"

In [10]:
data.columns

Index(['season_x', 'name', 'position', 'team_x', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'fixture', 'goals_conceded',
       'goals_scored', 'ict_index', 'influence', 'kickoff_time', 'minutes',
       'opponent_team', 'opp_team_name', 'own_goals', 'penalties_missed',
       'penalties_saved', 'red_cards', 'round', 'saves', 'selected',
       'team_a_score', 'team_h_score', 'threat', 'total_points',
       'transfers_balance', 'transfers_in', 'transfers_out', 'value',
       'was_home', 'yellow_cards', 'GW'],
      dtype='object')

In [13]:
# function that gets player data from after a given UTC date (during the current season)
## player_ids: list of FPL player IDs (integers)
def get_player_stats(player_ids, date = "08-01-2023"): 

    to_add_df = pd.DataFrame()
    date = pd.to_datetime(date).tz_localize("UTC")
    
    for player_id in player_ids: 
        # get the player's name from name-id correspondance df
        player_row = player_IDs[player_IDs["id"] == player_id]
        player_name = f"{player_row['first_name'].iloc[0]} {player_row['second_name'].iloc[0]}"
        # access API endpoint and obtain dataframe of player's stats this season
        response = requests.get(f"{player_api_link}{player_id}/")
        api_data = response.json()
        fixture_stats = api_data.get("history", [])
        fixture_stats = [stat for stat in fixture_stats if pd.to_datetime(stat["kickoff_time"]) > latest]
        add_df = pd.DataFrame(fixture_stats)
        nrows = add_df.shape[0]
        names = [player_name] * nrows
        add_df.insert(0, column="name", value=names)
        # add this player's stats to the large dataframe w all players
        to_add_df = pd.concat([to_add_df, add_df], ignore_index=True)

    return to_add_df

In [14]:
to_add_df = get_player_stats(IDs)
to_add_df

Unnamed: 0,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,...,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,Folarin Balogun,1,2,16,0,True,2023-08-12T12:00:00Z,2,1,1,...,0,0.00,0.00,0.00,0.00,45,0,59090,0,0
1,Folarin Balogun,1,12,8,0,False,2023-08-21T19:00:00Z,0,1,2,...,0,0.00,0.00,0.00,0.00,45,-4744,63768,4959,9703
2,Folarin Balogun,1,21,10,0,True,2023-08-26T14:00:00Z,2,2,3,...,0,0.00,0.00,0.00,0.00,45,-7682,58109,2507,10189
3,Folarin Balogun,1,31,14,0,True,2023-09-03T15:30:00Z,3,1,4,...,0,0.00,0.00,0.00,0.00,44,-9696,49814,2558,12254
4,Folarin Balogun,1,43,9,0,False,2023-09-17T15:30:00Z,0,1,5,...,0,0.00,0.00,0.00,0.00,44,-12786,37048,0,12786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13679,Justin Hubner,751,149,6,0,True,2023-12-05T19:30:00Z,1,0,15,...,0,0.00,0.00,0.00,0.00,40,344,555,375,31
13680,Justin Hubner,751,160,16,0,True,2023-12-09T15:00:00Z,1,1,16,...,0,0.00,0.00,0.00,0.00,40,329,1078,435,106
13681,Justin Hubner,751,170,19,0,False,2023-12-17T14:00:00Z,3,0,17,...,0,0.00,0.00,0.00,0.00,40,229,1583,418,189
13682,Justin Hubner,751,180,7,0,True,2023-12-24T13:00:00Z,2,1,18,...,0,0.00,0.00,0.00,0.00,40,42,1763,197,155


In [15]:
to_add_df.dtypes

name                          object
element                        int64
fixture                        int64
opponent_team                  int64
total_points                   int64
was_home                        bool
kickoff_time                  object
team_h_score                   int64
team_a_score                   int64
round                          int64
minutes                        int64
goals_scored                   int64
assists                        int64
clean_sheets                   int64
goals_conceded                 int64
own_goals                      int64
penalties_saved                int64
penalties_missed               int64
yellow_cards                   int64
red_cards                      int64
saves                          int64
bonus                          int64
bps                            int64
influence                     object
creativity                    object
threat                        object
ict_index                     object
s

In [16]:
are_numeric = list(data.select_dtypes(include=['number']).columns)
are_numeric

['assists',
 'bonus',
 'bps',
 'clean_sheets',
 'creativity',
 'element',
 'fixture',
 'goals_conceded',
 'goals_scored',
 'ict_index',
 'influence',
 'minutes',
 'opponent_team',
 'own_goals',
 'penalties_missed',
 'penalties_saved',
 'red_cards',
 'round',
 'saves',
 'selected',
 'team_a_score',
 'team_h_score',
 'threat',
 'total_points',
 'transfers_balance',
 'transfers_in',
 'transfers_out',
 'value',
 'yellow_cards',
 'GW']

In [20]:
# convert columns from object dtype to numeric as in data df
are_numeric = [col for col in are_numeric if col in to_add_df.columns]
to_add_df["was_home"] = to_add_df["was_home"].astype(bool)
to_add_df[are_numeric] = to_add_df[are_numeric].apply(pd.to_numeric)

In [21]:
data.columns, to_add_df.columns

(Index(['season_x', 'name', 'position', 'team_x', 'assists', 'bonus', 'bps',
        'clean_sheets', 'creativity', 'element', 'fixture', 'goals_conceded',
        'goals_scored', 'ict_index', 'influence', 'kickoff_time', 'minutes',
        'opponent_team', 'opp_team_name', 'own_goals', 'penalties_missed',
        'penalties_saved', 'red_cards', 'round', 'saves', 'selected',
        'team_a_score', 'team_h_score', 'threat', 'total_points',
        'transfers_balance', 'transfers_in', 'transfers_out', 'value',
        'was_home', 'yellow_cards', 'GW'],
       dtype='object'),
 Index(['name', 'element', 'fixture', 'opponent_team', 'total_points',
        'was_home', 'kickoff_time', 'team_h_score', 'team_a_score', 'round',
        'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
        'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
        'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
        'threat', 'ict_index', 'starts', '

In [22]:
shared_cols = list(to_add_df.columns.intersection(data.columns))
shared_cols

['name',
 'element',
 'fixture',
 'opponent_team',
 'total_points',
 'was_home',
 'kickoff_time',
 'team_h_score',
 'team_a_score',
 'round',
 'minutes',
 'goals_scored',
 'assists',
 'clean_sheets',
 'goals_conceded',
 'own_goals',
 'penalties_saved',
 'penalties_missed',
 'yellow_cards',
 'red_cards',
 'saves',
 'bonus',
 'bps',
 'influence',
 'creativity',
 'threat',
 'ict_index',
 'value',
 'transfers_balance',
 'selected',
 'transfers_in',
 'transfers_out']

In [23]:
data = data[shared_cols]
to_add_df = to_add_df[shared_cols]

In [24]:
data.shape, to_add_df.shape

((96169, 32), (13684, 32))

In [25]:
# combining previously present data and new data 
full_data = pd.concat([data, to_add_df], ignore_index=True)
full_data

Unnamed: 0,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,...,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out
0,Aaron Cresswell,454,10,4,0,False,2016-08-15T19:00:00Z,2.0,1.0,1,...,0,0.0,0.0,0.0,0.0,55,0,14023,0,0
1,Aaron Lennon,142,3,17,1,True,2016-08-13T14:00:00Z,1.0,1.0,1,...,6,8.2,0.3,0.0,0.9,60,0,13918,0,0
2,Aaron Ramsey,16,8,9,2,True,2016-08-14T15:00:00Z,3.0,4.0,1,...,5,2.2,4.9,23.0,3.0,80,0,163170,0,0
3,Abdoulaye Doucouré,482,7,13,0,False,2016-08-13T14:00:00Z,1.0,1.0,1,...,0,0.0,0.0,0.0,0.0,50,0,1051,0,0
4,Adam Forshaw,286,6,14,1,True,2016-08-13T14:00:00Z,1.0,1.0,1,...,3,2.0,1.3,0.0,0.3,45,0,2723,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109848,Justin Hubner,751,149,6,0,True,2023-12-05T19:30:00Z,1.0,0.0,15,...,0,0.0,0.0,0.0,0.0,40,344,555,375,31
109849,Justin Hubner,751,160,16,0,True,2023-12-09T15:00:00Z,1.0,1.0,16,...,0,0.0,0.0,0.0,0.0,40,329,1078,435,106
109850,Justin Hubner,751,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.0,0.0,0.0,0.0,40,229,1583,418,189
109851,Justin Hubner,751,180,7,0,True,2023-12-24T13:00:00Z,2.0,1.0,18,...,0,0.0,0.0,0.0,0.0,40,42,1763,197,155


In [67]:
full_data.to_csv("full_data_raw.csv", index=False)
full_data = pd.read_csv("full_data_raw.csv")

  full_data = pd.read_csv("full_data_raw.csv")


### Preprocessing
Adding club, position, season, opponent team name, and result column to the full data

In [68]:
# SEASON: 
# function that assigns the season to each row
def assign_season(ko_time):

    dates = ["2017-06-01", "2018-06-01", "2019-06-01", "2020-06-01", "2021-06-01", "2022-06-01", "2023-06-01", "2024-06-01"]

    for date in dates: 

        if ko_time < pd.to_datetime(date).tz_localize("UTC"): 

            year1 = int(date.split("-")[0])
            year0 = year1 - 1

            season = f"{year0}/{year1}"
            
            return season

In [69]:
full_data["kickoff_time"] = pd.to_datetime(full_data["kickoff_time"])

In [70]:
full_data['season'] = full_data["kickoff_time"].apply(assign_season)

In [73]:
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season
0,Aaron Cresswell,,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,55,0,14023,0,0,,2016/2017
1,Aaron Lennon,,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.3,0.0,0.9,60,0,13918,0,0,,2016/2017
2,Abdoulaye Doucouré,,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,50,0,1051,0,0,,2016/2017
3,Adam Forshaw,,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,1.3,0.0,0.3,45,0,2723,0,0,,2016/2017
4,Adam Lallana,,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,33.7,57.0,14.2,70,0,155525,0,0,,2016/2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,0.0,0.0,40,344,555,375,31,,2023/2024
109274,Justin Hubner,,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,40,329,1078,435,106,,2023/2024
109275,Justin Hubner,,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,0.0,0.0,40,229,1583,418,189,,2023/2024
109276,Justin Hubner,,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,40,42,1763,197,155,,2023/2024


In [74]:
# a previous dataset that already has player clubs and positions
prev_data = pd.read_csv("final_data.csv")

In [75]:
prev_data

Unnamed: 0,name,element,opponent_team,season,kickoff_time,position,assists,club,clean_sheets,goals_scored,ict_index,minutes,red_cards,saves,selected,result,transfers_balance,was_home,opponent_team_name,total_points
0,Aaron Cresswell,454,4,2016/2017,2016-08-15 19:00:00+00:00,Defender,0,West Ham United,0,0,0.0,0,0,0,14023,L,0,False,Chelsea,0
1,Aaron Lennon,142,17,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Everton,0,0,0.9,15,0,0,13918,D,0,True,Tottenham Hotspur,1
2,Abdoulaye Doucouré,482,13,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Watford,0,0,0.0,0,0,0,1051,D,0,False,Southampton,0
3,Adam Forshaw,286,14,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Middlesbrough,0,0,0.3,69,0,0,2723,D,0,True,Stoke City,1
4,Adam Lallana,205,1,2016/2017,2016-08-14 15:00:00+00:00,Midfielder,1,Liverpool,0,1,14.2,75,0,0,155525,W,0,False,Arsenal,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107084,Jean-Ricner Bellegarde,715,18,2023/2024,2023-11-11 12:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,2.7,72,0,0,1231,W,389,True,Tottenham Hotspur,2
107085,Jean-Ricner Bellegarde,715,10,2023/2024,2023-11-27 20:00:00+00:00,Midfielder,1,Wolverhampton Wanderers,0,0,6.2,61,0,0,1516,L,221,False,Fulham,5
107086,Jean-Ricner Bellegarde,715,1,2023/2024,2023-12-02 15:00:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.5,62,0,0,2527,L,952,False,Arsenal,2
107087,Jean-Ricner Bellegarde,715,6,2023/2024,2023-12-05 19:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.4,11,0,0,3506,W,934,True,Burnley,1


In [76]:
# from a previous exploration into the data I've found that certain players' names aren't consistent with the FPL names
# wrong: list of player names used in search who yielded incorrect results
wrong = ["Andrew Robertson", "Alexandre Moreno Lopera", "Andriy Yarmolenko", "Antony Matheus dos Santos", "Benjamin Chilwell", 
        "Carlos Henrique Casimiro", "Carlos Ribeiro Dias", "Conrad Egan-Riley", "Crysencio Summerville", "Daniel Bentley", 
        "Daniel Chesters", "Danilo dos Santos de Oliveira", "Emerson Leite de Souza Junior", "Fabio Henrique Tavares", 
        "Fabricio Agosto Ramírez", "Facundo Buonanotte", "Faustino Anjorin", "Fernando Luiz Rosa", "Fernando Marçal", 
        "Francisco Jorge Tomás Oliveira", "Frederico Rodrigues de Paula Santos", "Fábio Ferreira Vieira", 
        "Gabriel dos Santos Magalhães", "James Bree", "Jonathan Castro Otto", "Jordan Henderson", "Jordan Hugill", 
        "Jorge Luiz Frello Filho", "Joseph Gomez", "Joseph Willock", "Josh Sims", "Joshua Sargent", "José Ignacio Peleteiro Romallo", 
        "Joshua Wilson-Esbrand", "Joseph Johnson", "Callum Scanlon", "Benjamin Chrisene", "Juan Camilo Hernández Suárez", 
        "Kell Watts", "Konstantinos Tsimikas", "Kyle Bartley", "Léo Bonatini", "Mahmoud Ahmed Ibrahim Hassan", 
        "Marcus Oliveira Alencar", "Mathias Jorgensen", "Matthew Cash", "Matthew Clarke", "Matthew Longstaff", "Matthew Pollock", 
        "Max Kilman", "Mohamed Dräger", "Norberto Bercique Gomes Betuncal", "Pelenda Joshua Dasilva", "Raphael Dias Belloli", 
        "Robbie Brady", "Rodrigo Hernandez", "Rui Pedro dos Santos Patrício", "Shaqai Forde", "Solomon March", "Victor da Silva", 
        "Vitor Ferreira", "Xande Nascimento da Costa Silva", "Yegor Yarmoliuk"]
# fixable: corresponding names that will yield correct results
fixable = ["Andy Robertson", "Álex Moreno", "Andrii Yarmolenko", "Antony", "Ben Chilwell", "Casemiro", "Cafú", "CJ Egan-Riley", 
          "Crysencio Summerville", "Dan Bentley", "Dan Chesters", "Danilo", "Emerson Royal", "Fabinho", "Fabri", "Facundo Buonanotte", 
          "Tino Anjorin", "Fernandinho", "Marçal", "Chiquinho", "Fred", "Fábio Vieira", "Gabriel Magalhães", "James Bree", "Jonny Otto", 
          "Jordan Henderson", "Jordan Hugill", "Jorginho", "Joe Gomez", "Joe Willock", "Josh Sims", "Josh Sargent", "Jota", 
          "Josh Wilson-Esbrand", "Joe Johnson", "Calum Scanlon", "Ben Chrisene", "Cucho Hernández", "Kelland Watts", "Kostas Tsimikas", 
          "Kyle Bartley", "Léo Bonatini", "Trézéguet", "Marquinhos", "Zanka", "Matty Cash", "Matt Clarke", "Matty Longstaff", 
          "Mattie Pollock", "Maximilian Kilman", "Mohamed Dräger", "Beto", "Josh Dasilva", "Raphinha", "Robert Brady", "Rodri", 
          "Rui Patrício", "Shaq Forde", "Solly March", "Vitinho", "Vitinha", "Xande Silva", "Yehor Yarmoliuk"]
# to_delete: players who are already accounted for under a different name OR who just don't exist - delete from full_data
to_delete = ["Benjamin White", "Daniel N'Lundulu", "Joe Taylor", "João Pedro Cavaco Cancelo", "Aaron Ramsey", "Adam Smith", "Mason Burstow", 
         "Matthew Smith", "Thomas McGill"]

In [77]:
# remove players who are already accounted for under a diff. name
full_data = full_data[~full_data["name"].isin(to_delete)]

In [78]:
# convert unconventional name to the conventional one
for wrong_player, fix_player in zip(wrong, fixable): 
    
    full_data.loc[full_data['name'] == wrong_player, 'name'] = fix_player

In [83]:
# assign each player in full_data with a position and club based on prev dataset
all_players = np.unique(full_data["name"])

for player in all_players: 
    
    try:
        # dataframe for this player
        player_df = prev_data[prev_data["name"] == player]
        # all seasons that this player has been in the PL
        unique_seasons = np.unique(player_df["season"])
    
        # POSITION: does not change over seasons
        position = player_df["position"].iloc[0]
        # set this player's position to what it is in prev. dataset
        full_data.loc[full_data['name'] == player, 'position'] = position
    
        # get the club the player played for during each season in the PL (might differ)
        for season in unique_seasons: 
    
            club = player_df.loc[player_df['season'] == season, 'club'].iloc[0]
            full_data.loc[(full_data['name'] == player) & (full_data['season'] == season), 'club'] = club  

    except IndexError: 
        continue

In [87]:
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season
0,Aaron Cresswell,West Ham United,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,55,0,14023,0,0,Defender,2016/2017
1,Aaron Lennon,Everton,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.3,0.0,0.9,60,0,13918,0,0,Midfielder,2016/2017
2,Abdoulaye Doucouré,Watford,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,50,0,1051,0,0,Midfielder,2016/2017
3,Adam Forshaw,Middlesbrough,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,1.3,0.0,0.3,45,0,2723,0,0,Midfielder,2016/2017
4,Adam Lallana,Liverpool,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,33.7,57.0,14.2,70,0,155525,0,0,Midfielder,2016/2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,0.0,0.0,40,344,555,375,31,,2023/2024
109274,Justin Hubner,,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,40,329,1078,435,106,,2023/2024
109275,Justin Hubner,,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,0.0,0.0,40,229,1583,418,189,,2023/2024
109276,Justin Hubner,,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,40,42,1763,197,155,,2023/2024


In [89]:
missing = full_data[full_data["club"].isna()]
missing_players = np.unique(missing["name"])
missing_players

array(['Adedapo Awokoya-Mebude', 'Ademipo Odubeko', 'Alex McCarthy',
       'Alfie Dorrington', 'Archie Gray', 'Bradley Ibrahim',
       'CJ Egan-Riley', 'Charlie Robinson', 'Cheikh Diaby',
       'Christian Marques', 'Conor Coventry', 'Danny Ward',
       'Denis Franchi', 'Diogo Teixeira da Silva',
       'Emerson Palmieri dos Santos', 'Frédéric Guilbert',
       'George Shelvey', 'Giulian Biancone', 'Harrison Ashby',
       'Harvey Davies', 'Hugo Bueno', 'Isaac Price', 'Ishé Samuels-Smith',
       'James Bree', 'James Garner', 'James Trafford', 'Jimmy Morgan',
       'Jordan Hugill', 'Josh Sims', 'Justin Hubner', 'Kacper Kozłowski',
       'Kaine Hayden', 'Karl Hein', 'Karlo Ziger', 'Kelland Watts',
       'Kristian Sekularac', 'Krisztián Hegyi', 'Lamare Bogarde',
       'Leigh Kavanagh', 'Louie Moulden', 'Léo Bonatini',
       'Mackenzie Hunt', 'Marcelo Flores', 'Mark Gillespie',
       'Mark O’Mahony', 'Mateusz Lis', 'Matt Clarke',
       'Matthew Whittingham', 'Meritan Shabani', '

In [97]:
driver_path = "/home/jasmine/Desktop/drivers/chromedriver-linux64/chromedriver"
# create a webdriver to automate searching players up
driver = webdriver.Chrome(executable_path=driver_path)
player_link = "https://www.premierleague.com/players"
# direct driver to this page
driver.get(player_link)
# click the accept all cookies button - one-time
cookie_button = WebDriverWait(driver, 3).until(
    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler")))
cookie_button.click()
# find the search bar to search up players
search_input = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.ID, "search-input"))
)

time.sleep(5)

for player in missing_players[45:]: 

    # clear any content already in the search bar
    search_input.clear()

    # search up the player
    search_input.send_keys(player)
    search_input.send_keys(Keys.RETURN)

    time.sleep(6)

    try:
        search_results = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "player__name"))
        )
        first_result = search_results[0]
        first_result.click()

    except TimeoutException:
        print(f"No results found for {player}. Moving on to the next player.")
        driver.back()
        continue

    # get player position
    overview_info = driver.find_elements(By.CLASS_NAME, "player-overview__col")
    infos = [info.text for info in overview_info]
    pos_info = [i for i in infos if "Position" in i]
    position = pos_info[0].split("\n")[-1]
    # update position for this player in the main df
    full_data.loc[full_data['name'] == player, 'position'] = position

    # get club history for each season using BeautifulSoup
    page_html = driver.page_source
    soup = BeautifulSoup(page_html, features = "html.parser")

    # getting rid of shortened names
    short_names = soup.find_all("span", class_="player-club-history__team-name--short")
    for short_name in short_names:
        short_name.decompose()

    # find table w club history
    table = soup.find("table")
    history_df = pd.read_html(str(table))[0]
    # table contains lots of addl. info, so drop it
    history_df = history_df.dropna(subset=["Season"])
    # we only want one club occurrence per season; some players have two if they transferred (e.g. Jesus)
    # we keep the first occurrence (most recent club of the two)
    history_df = history_df.drop_duplicates(subset=["Season"])

    # dataframe for the player (subset of full_data)
    player_df = full_data[full_data["name"] == player]
    # all seasons that this player has been in the PL
    player_seasons = np.unique(history_df["Season"])

    # for current player, fill in club season by season
    for season in player_seasons: 

        # get the club associated w given season (will be same for most players)
        club = history_df.loc[history_df['Season'] == season, 'Club'].iloc[0]
        full_data.loc[(full_data['name'] == player) & (full_data['season'] == season), 'club'] = club

    print((player, position, club))

    driver.back()

('Mateusz Lis', 'Goalkeeper', 'Southampton U21')
('Matt Clarke', 'Defender', 'Brighton & Hove Albion U21 (Loan)')
('Matthew Whittingham', 'Midfielder', 'Wolverhampton Wanderers U21')
('Meritan Shabani', 'Midfielder', 'Wolverhampton Wanderers')
('Mohamed Dräger', 'Defender', 'Nottingham Forest')
('Nathan Bishop', 'Goalkeeper', 'Manchester United')
('Nathan Fraser', 'Forward', 'Wolverhampton Wanderers')
('Reece Welch', 'Defender', 'Everton')
('Reuell Walters', 'Defender', 'Arsenal U21')
('Robert Street', 'Forward', 'Crystal Palace')
('Ronnie Stutter', 'Forward', 'Chelsea U21')
('Ryan Bertrand', 'Defender', 'Leicester City U21')
('Stefan Parkes', 'Defender', 'Fulham U21')
('Tawanda Chirewa', 'Midfielder', 'Wolverhampton Wanderers U21')
('Tom McGill', 'Goalkeeper', 'Brighton & Hove Albion U21')
('Tyler Dibling', 'Midfielder', 'Southampton')
('Will Dennis', 'Goalkeeper', 'Bournemouth')


In [102]:
# ademipo is wrong
# there are two danny wards, check them. algorithm took first one (leicester)
# emerson palmeiro dos santos is wrong
# mark o'mahony literally does not work, remove all
np.unique(full_data[full_data["club"].isna()]["name"]), np.unique(full_data[full_data["position"].isna()]["name"])

(array(['Adedapo Awokoya-Mebude', 'Ademipo Odubeko',
        'Diogo Teixeira da Silva', 'Emerson Palmieri dos Santos',
        'Hugo Bueno', 'James Bree', 'Jordan Hugill', 'Josh Sims',
        'Kaine Hayden', 'Kelland Watts', 'Léo Bonatini', 'Mark O’Mahony',
        'Matt Clarke', 'Mohamed Dräger'], dtype=object),
 array(['Adedapo Awokoya-Mebude', 'Mark O’Mahony'], dtype=object))

In [133]:
# function that deletes rows for a player from df only for a certain season
def delete_row(df, player_name, season):
    df = df.loc[~(df["name"] == player_name) | (df["season"] != season)]
    return df

In [147]:
# manually correcting some errors
full_data = full_data[full_data["name"] != "Adedapo Awokoya-Mebude"]
full_data = full_data[full_data["name"] != "Diogo Teixeira da Silva"] # Jota is already accounted for
full_data = full_data[full_data["name"] != "Mark O’Mahony"] # there is an error fetching info from website
full_data.loc[full_data['name'] == "Ademipo Odubeko", 'club'] = "West Ham United"
full_data.loc[full_data['name'] == "Ademipo Odubeko", 'position'] = "Forward"
full_data.loc[full_data['name'] == "Hugo Bueno", 'club'] = "Wolverhampton Wanderers"
full_data.loc[full_data['name'] == "Hugo Bueno", 'position'] = "Defender"
full_data.loc[full_data['name'] == "Josh Sims", 'club'] = "Southampton"
full_data.loc[full_data['name'] == "Kelland Watts", 'club'] = "Newcastle United"
full_data.loc[full_data['name'] == "Léo Bonatini", 'club'] = "Wolverhampton Wanderers"
full_data.loc[full_data['name'] == "Matt Clarke", 'club'] = "Brighton and Hove Albion"
full_data.loc[full_data['name'] == "Mohamed Dräger", 'club'] = "Nottingham Forest"
full_data.loc[(full_data["name"] == "Emerson Palmieri dos Santos") & (full_data["season"].isin(["2017/2018", "2018/19", 
                                                                                "2019/2020", "2020/2021", 
                                                                                "2021/2022"])), "club"] = "Chelsea"
full_data.loc[(full_data["name"] == "Emerson Palmieri dos Santos") & (full_data["season"].isin(["2022/2023", "2023/2024"])), "club"] = "West Ham United"

In [141]:
# delete rows for a player for a certain season
error_players = ["James Bree", "Jordan Hugill", "Kaine Hayden"]
error_seasons = ["2020/2021", "2020/2021", "2021/2022"]

for player, szn in zip(error_players, error_seasons): 
    full_data = delete_row(full_data, player, szn)

In [148]:
np.unique(full_data[full_data["club"].isna()]["name"])

array([], dtype=object)

In [149]:
np.unique(full_data["club"])

array(['Arsenal', 'Arsenal U21', 'Aston Villa', 'Aston Villa U21',
       'Bournemouth', 'Brentford', 'Brighton & Hove Albion',
       'Brighton & Hove Albion U21', 'Brighton and Hove Albion',
       'Burnley', 'Burnley U21', 'Chelsea', 'Chelsea U21',
       'Crystal Palace', 'Crystal Palace U21', 'Everton', 'Everton U18',
       'Everton U21', 'Fulham', 'Fulham U21', 'Huddersfield Town',
       'Hull City', 'Leeds United', 'Leeds United U21', 'Leicester City',
       'Leicester City U21', 'Liverpool', 'Liverpool U21', 'Luton Town',
       'Manchester City', 'Manchester City U21', 'Manchester United',
       'Manchester United U21', 'Middlesbrough', 'Newcastle United',
       'Newcastle United U21', 'Norwich City', 'Nottingham Forest',
       'Nottingham Forest U21', 'Sheffield United', 'Southampton',
       'Southampton U18', 'Southampton U21', 'Stoke City', 'Sunderland',
       'Swansea City', 'Tottenham Hotspur', 'Tottenham Hotspur U21',
       'Watford', 'West Bromwich Albion', 'We

In [152]:
# remove "U21" from all club names if present
full_data['club'] = full_data['club'].str.replace(r'U21', '', regex=True).str.strip()
full_data['club'] = full_data['club'].str.replace(r'U18', '', regex=True).str.strip()

In [153]:
np.unique(full_data["club"])

array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford',
       'Brighton & Hove Albion', 'Brighton and Hove Albion', 'Burnley',
       'Chelsea', 'Crystal Palace', 'Everton', 'Fulham',
       'Huddersfield Town', 'Hull City', 'Leeds United', 'Leicester City',
       'Liverpool', 'Luton Town', 'Manchester City', 'Manchester United',
       'Middlesbrough', 'Newcastle United', 'Norwich City',
       'Nottingham Forest', 'Sheffield United', 'Southampton',
       'Stoke City', 'Sunderland', 'Swansea City', 'Tottenham Hotspur',
       'Watford', 'West Bromwich Albion', 'West Ham United',
       'Wolverhampton Wanderers'], dtype=object)

In [155]:
full_data.to_csv("full_data_new.csv", index=False)

In [156]:
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season
0,Aaron Cresswell,West Ham United,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,55,0,14023,0,0,Defender,2016/2017
1,Aaron Lennon,Everton,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.3,0.0,0.9,60,0,13918,0,0,Midfielder,2016/2017
2,Abdoulaye Doucouré,Watford,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,50,0,1051,0,0,Midfielder,2016/2017
3,Adam Forshaw,Middlesbrough,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,1.3,0.0,0.3,45,0,2723,0,0,Midfielder,2016/2017
4,Adam Lallana,Liverpool,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,33.7,57.0,14.2,70,0,155525,0,0,Midfielder,2016/2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,Wolverhampton Wanderers,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,0.0,0.0,40,344,555,375,31,Defender,2023/2024
109274,Justin Hubner,Wolverhampton Wanderers,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,0.0,0.0,40,329,1078,435,106,Defender,2023/2024
109275,Justin Hubner,Wolverhampton Wanderers,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,0.0,0.0,40,229,1583,418,189,Defender,2023/2024
109276,Justin Hubner,Wolverhampton Wanderers,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,0.0,0.0,40,42,1763,197,155,Defender,2023/2024


In [158]:
# adding opponent team name
opps = pd.read_csv("opps_final.csv")
opps

Unnamed: 0,season,team,opponent_team
0,2016/2017,1,Arsenal
1,2016/2017,2,Bournemouth
2,2016/2017,3,Burnley
3,2016/2017,4,Chelsea
4,2016/2017,5,Crystal Palace
...,...,...,...
155,2023/2024,16,Nottingham Forest
156,2023/2024,17,Sheffield United
157,2023/2024,18,Tottenham Hotspur
158,2023/2024,19,West Ham United


In [159]:
seasons = np.unique(opps["season"])
mapping_dict = dict()
for season in seasons: 

    season_df = opps[opps["season"] == season]
    mapping_dict[season] = dict(zip(season_df["team"], season_df["opponent_team"]))

In [160]:
mapping_dict

{'2016/2017': {1: 'Arsenal',
  2: 'Bournemouth',
  3: 'Burnley',
  4: 'Chelsea',
  5: 'Crystal Palace',
  6: 'Everton',
  7: 'Hull City',
  8: 'Leicester City',
  9: 'Liverpool',
  10: 'Manchester City',
  11: 'Manchester United',
  12: 'Middlesbrough',
  13: 'Southampton',
  14: 'Stoke City',
  15: 'Sunderland',
  16: 'Swansea City',
  17: 'Tottenham Hotspur',
  18: 'Watford',
  19: 'West Bromwich Albion',
  20: 'West Ham United'},
 '2017/2018': {1: 'Arsenal',
  2: 'Bournemouth',
  3: 'Brighton & Hove Albion',
  4: 'Burnley',
  5: 'Chelsea',
  6: 'Crystal Palace',
  7: 'Everton',
  8: 'Huddersfield Town',
  9: 'Leicester City',
  10: 'Liverpool',
  11: 'Manchester City',
  12: 'Manchester United',
  13: 'Newcastle United',
  14: 'Southampton',
  15: 'Stoke City',
  16: 'Swansea City',
  17: 'Tottenham Hotspur',
  18: 'Watford',
  19: 'West Bromwich Albion',
  20: 'West Ham United'},
 '2018/2019': {1: 'Arsenal',
  2: 'Bournemouth',
  3: 'Brighton & Hove Albion',
  4: 'Burnley',
  5: 'C

In [161]:
for season, mapping in mapping_dict.items():
    
    full_data.loc[full_data['season'] == season, "opponent_team_name"] = full_data['opponent_team'].map(mapping)

In [162]:
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season,opponent_team_name
0,Aaron Cresswell,West Ham United,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,0.0,55,0,14023,0,0,Defender,2016/2017,Chelsea
1,Aaron Lennon,Everton,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.9,60,0,13918,0,0,Midfielder,2016/2017,Tottenham Hotspur
2,Abdoulaye Doucouré,Watford,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.0,50,0,1051,0,0,Midfielder,2016/2017,Southampton
3,Adam Forshaw,Middlesbrough,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,0.3,45,0,2723,0,0,Midfielder,2016/2017,Stoke City
4,Adam Lallana,Liverpool,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,57.0,14.2,70,0,155525,0,0,Midfielder,2016/2017,Arsenal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,Wolverhampton Wanderers,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,0.0,40,344,555,375,31,Defender,2023/2024,Burnley
109274,Justin Hubner,Wolverhampton Wanderers,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,0.0,40,329,1078,435,106,Defender,2023/2024,Nottingham Forest
109275,Justin Hubner,Wolverhampton Wanderers,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,0.0,40,229,1583,418,189,Defender,2023/2024,West Ham United
109276,Justin Hubner,Wolverhampton Wanderers,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,0.0,40,42,1763,197,155,Defender,2023/2024,Chelsea


In [163]:
# adding result column
full_data["result"] = None
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season,opponent_team_name,result
0,Aaron Cresswell,West Ham United,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,55,0,14023,0,0,Defender,2016/2017,Chelsea,
1,Aaron Lennon,Everton,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.9,60,0,13918,0,0,Midfielder,2016/2017,Tottenham Hotspur,
2,Abdoulaye Doucouré,Watford,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,50,0,1051,0,0,Midfielder,2016/2017,Southampton,
3,Adam Forshaw,Middlesbrough,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.3,45,0,2723,0,0,Midfielder,2016/2017,Stoke City,
4,Adam Lallana,Liverpool,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,14.2,70,0,155525,0,0,Midfielder,2016/2017,Arsenal,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,Wolverhampton Wanderers,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,40,344,555,375,31,Defender,2023/2024,Burnley,
109274,Justin Hubner,Wolverhampton Wanderers,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,40,329,1078,435,106,Defender,2023/2024,Nottingham Forest,
109275,Justin Hubner,Wolverhampton Wanderers,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,40,229,1583,418,189,Defender,2023/2024,West Ham United,
109276,Justin Hubner,Wolverhampton Wanderers,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,40,42,1763,197,155,Defender,2023/2024,Chelsea,


In [164]:
# function that assigns a result label (W, D, or L) to the result column of df
def assign_result(df): 
    df.loc[(df["was_home"]) & (df["team_h_score"] > df["team_a_score"]), "result"] = "W"
    df.loc[(df["was_home"]) & (df["team_h_score"] < df["team_a_score"]), "result"] = "L"
    df.loc[(df["team_h_score"] == df["team_a_score"]), "result"] = "D"
    df.loc[~df["was_home"] & (df["team_h_score"] < df["team_a_score"]), "result"] = "W"
    df.loc[~df["was_home"] & (df["team_h_score"] > df["team_a_score"]), "result"] = "L"
    return df

In [165]:
full_data = assign_result(full_data)
full_data

Unnamed: 0,name,club,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,...,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,position,season,opponent_team_name,result
0,Aaron Cresswell,West Ham United,454,10,4,0,False,2016-08-15 19:00:00+00:00,2.0,1.0,...,0.0,55,0,14023,0,0,Defender,2016/2017,Chelsea,L
1,Aaron Lennon,Everton,142,3,17,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.9,60,0,13918,0,0,Midfielder,2016/2017,Tottenham Hotspur,D
2,Abdoulaye Doucouré,Watford,482,7,13,0,False,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.0,50,0,1051,0,0,Midfielder,2016/2017,Southampton,D
3,Adam Forshaw,Middlesbrough,286,6,14,1,True,2016-08-13 14:00:00+00:00,1.0,1.0,...,0.3,45,0,2723,0,0,Midfielder,2016/2017,Stoke City,D
4,Adam Lallana,Liverpool,205,8,1,11,False,2016-08-14 15:00:00+00:00,3.0,4.0,...,14.2,70,0,155525,0,0,Midfielder,2016/2017,Arsenal,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109273,Justin Hubner,Wolverhampton Wanderers,751,149,6,0,True,2023-12-05 19:30:00+00:00,1.0,0.0,...,0.0,40,344,555,375,31,Defender,2023/2024,Burnley,W
109274,Justin Hubner,Wolverhampton Wanderers,751,160,16,0,True,2023-12-09 15:00:00+00:00,1.0,1.0,...,0.0,40,329,1078,435,106,Defender,2023/2024,Nottingham Forest,D
109275,Justin Hubner,Wolverhampton Wanderers,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,0.0,...,0.0,40,229,1583,418,189,Defender,2023/2024,West Ham United,L
109276,Justin Hubner,Wolverhampton Wanderers,751,180,7,0,True,2023-12-24 13:00:00+00:00,2.0,1.0,...,0.0,40,42,1763,197,155,Defender,2023/2024,Chelsea,W


In [166]:
full_data.to_csv("final_data_official.csv", index=False)