In [2]:
import numpy as np
import pandas as pd
import requests
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.support import expected_conditions as EC

In [68]:
full_data = pd.read_csv("final_data.csv")
full_data

Unnamed: 0,name,element,opponent_team,season,kickoff_time,position,assists,club,clean_sheets,goals_scored,ict_index,minutes,red_cards,saves,selected,result,transfers_balance,was_home,opponent_team_name,total_points
0,Aaron Cresswell,454,4,2016/2017,2016-08-15 19:00:00+00:00,Defender,0,West Ham United,0,0,0.0,0,0,0,14023,L,0,False,Chelsea,0
1,Aaron Lennon,142,17,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Everton,0,0,0.9,15,0,0,13918,D,0,True,Tottenham Hotspur,1
2,Abdoulaye Doucouré,482,13,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Watford,0,0,0.0,0,0,0,1051,D,0,False,Southampton,0
3,Adam Forshaw,286,14,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Middlesbrough,0,0,0.3,69,0,0,2723,D,0,True,Stoke City,1
4,Adam Lallana,205,1,2016/2017,2016-08-14 15:00:00+00:00,Midfielder,1,Liverpool,0,1,14.2,75,0,0,155525,W,0,False,Arsenal,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107084,Jean-Ricner Bellegarde,715,18,2023/2024,2023-11-11 12:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,2.7,72,0,0,1231,W,389,True,Tottenham Hotspur,2
107085,Jean-Ricner Bellegarde,715,10,2023/2024,2023-11-27 20:00:00+00:00,Midfielder,1,Wolverhampton Wanderers,0,0,6.2,61,0,0,1516,L,221,False,Fulham,5
107086,Jean-Ricner Bellegarde,715,1,2023/2024,2023-12-02 15:00:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.5,62,0,0,2527,L,952,False,Arsenal,2
107087,Jean-Ricner Bellegarde,715,6,2023/2024,2023-12-05 19:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.4,11,0,0,3506,W,934,True,Burnley,1


In [4]:
latest = pd.to_datetime(max(full_data["kickoff_time"]))
latest

Timestamp('2023-12-10 16:30:00+0000', tz='UTC')

In [5]:
# reading in the list of player IDs to convert IDs to names
player_IDs = pd.read_csv("player_IDs.csv")
IDs = player_IDs["id"]

In [6]:
player_api_link = "https://fantasy.premierleague.com/api/element-summary/"

In [7]:
# sample
player_row = player_IDs[player_IDs["id"] == 1]
player_name = player_row["first_name"] + " " + player_row["second_name"]
response = requests.get(f"{player_api_link}{1}/")
api_data = response.json()
fixture_stats = api_data.get("history", [])
fixture_stats = [stat for stat in fixture_stats if pd.to_datetime(stat["kickoff_time"]) > latest]
colnames = list(fixture_stats[1].keys())
fixture_stats

[{'element': 1,
  'fixture': 161,
  'opponent_team': 5,
  'total_points': 0,
  'was_home': True,
  'kickoff_time': '2023-12-17T14:00:00Z',
  'team_h_score': 2,
  'team_a_score': 0,
  'round': 17,
  'minutes': 0,
  'goals_scored': 0,
  'assists': 0,
  'clean_sheets': 0,
  'goals_conceded': 0,
  'own_goals': 0,
  'penalties_saved': 0,
  'penalties_missed': 0,
  'yellow_cards': 0,
  'red_cards': 0,
  'saves': 0,
  'bonus': 0,
  'bps': 0,
  'influence': '0.0',
  'creativity': '0.0',
  'threat': '0.0',
  'ict_index': '0.0',
  'starts': 0,
  'expected_goals': '0.00',
  'expected_assists': '0.00',
  'expected_goal_involvements': '0.00',
  'expected_goals_conceded': '0.00',
  'value': 44,
  'transfers_balance': -382,
  'selected': 21444,
  'transfers_in': 0,
  'transfers_out': 382},
 {'element': 1,
  'fixture': 174,
  'opponent_team': 11,
  'total_points': 0,
  'was_home': False,
  'kickoff_time': '2023-12-23T17:30:00Z',
  'team_h_score': None,
  'team_a_score': None,
  'round': 18,
  'minutes

In [9]:
to_add_df = pd.DataFrame(columns=["name"] + colnames)

for player_id in IDs: 
    player_row = player_IDs[player_IDs["id"] == player_id]
    player_name = f"{player_row['first_name'].iloc[0]} {player_row['second_name'].iloc[0]}"
    
    try:
        response = requests.get(f"{player_api_link}{player_id}/")
        response.raise_for_status()  # Raise an error for bad responses (e.g., 404)
        api_data = response.json()
        fixture_stats = api_data.get("history", [])
        fixture_stats = [stat for stat in fixture_stats if pd.to_datetime(stat["kickoff_time"]) > latest]
        add_df = pd.DataFrame(fixture_stats)
        nrows = add_df.shape[0]
        names = [player_name] * nrows
        add_df.insert(0, column="name", value=names)
        to_add_df = pd.concat([to_add_df, add_df], ignore_index=True)
        
    except requests.exceptions.RequestException as e:
        # Handle the exception here (e.g., print an error message)
        print(f"Error fetching data for player ID {player_id}: {e}")
    
    # Add a delay of 1 second between requests
    time.sleep(1)

to_add_df

Error fetching data for player ID 399: HTTPSConnectionPool(host='fantasy.premierleague.com', port=443): Max retries exceeded with url: /api/element-summary/399/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f7a4d21b850>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))
Error fetching data for player ID 400: HTTPSConnectionPool(host='fantasy.premierleague.com', port=443): Max retries exceeded with url: /api/element-summary/400/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f7a4d2327d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))
Error fetching data for player ID 505: HTTPSConnectionPool(host='fantasy.premierleague.com', port=443): Max retries exceeded with url: /api/element-summary/505/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f7a4d23afd0>: Failed to establish a new connection: [Errno -3] Temporar

Unnamed: 0,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,...,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,Folarin Balogun,1,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,44,-382,21444,0,382
1,Folarin Balogun,1,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.00,0.00,0.00,0.00,44,-268,21188,0,268
2,Cédric Alves Soares,2,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,39,-579,44654,1150,1729
3,Cédric Alves Soares,2,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.00,0.00,0.00,0.00,39,-631,44095,632,1263
4,Mohamed Elneny,3,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,44,-569,9176,124,693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,Matthew Whittingham,749,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.00,0.00,0.00,0.00,45,0,167,18,18
1359,Tawanda Chirewa,750,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.00,0.00,0.00,0.00,45,34,317,90,56
1360,Tawanda Chirewa,750,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.00,0.00,0.00,0.00,45,-2,337,41,43
1361,Justin Hubner,751,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.00,0.00,0.00,0.00,40,229,1583,418,189


In [10]:
to_add_df

Unnamed: 0,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,...,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,Folarin Balogun,1,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,44,-382,21444,0,382
1,Folarin Balogun,1,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.00,0.00,0.00,0.00,44,-268,21188,0,268
2,Cédric Alves Soares,2,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,39,-579,44654,1150,1729
3,Cédric Alves Soares,2,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.00,0.00,0.00,0.00,39,-631,44095,632,1263
4,Mohamed Elneny,3,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.00,0.00,0.00,0.00,44,-569,9176,124,693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,Matthew Whittingham,749,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.00,0.00,0.00,0.00,45,0,167,18,18
1359,Tawanda Chirewa,750,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.00,0.00,0.00,0.00,45,34,317,90,56
1360,Tawanda Chirewa,750,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.00,0.00,0.00,0.00,45,-2,337,41,43
1361,Justin Hubner,751,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.00,0.00,0.00,0.00,40,229,1583,418,189


In [11]:
to_add_df.to_csv("to_add_df.csv", index=False)

In [45]:
to_add_df = pd.read_csv("to_add_df.csv")

In [46]:
shared_cols = list(to_add_df.columns.intersection(full_data.columns))
shared_cols

['name',
 'element',
 'fixture',
 'opponent_team',
 'total_points',
 'was_home',
 'kickoff_time',
 'team_h_score',
 'team_a_score',
 'round',
 'minutes',
 'goals_scored',
 'assists',
 'clean_sheets',
 'goals_conceded',
 'own_goals',
 'penalties_saved',
 'penalties_missed',
 'yellow_cards',
 'red_cards',
 'saves',
 'bonus',
 'bps',
 'influence',
 'creativity',
 'threat',
 'ict_index',
 'value',
 'transfers_balance',
 'selected',
 'transfers_in',
 'transfers_out']

In [47]:
to_add_df = to_add_df.drop(columns=to_add_df.columns.difference(shared_cols))
to_add_df

Unnamed: 0,name,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,...,bps,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out
0,Folarin Balogun,1,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.0,0.0,0.0,0.0,44,-382,21444,0,382
1,Folarin Balogun,1,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.0,0.0,0.0,0.0,44,-268,21188,0,268
2,Cédric Alves Soares,2,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.0,0.0,0.0,0.0,39,-579,44654,1150,1729
3,Cédric Alves Soares,2,174,11,0,False,2023-12-23T17:30:00Z,,,18,...,0,0.0,0.0,0.0,0.0,39,-631,44095,632,1263
4,Mohamed Elneny,3,161,5,0,True,2023-12-17T14:00:00Z,2.0,0.0,17,...,0,0.0,0.0,0.0,0.0,44,-569,9176,124,693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,Matthew Whittingham,749,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.0,0.0,0.0,0.0,45,0,167,18,18
1359,Tawanda Chirewa,750,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.0,0.0,0.0,0.0,45,34,317,90,56
1360,Tawanda Chirewa,750,180,7,0,True,2023-12-24T13:00:00Z,,,18,...,0,0.0,0.0,0.0,0.0,45,-2,337,41,43
1361,Justin Hubner,751,170,19,0,False,2023-12-17T14:00:00Z,3.0,0.0,17,...,0,0.0,0.0,0.0,0.0,40,229,1583,418,189


In [48]:
# converting columns to the correct data types
are_numeric = list(full_data.select_dtypes(include=['number']).columns)
to_add_df[are_numeric] = to_add_df[are_numeric].apply(pd.to_numeric)
to_add_df["was_home"] = to_add_df["was_home"].astype(bool)
to_add_df.dtypes

name                  object
element                int64
fixture                int64
opponent_team          int64
total_points           int64
was_home                bool
kickoff_time          object
team_h_score         float64
team_a_score         float64
round                  int64
minutes                int64
goals_scored           int64
assists                int64
clean_sheets           int64
goals_conceded         int64
own_goals              int64
penalties_saved        int64
penalties_missed       int64
yellow_cards           int64
red_cards              int64
saves                  int64
bonus                  int64
bps                    int64
influence            float64
creativity           float64
threat               float64
ict_index            float64
value                  int64
transfers_balance      int64
selected               int64
transfers_in           int64
transfers_out          int64
dtype: object

In [49]:
# converting KO time to a datetime
to_add_df["kickoff_time"] = pd.to_datetime(to_add_df["kickoff_time"])

In [50]:
# function that assigns the season to each row
def assign_season(ko_time):

    dates = ["2017-06-01", "2018-06-01", "2019-06-01", "2020-06-01", "2021-06-01", "2022-06-01", "2023-06-01", "2024-06-01"]

    for date in dates: 

        if ko_time < pd.to_datetime(date).tz_localize("UTC"): 

            year1 = int(date.split("-")[0])
            year0 = year1 - 1

            season = f"{year0}/{year1}"
            
            return season

In [51]:
to_add_df['season'] = to_add_df["kickoff_time"].apply(assign_season)

In [52]:
to_add_df.insert(1, "club", None)
to_add_df.insert(2, "position", None)
to_add_df

Unnamed: 0,name,club,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,...,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season
0,Folarin Balogun,,,1,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-382,21444,0,382,2023/2024
1,Folarin Balogun,,,1,174,11,0,False,2023-12-23 17:30:00+00:00,,...,0.0,0.0,0.0,0.0,44,-268,21188,0,268,2023/2024
2,Cédric Alves Soares,,,2,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,39,-579,44654,1150,1729,2023/2024
3,Cédric Alves Soares,,,2,174,11,0,False,2023-12-23 17:30:00+00:00,,...,0.0,0.0,0.0,0.0,39,-631,44095,632,1263,2023/2024
4,Mohamed Elneny,,,3,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-569,9176,124,693,2023/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,Matthew Whittingham,,,749,180,7,0,True,2023-12-24 13:00:00+00:00,,...,0.0,0.0,0.0,0.0,45,0,167,18,18,2023/2024
1359,Tawanda Chirewa,,,750,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,45,34,317,90,56,2023/2024
1360,Tawanda Chirewa,,,750,180,7,0,True,2023-12-24 13:00:00+00:00,,...,0.0,0.0,0.0,0.0,45,-2,337,41,43,2023/2024
1361,Justin Hubner,,,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,40,229,1583,418,189,2023/2024


In [53]:
# all unique players
unique_players = np.unique(to_add_df["name"])
unique_elements = np.unique(to_add_df["element"])
current_szn = "2023/2024"
new_elements = []

for ele in unique_elements:

    # use whatever club and position is already in full_data to fill to_add_df
    try:
        ele_df = full_data[(full_data["element"] == ele) & (full_data["season"] == current_szn)]
        position = np.unique(ele_df["position"])[0]
        club = np.unique(ele_df["club"])[0]
        
        to_add_df.loc[(to_add_df['element'] == ele) & (to_add_df['season'] == current_szn), 'position'] = position
        to_add_df.loc[(to_add_df['element'] == ele) & (to_add_df['season'] == current_szn), 'club'] = club

    except IndexError: 

        new_elements.append(ele)
        print(f"Player with ID {ele} not found. Moving on to next player.")
        continue

Player with ID 29 not found. Moving on to next player.
Player with ID 84 not found. Moving on to next player.
Player with ID 141 not found. Moving on to next player.
Player with ID 166 not found. Moving on to next player.
Player with ID 169 not found. Moving on to next player.
Player with ID 294 not found. Moving on to next player.
Player with ID 336 not found. Moving on to next player.
Player with ID 431 not found. Moving on to next player.
Player with ID 438 not found. Moving on to next player.
Player with ID 444 not found. Moving on to next player.
Player with ID 529 not found. Moving on to next player.
Player with ID 532 not found. Moving on to next player.
Player with ID 623 not found. Moving on to next player.
Player with ID 646 not found. Moving on to next player.
Player with ID 675 not found. Moving on to next player.
Player with ID 717 not found. Moving on to next player.
Player with ID 719 not found. Moving on to next player.
Player with ID 736 not found. Moving on to next pl

In [54]:
to_add_df

Unnamed: 0,name,club,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,...,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season
0,Folarin Balogun,Arsenal,Forward,1,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-382,21444,0,382,2023/2024
1,Folarin Balogun,Arsenal,Forward,1,174,11,0,False,2023-12-23 17:30:00+00:00,,...,0.0,0.0,0.0,0.0,44,-268,21188,0,268,2023/2024
2,Cédric Alves Soares,Fulham,Defender,2,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,39,-579,44654,1150,1729,2023/2024
3,Cédric Alves Soares,Fulham,Defender,2,174,11,0,False,2023-12-23 17:30:00+00:00,,...,0.0,0.0,0.0,0.0,39,-631,44095,632,1263,2023/2024
4,Mohamed Elneny,Arsenal,Midfielder,3,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-569,9176,124,693,2023/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,Matthew Whittingham,,,749,180,7,0,True,2023-12-24 13:00:00+00:00,,...,0.0,0.0,0.0,0.0,45,0,167,18,18,2023/2024
1359,Tawanda Chirewa,,,750,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,45,34,317,90,56,2023/2024
1360,Tawanda Chirewa,,,750,180,7,0,True,2023-12-24 13:00:00+00:00,,...,0.0,0.0,0.0,0.0,45,-2,337,41,43,2023/2024
1361,Justin Hubner,,,751,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,40,229,1583,418,189,2023/2024


In [55]:
new_players = []
for id in new_elements: 
    
    player_name = to_add_df.loc[to_add_df['element'] == id, 'name'].iloc[0]
    new_players.append(player_name)

new_players

['Benjamin White',
 'Adam Smith',
 'Tom McGill',
 'CJ Egan-Riley',
 'Denis Franchi',
 'Diogo Teixeira da Silva',
 'Joe Taylor',
 'Kell Watts',
 'Giulian Biancone',
 'Mohamed Dräger',
 'Conor Coventry',
 'Emerson Palmieri dos Santos',
 'Mason Burstow',
 'Karl Hein',
 'Aaron Ramsey',
 'Ronnie Stutter',
 'Mackenzie Hunt',
 'Reuell Walters',
 'Bradley Ibrahim',
 'Alfie Dorrington',
 'Mark O’Mahony',
 'Matthew Whittingham',
 'Tawanda Chirewa',
 'Justin Hubner',
 'Leigh Kavanagh']

In [56]:
# some players don't have club and position filled, drop for now
to_add_df = to_add_df.dropna()
to_add_df

Unnamed: 0,name,club,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,...,influence,creativity,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season
0,Folarin Balogun,Arsenal,Forward,1,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-382,21444,0,382,2023/2024
2,Cédric Alves Soares,Fulham,Defender,2,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,39,-579,44654,1150,1729,2023/2024
4,Mohamed Elneny,Arsenal,Midfielder,3,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,44,-569,9176,124,693,2023/2024
6,Fábio Ferreira Vieira,Arsenal,Midfielder,4,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,0.0,0.0,54,-274,9259,16,290,2023/2024
8,Gabriel dos Santos Magalhães,Arsenal,Defender,5,161,5,6,True,2023-12-17 14:00:00+00:00,2.0,...,10.0,2.1,19.0,3.1,49,-21353,1588625,19918,41271,2023/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,Matt Doherty,Wolverhampton Wanderers,Defender,598,170,19,1,False,2023-12-17 14:00:00+00:00,3.0,...,0.6,0.7,2.0,0.3,43,-336,18766,306,642,2023/2024
1349,Santiago Bueno,Wolverhampton Wanderers,Defender,697,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,45,-49,3062,126,175,2023/2024
1351,Enso González,Wolverhampton Wanderers,Forward,698,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,50,-50,1360,72,122,2023/2024
1353,Nathan Fraser,Wolverhampton Wanderers,Forward,704,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,0.0,0.0,45,161,7098,627,466,2023/2024


In [57]:
# CSV file that corresponds each club to an ID
opps = pd.read_csv("opps_final.csv")

In [58]:
opps

Unnamed: 0,season,team,opponent_team
0,2016/2017,1,Arsenal
1,2016/2017,2,Bournemouth
2,2016/2017,3,Burnley
3,2016/2017,4,Chelsea
4,2016/2017,5,Crystal Palace
...,...,...,...
155,2023/2024,16,Nottingham Forest
156,2023/2024,17,Sheffield United
157,2023/2024,18,Tottenham Hotspur
158,2023/2024,19,West Ham United


In [59]:
# creating mapping from ID to team name
season_df = opps[opps["season"] == current_szn]
mapping = dict(zip(season_df["team"], season_df["opponent_team"]))

In [60]:
to_add_df["opponent_team_name"] = to_add_df["opponent_team"].map(mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_add_df["opponent_team_name"] = to_add_df["opponent_team"].map(mapping)


In [61]:
to_add_df["result"] = None
to_add_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_add_df["result"] = None


Unnamed: 0,name,club,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,...,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season,opponent_team_name,result
0,Folarin Balogun,Arsenal,Forward,1,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,44,-382,21444,0,382,2023/2024,Brighton & Hove Albion,
2,Cédric Alves Soares,Fulham,Defender,2,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,39,-579,44654,1150,1729,2023/2024,Brighton & Hove Albion,
4,Mohamed Elneny,Arsenal,Midfielder,3,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,44,-569,9176,124,693,2023/2024,Brighton & Hove Albion,
6,Fábio Ferreira Vieira,Arsenal,Midfielder,4,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,54,-274,9259,16,290,2023/2024,Brighton & Hove Albion,
8,Gabriel dos Santos Magalhães,Arsenal,Defender,5,161,5,6,True,2023-12-17 14:00:00+00:00,2.0,...,19.0,3.1,49,-21353,1588625,19918,41271,2023/2024,Brighton & Hove Albion,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,Matt Doherty,Wolverhampton Wanderers,Defender,598,170,19,1,False,2023-12-17 14:00:00+00:00,3.0,...,2.0,0.3,43,-336,18766,306,642,2023/2024,West Ham United,
1349,Santiago Bueno,Wolverhampton Wanderers,Defender,697,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,45,-49,3062,126,175,2023/2024,West Ham United,
1351,Enso González,Wolverhampton Wanderers,Forward,698,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,50,-50,1360,72,122,2023/2024,West Ham United,
1353,Nathan Fraser,Wolverhampton Wanderers,Forward,704,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,45,161,7098,627,466,2023/2024,West Ham United,


In [62]:
# function that propagates the result col of dataframe df with W, D, or L
def fill_result(df): 
    df.loc[(df["was_home"]) & (df["team_h_score"] > df["team_a_score"]), "result"] = "W"
    df.loc[(df["was_home"]) & (df["team_h_score"] < df["team_a_score"]), "result"] = "L"
    df.loc[(df["team_h_score"] == df["team_a_score"]), "result"] = "D"
    df.loc[~df["was_home"] & (df["team_h_score"] < df["team_a_score"]), "result"] = "W"
    df.loc[~df["was_home"] & (df["team_h_score"] > df["team_a_score"]), "result"] = "L"

In [63]:
fill_result(to_add_df)
to_add_df

Unnamed: 0,name,club,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,...,threat,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,season,opponent_team_name,result
0,Folarin Balogun,Arsenal,Forward,1,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,44,-382,21444,0,382,2023/2024,Brighton & Hove Albion,W
2,Cédric Alves Soares,Fulham,Defender,2,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,39,-579,44654,1150,1729,2023/2024,Brighton & Hove Albion,W
4,Mohamed Elneny,Arsenal,Midfielder,3,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,44,-569,9176,124,693,2023/2024,Brighton & Hove Albion,W
6,Fábio Ferreira Vieira,Arsenal,Midfielder,4,161,5,0,True,2023-12-17 14:00:00+00:00,2.0,...,0.0,0.0,54,-274,9259,16,290,2023/2024,Brighton & Hove Albion,W
8,Gabriel dos Santos Magalhães,Arsenal,Defender,5,161,5,6,True,2023-12-17 14:00:00+00:00,2.0,...,19.0,3.1,49,-21353,1588625,19918,41271,2023/2024,Brighton & Hove Albion,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,Matt Doherty,Wolverhampton Wanderers,Defender,598,170,19,1,False,2023-12-17 14:00:00+00:00,3.0,...,2.0,0.3,43,-336,18766,306,642,2023/2024,West Ham United,L
1349,Santiago Bueno,Wolverhampton Wanderers,Defender,697,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,45,-49,3062,126,175,2023/2024,West Ham United,L
1351,Enso González,Wolverhampton Wanderers,Forward,698,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,50,-50,1360,72,122,2023/2024,West Ham United,L
1353,Nathan Fraser,Wolverhampton Wanderers,Forward,704,170,19,0,False,2023-12-17 14:00:00+00:00,3.0,...,0.0,0.0,45,161,7098,627,466,2023/2024,West Ham United,L


In [33]:
# features / params I am unsure of - keeping just in case
to_keep = ["name", "element", "opponent_team", "season", "kickoff_time"]
# target variable
target = ["total_points"]
# specifically important features: position, club, ict_index, minutes, selected, result, transfers_balance, was_home, opponent_team_name
features = ["position", "assists", "club", "clean_sheets", "goals_scored", "ict_index", "minutes", "red_cards", "saves", 
            "selected", "result", "transfers_balance", "was_home", "opponent_team_name"]

In [64]:
to_add_df = to_add_df[to_keep + target + features]
to_add_df

Unnamed: 0,name,element,opponent_team,season,kickoff_time,total_points,position,assists,club,clean_sheets,goals_scored,ict_index,minutes,red_cards,saves,selected,result,transfers_balance,was_home,opponent_team_name
0,Folarin Balogun,1,5,2023/2024,2023-12-17 14:00:00+00:00,0,Forward,0,Arsenal,0,0,0.0,0,0,0,21444,W,-382,True,Brighton & Hove Albion
2,Cédric Alves Soares,2,5,2023/2024,2023-12-17 14:00:00+00:00,0,Defender,0,Fulham,0,0,0.0,0,0,0,44654,W,-579,True,Brighton & Hove Albion
4,Mohamed Elneny,3,5,2023/2024,2023-12-17 14:00:00+00:00,0,Midfielder,0,Arsenal,0,0,0.0,0,0,0,9176,W,-569,True,Brighton & Hove Albion
6,Fábio Ferreira Vieira,4,5,2023/2024,2023-12-17 14:00:00+00:00,0,Midfielder,0,Arsenal,0,0,0.0,0,0,0,9259,W,-274,True,Brighton & Hove Albion
8,Gabriel dos Santos Magalhães,5,5,2023/2024,2023-12-17 14:00:00+00:00,6,Defender,0,Arsenal,1,0,3.1,90,0,0,1588625,W,-21353,True,Brighton & Hove Albion
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,Matt Doherty,598,19,2023/2024,2023-12-17 14:00:00+00:00,1,Defender,0,Wolverhampton Wanderers,0,0,0.3,10,0,0,18766,L,-336,False,West Ham United
1349,Santiago Bueno,697,19,2023/2024,2023-12-17 14:00:00+00:00,0,Defender,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,3062,L,-49,False,West Ham United
1351,Enso González,698,19,2023/2024,2023-12-17 14:00:00+00:00,0,Forward,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,1360,L,-50,False,West Ham United
1353,Nathan Fraser,704,19,2023/2024,2023-12-17 14:00:00+00:00,0,Forward,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,7098,L,161,False,West Ham United


In [71]:
full_data

Unnamed: 0,name,element,opponent_team,season,kickoff_time,position,assists,club,clean_sheets,goals_scored,ict_index,minutes,red_cards,saves,selected,result,transfers_balance,was_home,opponent_team_name,total_points
0,Aaron Cresswell,454,4,2016/2017,2016-08-15 19:00:00+00:00,Defender,0,West Ham United,0,0,0.0,0,0,0,14023,L,0,False,Chelsea,0
1,Aaron Lennon,142,17,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Everton,0,0,0.9,15,0,0,13918,D,0,True,Tottenham Hotspur,1
2,Abdoulaye Doucouré,482,13,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Watford,0,0,0.0,0,0,0,1051,D,0,False,Southampton,0
3,Adam Forshaw,286,14,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Middlesbrough,0,0,0.3,69,0,0,2723,D,0,True,Stoke City,1
4,Adam Lallana,205,1,2016/2017,2016-08-14 15:00:00+00:00,Midfielder,1,Liverpool,0,1,14.2,75,0,0,155525,W,0,False,Arsenal,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107084,Jean-Ricner Bellegarde,715,18,2023/2024,2023-11-11 12:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,2.7,72,0,0,1231,W,389,True,Tottenham Hotspur,2
107085,Jean-Ricner Bellegarde,715,10,2023/2024,2023-11-27 20:00:00+00:00,Midfielder,1,Wolverhampton Wanderers,0,0,6.2,61,0,0,1516,L,221,False,Fulham,5
107086,Jean-Ricner Bellegarde,715,1,2023/2024,2023-12-02 15:00:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.5,62,0,0,2527,L,952,False,Arsenal,2
107087,Jean-Ricner Bellegarde,715,6,2023/2024,2023-12-05 19:30:00+00:00,Midfielder,0,Wolverhampton Wanderers,0,0,0.4,11,0,0,3506,W,934,True,Burnley,1


In [73]:
full_data = pd.concat([full_data, to_add_df])
full_data

Unnamed: 0,name,element,opponent_team,season,kickoff_time,position,assists,club,clean_sheets,goals_scored,ict_index,minutes,red_cards,saves,selected,result,transfers_balance,was_home,opponent_team_name,total_points
0,Aaron Cresswell,454,4,2016/2017,2016-08-15 19:00:00+00:00,Defender,0,West Ham United,0,0,0.0,0,0,0,14023,L,0,False,Chelsea,0
1,Aaron Lennon,142,17,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Everton,0,0,0.9,15,0,0,13918,D,0,True,Tottenham Hotspur,1
2,Abdoulaye Doucouré,482,13,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Watford,0,0,0.0,0,0,0,1051,D,0,False,Southampton,0
3,Adam Forshaw,286,14,2016/2017,2016-08-13 14:00:00+00:00,Midfielder,0,Middlesbrough,0,0,0.3,69,0,0,2723,D,0,True,Stoke City,1
4,Adam Lallana,205,1,2016/2017,2016-08-14 15:00:00+00:00,Midfielder,1,Liverpool,0,1,14.2,75,0,0,155525,W,0,False,Arsenal,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,Matt Doherty,598,19,2023/2024,2023-12-17 14:00:00+00:00,Defender,0,Wolverhampton Wanderers,0,0,0.3,10,0,0,18766,L,-336,False,West Ham United,1
1349,Santiago Bueno,697,19,2023/2024,2023-12-17 14:00:00+00:00,Defender,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,3062,L,-49,False,West Ham United,0
1351,Enso González,698,19,2023/2024,2023-12-17 14:00:00+00:00,Forward,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,1360,L,-50,False,West Ham United,0
1353,Nathan Fraser,704,19,2023/2024,2023-12-17 14:00:00+00:00,Forward,0,Wolverhampton Wanderers,0,0,0.0,0,0,0,7098,L,161,False,West Ham United,0


In [74]:
full_data.to_csv("most_updated_data.csv", index=False)