A guide to all of the FPL API endpoints used in this notebook is available at: https://www.game-change.co.uk/2023/02/10/a-complete-guide-to-the-fantasy-premier-league-fpl-api/

Descriptions of each dataframe are in the notes.

In [6]:
#Importing libraries

import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import requests

print("Libraries imported successfully......")

Libraries imported successfully......


### i. Add fixture data to the teams data

a) Teams DF

In [7]:
# Download the FPL bootstrap-static payload and split it into DataFrames

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout stops the cell from hanging indefinitely if the API does not answer
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error rather than later with a confusing
# JSON-decoding or KeyError when the response is not the expected payload
r.raise_for_status()
# NOTE(review): this name shadows the stdlib `json` module for the rest of the
# notebook; it is kept because other cells may read `json[...]`
json = r.json()


# Convert the 'elements', 'element_types' and 'teams' sections to DataFrames

elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])

In [8]:
def append_event_id(teams_df, n_events=38):
    """Repeat ``teams_df`` once per event and tag each copy with its event id.

    Parameters
    ----------
    teams_df : pandas.DataFrame
        Frame to replicate. It is not modified.
    n_events : int, default 38
        Number of events; copies are tagged 1..n_events in that order.

    Returns
    -------
    pandas.DataFrame
        ``len(teams_df) * n_events`` rows with a fresh 0..N-1 index and an
        'event_id' column. Rows for event 1 come first, then event 2, and so on.
    """
    # Build every per-event copy first and concatenate once; growing a
    # DataFrame with pd.concat inside the loop re-copies all previous rows on
    # each iteration.
    per_event_frames = [
        teams_df.assign(event_id=event_id)
        for event_id in range(1, n_events + 1)
    ]

    if not per_event_frames:
        # pd.concat raises on an empty list; return an empty frame that still
        # carries the expected columns.
        return teams_df.iloc[0:0].assign(event_id=pd.Series(dtype='int64'))

    return pd.concat(per_event_frames, ignore_index=True)

# Build the team-by-event table: one copy of every teams_df row for each
# event id produced by append_event_id, tagged in an 'event_id' column
expanded_teams_df = append_event_id(teams_df)


b) Fixture DF

In [9]:
import requests
import pandas as pd

def fetch_fixture_data(event_id):
    """Fetch the fixtures of one event from the FPL API as a DataFrame.

    Parameters
    ----------
    event_id : int
        Event (gameweek) number placed in the ``event`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        The parsed fixture records on HTTP 200. On any other status code a
        message is printed and None is returned, so callers must check for it.
    """
    # The f-string already interpolates event_id; no further formatting needed
    fixtures_url = f'https://fantasy.premierleague.com/api/fixtures/?event={event_id}'

    # A timeout keeps one stalled request from hanging the whole notebook
    response = requests.get(fixtures_url, timeout=30)

    if response.status_code != 200:
        print(f"Request failed for event {event_id} with status code {response.status_code}")
        return None  # Return None in case of a failed request

    # Parse the JSON response and turn the fixture records into a DataFrame
    return pd.DataFrame(response.json())


In [10]:
# Collect one fixture frame per event, then concatenate once at the end
# (concatenating inside the loop re-copies every earlier row each iteration)
fixture_frames = []

# Iterate through events from 1 to 38
for event_id in range(1, 39):
    # Get fixture data for the current event
    fixture_data = fetch_fixture_data(event_id)

    # fetch_fixture_data returns None (after printing the status code) when the
    # request fails; skip that event instead of crashing on None.copy()
    if fixture_data is None:
        continue

    # Tag a copy of the event's fixtures with the event ID
    fixture_frames.append(fixture_data.assign(event_id=event_id))

# pd.concat raises on an empty list, so fall back to an empty frame
expanded_fixture_df = (
    pd.concat(fixture_frames, ignore_index=True)
    if fixture_frames
    else pd.DataFrame()
)
    
    





In [11]:
# Give the generic 'id' / 'name' columns table-specific names so the teams and
# fixture tables can be told apart
team_column_names = {
    'name': 'team_name',
    'id': 'team_id',
    'short_name': 'team_short_name',
}
expanded_teams_df = expanded_teams_df.rename(columns=team_column_names)
expanded_fixture_df = expanded_fixture_df.rename(columns={'id': 'fixture_id'})

#### i.i) Get fixture id

In [12]:
def get_fixture_id(event_id, team_id):
    """Return the fixture_id of the first fixture in ``event_id`` involving ``team_id``.

    Looks the fixture up in the module-level ``expanded_fixture_df``. Only the
    first matching row (in table order) is used, so a team appearing in more
    than one fixture of the same event gets the earliest row's id.

    Returns None when no fixture matches.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    in_event = fixtures['event_id'] == event_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    matching_ids = fixtures.loc[in_event & involves_team, 'fixture_id']
    if matching_ids.empty:
        return None  # Return None if no matching fixture is found
    return matching_ids.iloc[0]

# Look up the fixture ID for every (team, event) row of expanded_teams_df
expanded_teams_df['fixture_id'] = expanded_teams_df.apply(lambda row: get_fixture_id(row['event_id'], row['team_id']), axis=1)


#### i.ii) Get fixture difficulty



In [13]:
def get_fixture_difficulty(team_id, fixture_id):
    """Return the difficulty rating of ``fixture_id`` from ``team_id``'s side.

    Reads the module-level ``expanded_fixture_df``: 'team_a_difficulty' when
    the team is ``team_a`` of the fixture, 'team_h_difficulty' when it is
    ``team_h``. Returns None when the fixture is not found or the team is not
    part of it.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    is_fixture = fixtures['fixture_id'] == fixture_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    candidates = fixtures.loc[is_fixture & involves_team]
    if candidates.empty:
        return None  # Return None if no matching fixture is found

    match = candidates.iloc[0]
    # team_a is checked first, mirroring the original branch order
    if match['team_a'] == team_id:
        return match['team_a_difficulty']
    return match['team_h_difficulty']

# Look up the fixture difficulty for every (team, event) row of expanded_teams_df
expanded_teams_df['fixture_difficulty'] = expanded_teams_df.apply(lambda row: get_fixture_difficulty(row['team_id'], row['fixture_id']), axis=1)


#### i.iii) Team and opponent score

In [14]:
def get_opponent_team(team_id, fixture_id):
    """Return the id of the team that ``team_id`` faces in ``fixture_id``.

    Reads the module-level ``expanded_fixture_df``: returns ``team_h`` when the
    team is ``team_a`` of the fixture and ``team_a`` when it is ``team_h``.
    Returns None when the fixture is not found or the team is not part of it.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    is_fixture = fixtures['fixture_id'] == fixture_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    candidates = fixtures.loc[is_fixture & involves_team]
    if candidates.empty:
        return None  # Return None if no matching fixture is found

    match = candidates.iloc[0]
    # team_a is checked first, mirroring the original branch order
    if match['team_a'] == team_id:
        return match['team_h']
    return match['team_a']

# Look up the opposing team for every (team, event) row of expanded_teams_df
expanded_teams_df['opponent_team'] = expanded_teams_df.apply(lambda row: get_opponent_team(row['team_id'], row['fixture_id']), axis=1)


In [15]:
def get_opponent_score(team_id, fixture_id):
    """Return the score recorded for ``team_id``'s opponent in ``fixture_id``.

    Reads the module-level ``expanded_fixture_df``: returns 'team_h_score' when
    the team is ``team_a`` of the fixture and 'team_a_score' when it is
    ``team_h``. Returns None when the fixture is not found or the team is not
    part of it.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    is_fixture = fixtures['fixture_id'] == fixture_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    candidates = fixtures.loc[is_fixture & involves_team]
    if candidates.empty:
        return None  # Return None if no matching fixture is found

    match = candidates.iloc[0]
    # team_a is checked first, mirroring the original branch order
    if match['team_a'] == team_id:
        return match['team_h_score']
    return match['team_a_score']

# Look up the opponent's score for every (team, event) row of expanded_teams_df
expanded_teams_df['opponent_score'] = expanded_teams_df.apply(lambda row: get_opponent_score(row['team_id'], row['fixture_id']), axis=1)


In [16]:
def get_team_score(team_id, fixture_id):
    """Return the score recorded for ``team_id`` itself in ``fixture_id``.

    Reads the module-level ``expanded_fixture_df``: returns 'team_a_score' when
    the team is ``team_a`` of the fixture and 'team_h_score' when it is
    ``team_h``. Returns None when the fixture is not found or the team is not
    part of it.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    is_fixture = fixtures['fixture_id'] == fixture_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    candidates = fixtures.loc[is_fixture & involves_team]
    if candidates.empty:
        return None  # Return None if no matching fixture is found

    match = candidates.iloc[0]
    # team_a is checked first, mirroring the original branch order
    if match['team_a'] == team_id:
        return match['team_a_score']
    return match['team_h_score']

# Look up the team's own score for every (team, event) row of expanded_teams_df
expanded_teams_df['team_score'] = expanded_teams_df.apply(lambda row: get_team_score(row['team_id'], row['fixture_id']), axis=1)


In [17]:
def get_finished_status(team_id, fixture_id):
    """Return the 'finished' flag of ``fixture_id`` from ``expanded_fixture_df``.

    ``team_id`` is accepted for signature symmetry with the other lookup
    helpers but is not used: the flag belongs to the fixture, not to a side.
    Returns None when no fixture has that id.
    """
    fixtures = expanded_fixture_df

    # Vectorised mask replaces a Python-level iterrows() scan of every fixture
    finished_flags = fixtures.loc[fixtures['fixture_id'] == fixture_id, 'finished']
    if finished_flags.empty:
        return None  # No matching fixture (previously an implicit None)
    return finished_flags.iloc[0]

# Look up whether the fixture has finished for every (team, event) row of expanded_teams_df
expanded_teams_df['Game_played'] = expanded_teams_df.apply(lambda row: get_finished_status(row['team_id'], row['fixture_id']), axis=1)

#### i.iv) Whether the fixture is a home game

In [18]:
def get_home_status(team_id, fixture_id):
    """Return whether ``team_id`` is the ``team_h`` side of ``fixture_id``.

    Reads the module-level ``expanded_fixture_df``: False when the team is
    ``team_a`` of the fixture, True when it is ``team_h``. Returns None when
    the fixture is not found or the team is not part of it.
    """
    fixtures = expanded_fixture_df

    # Vectorised masks replace a Python-level iterrows() scan of every fixture
    is_fixture = fixtures['fixture_id'] == fixture_id
    involves_team = (fixtures['team_a'] == team_id) | (fixtures['team_h'] == team_id)

    candidates = fixtures.loc[is_fixture & involves_team]
    if candidates.empty:
        return None  # Return None if no matching fixture is found

    # team_a is checked first, mirroring the original branch order
    if candidates.iloc[0]['team_a'] == team_id:
        return False
    return True

# Flag home fixtures for every (team, event) row of expanded_teams_df
expanded_teams_df['is_home'] = expanded_teams_df.apply(lambda row: get_home_status(row['team_id'], row['fixture_id']), axis=1)


### ii. Feature Engineering

In [19]:
# Strength columns to remove from the team-by-event table
columns_to_drop = ['strength_overall_home', 'strength_overall_away',
       'strength_attack_home', 'strength_attack_away', 'strength_defence_home',
       'strength_defence_away']

# Rebind to a new frame instead of dropping in place, and ignore columns that
# are already gone, so re-running this cell does not raise a KeyError
expanded_teams_df = expanded_teams_df.drop(columns=columns_to_drop, errors='ignore')


Gameweek Deadline variable

In [20]:
# Parse the kickoff timestamps so they support datetime arithmetic and sorting
kickoff_raw = expanded_fixture_df['kickoff_time']
expanded_fixture_df['kickoff_time'] = pd.to_datetime(kickoff_raw)


In [21]:
from datetime import timedelta


# How long before the first upcoming kickoff the deadline is placed.
# NOTE(review): confirm this offset against the official deadline the FPL API
# publishes per gameweek (presumably `deadline_time` in bootstrap-static 'events').
DEADLINE_OFFSET = timedelta(hours=1)

# Fixtures where 'started' is False, earliest kickoff first
# (requires 'kickoff_time' to have been converted to datetimes already)
filtered_events = expanded_fixture_df[expanded_fixture_df['started'] == False]
filtered_events = filtered_events.sort_values(by='kickoff_time')

if filtered_events.empty:
    # No unstarted fixture left: iloc[0] would raise IndexError, so report
    # "no deadline" instead of crashing the notebook
    first_event = None
    Gameweek_Deadline = None
else:
    # The earliest unstarted fixture defines the deadline
    first_event = filtered_events.iloc[0]
    Gameweek_Deadline = first_event['kickoff_time'] - DEADLINE_OFFSET

# You can now use Gameweek_Deadline as your constant
print("Gameweek_Deadline:", Gameweek_Deadline)

Gameweek_Deadline: 2023-10-07 10:30:00+00:00


Team strength

In [22]:
# Preview the columns that feed the team-strength calculation
summary_columns = ['team_id', 'opponent_score', 'team_score', 'fixture_difficulty', 'Game_played']
expanded_teams_df[summary_columns]

Unnamed: 0,team_id,opponent_score,team_score,fixture_difficulty,Game_played
0,1,1.0,2.0,2.0,True
1,2,5.0,1.0,4.0,True
2,3,1.0,1.0,2.0,True
3,4,2.0,2.0,3.0,True
4,5,1.0,4.0,2.0,True
...,...,...,...,...,...
755,16,,,2.0,False
756,17,,,3.0,False
757,18,,,2.0,False
758,19,,,5.0,False


In [49]:
# Weight applied to a result for each fixture difficulty rating (1-5):
# half of the rating, i.e. 0.5, 1.0, 1.5, 2.0, 2.5
fixture_difficulty_weights = {float(rating): rating * 0.5 for rating in range(1, 6)}


def calculate_team_strength(row):
    """Return the difficulty-weighted goal difference for one row.

    ``row`` must support key access for 'team_score', 'opponent_score' and
    'fixture_difficulty'. The goal difference (team minus opponent) is
    multiplied by the weight for the row's difficulty; a difficulty that is
    not a key of ``fixture_difficulty_weights`` uses a weight of 1.0.
    """
    goal_difference = row['team_score'] - row['opponent_score']

    # Unknown difficulty ratings fall back to a neutral weight
    weight = fixture_difficulty_weights.get(row['fixture_difficulty'], 1.0)

    return goal_difference * weight

# Compute the weighted goal difference row by row and store it as 'team_strength'
team_strength_values = expanded_teams_df.apply(calculate_team_strength, axis=1)
expanded_teams_df['team_strength'] = team_strength_values





In [52]:
# Inspect a single team's rows (team_id 13 is Man City in this cell's output)
is_selected_team = expanded_teams_df['team_id'] == 13
expanded_teams_df.loc[is_selected_team]

Unnamed: 0,code,draw,form,team_id,loss,team_name,played,points,position,team_short_name,...,fixture_id,fixture_difficulty,opponent_team,opponent_score,team_score,Game_played,is_home,team_strength,team_strength_h,team_strength_a
12,43,0,,13,0,Man City,0,0,0,MCI,...,1.0,2.0,6.0,0.0,3.0,True,False,3.0,,3.0
32,43,0,,13,0,Man City,0,0,0,MCI,...,16.0,4.0,15.0,0.0,1.0,True,True,2.0,2.2,
52,43,0,,13,0,Man City,0,0,0,MCI,...,30.0,2.0,17.0,1.0,2.0,True,False,1.0,,1.0
72,43,0,,13,0,Man City,0,0,0,MCI,...,39.0,2.0,10.0,1.0,5.0,True,True,4.0,4.4,
92,43,0,,13,0,Man City,0,0,0,MCI,...,49.0,3.0,19.0,1.0,3.0,True,False,3.0,,3.0
112,43,0,,13,0,Man City,0,0,0,MCI,...,59.0,2.0,16.0,0.0,2.0,True,True,2.0,2.2,
132,43,0,,13,0,Man City,0,0,0,MCI,...,70.0,2.0,20.0,2.0,1.0,True,False,-1.0,,-1.0
152,43,0,,13,0,Man City,0,0,0,MCI,...,71.0,4.0,1.0,,,False,False,,,
172,43,0,,13,0,Man City,0,0,0,MCI,...,86.0,3.0,5.0,,,False,True,,,
192,43,0,,13,0,Man City,0,0,0,MCI,...,98.0,4.0,14.0,,,False,False,,,


In [26]:
# List the column labels currently present in the team-by-event table
expanded_teams_df.columns

Index(['code', 'draw', 'form', 'team_id', 'loss', 'team_name', 'played',
       'points', 'position', 'team_short_name', 'strength', 'team_division',
       'unavailable', 'win', 'pulse_id', 'event_id', 'fixture_id',
       'fixture_difficulty', 'opponent_team', 'opponent_score', 'team_score',
       'Game_played', 'is_home', 'team_strength', 'team_strength_h',
       'team_strength_a'],
      dtype='object')

In [27]:
# Write the team-by-event table to a CSV file, without the index column.
# NOTE(review): the column listing displayed in this notebook includes
# 'team_strength_h' and 'team_strength_a', which none of the cells visible here
# create - confirm the export is reproducible after Restart Kernel -> Run All.
expanded_teams_df.to_csv('expanded_teams_data.csv', index=False)
