<a href="https://colab.research.google.com/github/donald-okara/FPL_point_predictor/blob/main/fpl_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Teams Filtering

The link to all these APIs is: https://www.game-change.co.uk/2023/02/10/a-complete-guide-to-the-fantasy-premier-league-fpl-api/

Descriptions of each dataframe are in the notes.

In [1]:
#Importing libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests

print("Libraries imported successfully......")

Libraries imported successfully......


### i. Add fixture data to the teams data

a) Teams DF

In [2]:
# Imported full data from last seasons

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()


#Convert to df

elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])

In [3]:
def append_event_id(teams_df):
    # Create an empty DataFrame to store the expanded data
    expanded_df = pd.DataFrame()

    # Iterate through events from 1 to 38
    for event_id in range(1, 39):
        # Create a temporary DataFrame for the current event
        temp_df = teams_df.copy()

        # Set the 'event_id' column to the current event ID
        temp_df['event_id'] = event_id

        # Append the temporary DataFrame to the expanded DataFrame
        expanded_df = pd.concat([expanded_df, temp_df], ignore_index=True)

    return expanded_df

# Call the function to create an expanded DataFrame with 'event_id'
expanded_teams_df = append_event_id(teams_df)


b) Fixture DF

In [4]:
import requests
import pandas as pd

def fetch_fixture_data(event_id):
    # Define the base URL with the event_id
    base_url = f'https://fantasy.premierleague.com/api/fixtures/?event={event_id}'

    # Replace the placeholder '{}' with the actual event_id
    formatted_url = base_url.format(event_id)

    # Send an HTTP GET request to the API endpoint
    response = requests.get(formatted_url)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the JSON response to get fixture data
        fixture_json = response.json()

        # Create a DataFrame from the fixture data
        fixture_data = pd.DataFrame(fixture_json)

        return fixture_data
    else:
        print(f"Request failed for event {event_id} with status code {response.status_code}")
        return None  # Return None in case of a failed request


In [5]:
# Create an empty DataFrame to store the expanded fixture data
expanded_fixture_df = pd.DataFrame()

# Iterate through events from 1 to 38
for event_id in range(1, 39):
# Get fixture data for the current event
    fixture_data = fetch_fixture_data(event_id)

    # Create a temporary DataFrame for the fixture data
    temp_df = fixture_data.copy()

    # Set the 'event_id' column to the current event ID
    temp_df['event_id'] = event_id

    # Append the temporary DataFrame to the expanded fixture DataFrame
    expanded_fixture_df = pd.concat([expanded_fixture_df, temp_df], ignore_index=True)



In [6]:
# Renaming columns
expanded_teams_df = expanded_teams_df.rename(columns={'name' : 'team_name', 'id':'team_id', 'short_name' : 'team_short_name'})
expanded_fixture_df = expanded_fixture_df.rename(columns={'id':'fixture_id'})

#### i.i) Get fixture id

In [7]:
def get_fixture_id(event_id,team_id):
    for idx, row in expanded_fixture_df.iterrows():
        if event_id == row['event_id']:
            if team_id == row['team_a'] or team_id == row['team_h']:
                return row['fixture_id']
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture IDs for all teams in teams_df
expanded_teams_df['fixture_id'] = expanded_teams_df.apply(lambda row: get_fixture_id(row['event_id'], row['team_id']), axis=1)


#### i.ii) Get fixture difficulty



In [8]:
def get_fixture_difficulty(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            if team_id == row['team_a']:
                return row['team_a_difficulty']

            elif team_id == row['team_h']:
                return row['team_h_difficulty']
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['fixture_difficulty'] = expanded_teams_df.apply(lambda row: get_fixture_difficulty(row['team_id'], row['fixture_id']), axis=1)


#### i.iii) Team and opponent score

In [9]:
def get_opponent_team(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            if team_id == row['team_a']:
                return row['team_h']

            elif team_id == row['team_h']:
                return row['team_a']
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['opponent_team'] = expanded_teams_df.apply(lambda row: get_opponent_team(row['team_id'], row['fixture_id']), axis=1)


In [10]:
def get_opponent_score(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            if team_id == row['team_a']:
                return row['team_h_score']

            elif team_id == row['team_h']:
                return row['team_a_score']
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['opponent_score'] = expanded_teams_df.apply(lambda row: get_opponent_score(row['team_id'], row['fixture_id']), axis=1)


In [11]:
def get_team_score(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            if team_id == row['team_a']:
                return row['team_a_score']

            elif team_id == row['team_h']:
                return row['team_h_score']
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['team_score'] = expanded_teams_df.apply(lambda row: get_team_score(row['team_id'], row['fixture_id']), axis=1)


In [12]:
def get_finished_status(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            return row['finished']



# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['Game_played'] = expanded_teams_df.apply(lambda row: get_finished_status(row['team_id'], row['fixture_id']), axis=1)

#### i.iv) If fixture is home

In [13]:
def get_home_status(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            if team_id == row['team_a']:
                return False

            elif team_id == row['team_h']:
                return True
    return None  # Return None if no matching fixture is found

# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['is_home'] = expanded_teams_df.apply(lambda row: get_home_status(row['team_id'], row['fixture_id']), axis=1)


#### i.v) If deadline passed

In [14]:
def get_kickoff_time(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            return row['kickoff_time']



# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['kickoff_time'] = expanded_teams_df.apply(lambda row: get_kickoff_time(row['team_id'], row['fixture_id']), axis=1)

In [15]:
def started(team_id, fixture_id):
    for idx, row in expanded_fixture_df.iterrows():
        if fixture_id == row['fixture_id']:
            return row['started']



# Assuming you want to calculate and assign fixture difficulties for all teams in teams_df
expanded_teams_df['started'] = expanded_teams_df.apply(lambda row: started(row['team_id'], row['fixture_id']), axis=1)

Gameweek Deadline variable

In [16]:
expanded_teams_df['kickoff_time'] = pd.to_datetime(expanded_teams_df['kickoff_time'])


In [17]:
from datetime import timedelta


# Filter the DataFrame to get events where 'started' is False
filtered_events = expanded_teams_df[expanded_teams_df['started'] == False]

# Sort the filtered events by 'kickoff_time' in ascending order
filtered_events = filtered_events.sort_values(by='kickoff_time')

# Get the first event in the sorted list
first_event = filtered_events.iloc[0]

# Calculate an hour before the 'kickoff_time' of the first event
Gameweek_Deadline = first_event['kickoff_time'] - timedelta(hours=1)

# You can now use Gameweek_Deadline as your constant
print("Gameweek_Deadline:", Gameweek_Deadline)

Gameweek_Deadline: 2023-10-23 18:00:00+00:00


### ii. Feature Engineering for team data

In [18]:
# List of columns to drop
columns_to_drop = ['played', 'points', 'position','strength_overall_home', 'strength_overall_away',
       'strength_attack_home', 'strength_attack_away', 'strength_defence_home',
       'strength_defence_away','draw', 'form','strength','win','loss','team_division']

# Use the drop method to remove the specified columns
expanded_teams_df.drop(columns=columns_to_drop, inplace=True)


In [19]:
expanded_teams_df = expanded_teams_df.sort_values(by='kickoff_time')


In [20]:
teams = expanded_teams_df


### General Stats

Games played


In [21]:
# Convert 'Game_played' column to integers (1 for True, 0 for False)
teams['Game_played'].fillna(False, inplace=True)
teams['Game_played'] = teams['Game_played'].astype(int)

# Calculate cumulative 'Games_Count' for each row
teams['Games_Count'] = teams.groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Total goals

In [22]:
teams['Total_Goals'] = teams.groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per game

In [23]:
teams['Goals_per_game'] = teams['Total_Goals'] / teams['Games_Count']

Goals conceded

In [24]:
teams['Total_Conceded'] = teams.groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


In [25]:
teams['Conceded_per_game'] = teams['Total_Conceded'] / teams['Games_Count']

Win

In [26]:
teams['Win'] = teams['team_score'] > teams['opponent_score']
teams['Win'] = teams['Win'].astype(int)


Draw

In [27]:
teams['Draw'] = teams['team_score'] == teams['opponent_score']
teams['Draw'] = teams['Draw'].astype(int)

Loss

In [28]:
teams['Loss'] = teams['team_score'] < teams['opponent_score']
teams['Loss'] = teams['Loss'].astype(int)

Total wins

In [29]:
teams['Total_wins'] = teams.groupby(['team_id', 'team_name'])['Win'].cumsum()


Win percentage

In [30]:
teams['Win_percentage'] = (teams['Total_wins'] / teams['Games_Count']) * 100

Game Results

In [31]:
teams['result'] = teams.apply(lambda row: 1 if row['Win'] else (-1 if row['Loss'] else 0), axis=1)


Form

In [32]:
def calculate_form(group, num_games):
    forms = []
    current_form = ""

    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'

            if len(current_form) > num_games:
                current_form = current_form[1:]

            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Form'] = team_forms




Numeric Form

In [33]:
def calculate_numerical_form(group, num_games):
    numerical_forms = []
    current_form = []

    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']

        if game_played:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games

            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0

            current_form.append(value * weight)

            if len(current_form) > num_games:
                current_form.pop(0)

            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)

        else:
            numerical_forms.append(None)  # Append None for games that haven't been played

    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Numerical_Form'] = team_numerical_forms



Team strength

In [34]:
teams['Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*2.0 + teams['Goals_per_game'] + teams['Conceded_per_game']*-1.0

In [35]:
Liverpool = teams[teams['team_id'] == 11]
Liverpool

Unnamed: 0,code,team_id,team_name,team_short_name,unavailable,pulse_id,event_id,fixture_id,fixture_difficulty,opponent_team,...,Conceded_per_game,Win,Draw,Loss,Total_wins,Win_percentage,result,Form,Numerical_Form,Strength
10,14,11,Liverpool,LIV,False,10,1,9.0,3.0,7.0,...,1.0,0,1,0,0,0.0,0,D,0.0,6.0
30,14,11,Liverpool,LIV,False,10,2,14.0,2.0,3.0,...,1.0,1,0,0,1,50.0,1,DW,3.5,113.75
50,14,11,Liverpool,LIV,False,10,3,29.0,4.0,15.0,...,1.0,1,0,0,2,66.666667,1,DWW,6.0,157.333333
70,14,11,Liverpool,LIV,False,10,4,37.0,3.0,2.0,...,0.75,1,0,0,3,75.0,1,DWWW,8.25,178.125
90,14,11,Liverpool,LIV,False,10,5,50.0,2.0,20.0,...,0.8,1,0,0,4,80.0,1,DWWWW,10.4,191.6
110,14,11,Liverpool,LIV,False,10,6,57.0,2.0,19.0,...,0.833333,1,0,0,5,83.333333,1,WWWWW,15.0,209.833333
130,14,11,Liverpool,LIV,False,10,7,68.0,3.0,18.0,...,1.0,0,0,1,5,71.428571,-1,WWWWL,8.2,170.642857
150,14,11,Liverpool,LIV,False,10,8,72.0,3.0,5.0,...,1.125,0,1,0,5,62.5,0,WWWLD,6.0,147.125
170,14,11,Liverpool,LIV,False,10,9,85.0,2.0,9.0,...,1.0,1,0,0,6,66.666667,1,WWLDW,10.0,163.555556
190,14,11,Liverpool,LIV,False,10,10,97.0,2.0,16.0,...,,0,0,0,6,66.666667,0,,,


### Home Stats

Games played home

In [36]:
# Calculate cumulative 'Games_Count' for each row
teams['Home_Count'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals at home

In [37]:
teams['Home_Goals'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per home game

In [38]:
teams['Goals_per_home'] = teams.apply(lambda row: row['Total_Goals'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Goals conceded at home

In [39]:
teams['Home_Conceded'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per home game

In [40]:
teams['Conceded_per_home'] = teams.apply(lambda row: row['Home_Conceded'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Total home wins

In [41]:
teams['Total_Home_wins'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Win'].cumsum()


Home Win percentage

In [42]:
teams['Home_Win_percentage'] = (teams['Total_Home_wins'] / teams['Home_Count']) * 100

Home Form

In [43]:
def calculate_form_home(group, num_games):
    forms = []
    current_form = ""

    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'

            if len(current_form) > num_games:
                current_form = current_form[1:]

            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Form'] = team_forms




Home Numeric Form

In [44]:
def calculate_numerical_form_home(group, num_games):
    numerical_forms = []
    current_form = []

    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']

        if game_played and row['is_home'] == True:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games

            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0

            current_form.append(value * weight)

            if len(current_form) > num_games:
                current_form.pop(0)

            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)

        else:
            numerical_forms.append(None)  # Append None for games that haven't been played

    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Numerical_Form'] = team_numerical_forms



In [45]:
teams['Home_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*2.0 + teams['Goals_per_home'] + teams['Conceded_per_home']*-1.0

### Away Stats

Games played away

In [46]:
# Calculate cumulative 'Games_Count' for each row
teams['Away_Count'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals away

In [47]:
teams['Away_Goals'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per away game

In [48]:
teams['Goals_per_away'] = teams.apply(lambda row: row['Total_Goals'] / row['Away_Count'] if row['is_home']==False else None, axis=1)


Goals conceded away

In [49]:
teams['Away_Conceded'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per away game

In [50]:
teams['Conceded_per_away'] = teams.apply(lambda row: row['Away_Conceded'] / row['Away_Count'] if row['is_home'] ==False else None, axis=1)


Total Away wins

In [51]:
teams['Total_Away_wins'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Win'].cumsum()


Away Win percentage

In [52]:
teams['Away_Win_percentage'] = (teams['Total_Away_wins'] / teams['Away_Count']) * 100

Away Form

In [53]:
def calculate_form_away(group, num_games):
    forms = []
    current_form = ""

    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == False:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'

            if len(current_form) > num_games:
                current_form = current_form[1:]

            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Form'] = team_forms




Away Numeric Form

In [54]:
def calculate_numerical_form_away(group, num_games):
    numerical_forms = []
    current_form = []

    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']

        if game_played and row['is_home'] == False:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games

            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0

            current_form.append(value * weight)

            if len(current_form) > num_games:
                current_form.pop(0)

            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)

        else:
            numerical_forms.append(None)  # Append None for games that haven't been played

    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Numerical_Form'] = team_numerical_forms



In [55]:
teams['Away_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*2.0 + teams['Goals_per_away'] + teams['Conceded_per_away']*-1.0

## Defense Strength stats

### Overall

In [56]:
teams['Defence_Strength'] = teams['Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_game']*-1.5 + teams['Win_percentage']*1.5

Home

In [57]:
teams['Home_Defence_Strength'] = teams['Home_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_home']*-1.5 + teams['Home_Win_percentage']*1.5

Away

In [58]:
teams['Away_Defence_Strength'] = teams['Away_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_away']*-1.5 + teams['Away_Win_percentage']*1.5

## Attack Strength stats

### Overall

In [59]:
teams['Attack_Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*0.8 + teams['Goals_per_game']*2.0

Home

In [60]:
teams['Home_Attack_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*0.8 + teams['Goals_per_home']*2.0

Away

In [61]:
teams['Away_Attack_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*0.8 + teams['Goals_per_away']*2.0

# Preprocessing

In [62]:
teams.columns

Index(['code', 'team_id', 'team_name', 'team_short_name', 'unavailable',
       'pulse_id', 'event_id', 'fixture_id', 'fixture_difficulty',
       'opponent_team', 'opponent_score', 'team_score', 'Game_played',
       'is_home', 'kickoff_time', 'started', 'Games_Count', 'Total_Goals',
       'Goals_per_game', 'Total_Conceded', 'Conceded_per_game', 'Win', 'Draw',
       'Loss', 'Total_wins', 'Win_percentage', 'result', 'Form',
       'Numerical_Form', 'Strength', 'Home_Count', 'Home_Goals',
       'Goals_per_home', 'Home_Conceded', 'Conceded_per_home',
       'Total_Home_wins', 'Home_Win_percentage', 'Home_Form',
       'Home_Numerical_Form', 'Home_Strength', 'Away_Count', 'Away_Goals',
       'Goals_per_away', 'Away_Conceded', 'Conceded_per_away',
       'Total_Away_wins', 'Away_Win_percentage', 'Away_Form',
       'Away_Numerical_Form', 'Away_Strength', 'Defence_Strength',
       'Home_Defence_Strength', 'Away_Defence_Strength', 'Attack_Strength',
       'Home_Attack_Strength', 'Away_

In [63]:
num_cols_normalize = ['Numerical_Form', 'Home_Numerical_Form','Away_Numerical_Form', 'Strength', 'Home_Strength', 'Away_Strength', 'Defence_Strength', 'Home_Defence_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength', 'Home_Attack_Strength', 'Attack_Strength']

In [64]:
from sklearn.preprocessing import MinMaxScaler


# Fill null values with previous values
teams[num_cols_normalize] = teams[num_cols_normalize]
# Initialize the Min-Max Scaler
scaler = MinMaxScaler()

# Fit and transform the selected columns to scale them between 0 and 1
teams[num_cols_normalize] = scaler.fit_transform(teams[num_cols_normalize])



In [65]:
teams.columns

Index(['code', 'team_id', 'team_name', 'team_short_name', 'unavailable',
       'pulse_id', 'event_id', 'fixture_id', 'fixture_difficulty',
       'opponent_team', 'opponent_score', 'team_score', 'Game_played',
       'is_home', 'kickoff_time', 'started', 'Games_Count', 'Total_Goals',
       'Goals_per_game', 'Total_Conceded', 'Conceded_per_game', 'Win', 'Draw',
       'Loss', 'Total_wins', 'Win_percentage', 'result', 'Form',
       'Numerical_Form', 'Strength', 'Home_Count', 'Home_Goals',
       'Goals_per_home', 'Home_Conceded', 'Conceded_per_home',
       'Total_Home_wins', 'Home_Win_percentage', 'Home_Form',
       'Home_Numerical_Form', 'Home_Strength', 'Away_Count', 'Away_Goals',
       'Goals_per_away', 'Away_Conceded', 'Conceded_per_away',
       'Total_Away_wins', 'Away_Win_percentage', 'Away_Form',
       'Away_Numerical_Form', 'Away_Strength', 'Defence_Strength',
       'Home_Defence_Strength', 'Away_Defence_Strength', 'Attack_Strength',
       'Home_Attack_Strength', 'Away_

In [66]:
filtered_teams = teams.loc[:, ['team_id', 'team_name', 'team_short_name',
    'unavailable','event_id', 'fixture_id', 'fixture_difficulty','is_home','kickoff_time', 'started',
    'Attack_Strength','team_score',
    'Form', 'Numerical_Form','Win_percentage', 'Strength', 'Defence_Strength',
    'Home_Form','Home_Numerical_Form','Home_Win_percentage','Home_Strength', 'Home_Defence_Strength', 'Home_Attack_Strength',
    'Away_Form', 'Away_Numerical_Form', 'Away_Win_percentage','Away_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength'
    ]]

# 2. Manager data

In [67]:
import pandas as pd
import requests

# Manager ID and Gameweek
manager_id = 1873280
GW = 8
# API URL to get the manager's team for a specific Gameweek
url = f"https://fantasy.premierleague.com/api/entry/{manager_id}/event/{GW}/picks/"

# Make the API request to get the manager's team
r = requests.get(url)
data = r.json()

# Create a list of dictionaries to store 'picks' along with metadata
picks_data = data.get('picks', [])  # Ensure 'picks' data is available

# Extract metadata
active_chip = data.get('active_chip')
automatic_subs = data.get('automatic_subs')
entry_history = data.get('entry_history')

# Create a list with metadata for each 'picks' element
data_list = []
for pick in picks_data:
    data_list.append({
        'active_chip': active_chip,
        'automatic_subs': automatic_subs,
        'entry_history': entry_history,
        **pick,  # Include the 'picks' data
    })

# Convert the list of dictionaries to a DataFrame
manager = pd.DataFrame(data_list)


In [68]:
manager

Unnamed: 0,active_chip,automatic_subs,entry_history,element,position,multiplier,is_captain,is_vice_captain
0,,[],"{'event': 8, 'points': 49, 'total_points': 399...",230,1,1,False,False
1,,[],"{'event': 8, 'points': 49, 'total_points': 399...",519,2,1,False,False
2,,[],"{'event': 8, 'points': 49, 'total_points': 399...",36,3,1,False,False
3,,[],"{'event': 8, 'points': 49, 'total_points': 399...",20,4,1,False,False
4,,[],"{'event': 8, 'points': 49, 'total_points': 399...",19,5,1,False,False
5,,[],"{'event': 8, 'points': 49, 'total_points': 399...",504,6,1,False,False
6,,[],"{'event': 8, 'points': 49, 'total_points': 399...",373,7,1,False,False
7,,[],"{'event': 8, 'points': 49, 'total_points': 399...",516,8,2,True,False
8,,[],"{'event': 8, 'points': 49, 'total_points': 399...",140,9,1,False,False
9,,[],"{'event': 8, 'points': 49, 'total_points': 399...",355,10,1,False,True


In [69]:
manager.columns

Index(['active_chip', 'automatic_subs', 'entry_history', 'element', 'position',
       'multiplier', 'is_captain', 'is_vice_captain'],
      dtype='object')

In [70]:
FPL_team = manager.loc[:,['active_chip', 'automatic_subs', 'element', 'position',
       'multiplier', 'is_captain', 'is_vice_captain']]

# 3. Model Training

## i. Player data Current Season


In [72]:
### Function to return player url
def format_player_url(player_id):
    # Define the base URL without curly brackets
    base_url = 'https://fantasy.premierleague.com/api/element-summary/{}/'

    # Replace the placeholder '{}' with the actual player_id
    formatted_url = base_url.format(player_id)

    return formatted_url

# Example usage:
player_id = 447  # Replace this with the actual player ID
formatted_url = format_player_url(player_id)
print(formatted_url)


https://fantasy.premierleague.com/api/element-summary/447/


In [73]:
# Imported full data from last seasons

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()


#Convert to df

elements_df = pd.DataFrame(json['elements'])
elements = elements_df.loc[:,['id','team','web_name','first_name','second_name']]
elements = elements.rename(columns = {'id': 'player_id', 'team':'team_id'})
# List of columns to drop



## ii. Feature Engineering players

Features_engineered
>['Minutes_per_game', 'Player_Strength', 'XA', 'XG', 'XS']

intended_features
>['fixture_difficulty','kickoff_time', 'started',
       'is_home', 'Attack_Strength', 'team_score', 'Form', 'Numerical_Form',
       'Win_percentage', 'Strength', 'Defence_Strength', 'Home_Form',
       'Home_Numerical_Form', 'Home_Win_percentage', 'Home_Strength',
       'Home_Defence_Strength', 'Home_Attack_Strength', 'Away_Form',
       'Away_Numerical_Form', 'Away_Win_percentage', 'Away_Strength',
       'Away_Defence_Strength', 'Away_Attack_Strength']

intended_labels
>['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

## iii. Model Selection

In [74]:
# Importing libraries
import pandas as pd
import numpy as np
import requests
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression

num_top_features = 8  # Set it to the total number of features

def load_data_for_player(player_id):
    # Define the URL for the player's data
    player_url = format_player_url(player_id)  # You can use the provided function

    # Send a GET request to the player's URL
    response = requests.get(player_url)

    if response.status_code == 200:
        # Parse the JSON data
        player_data = response.json()

        # Extract the relevant data and preprocess it
        player_history_df = pd.DataFrame(player_data['history'])

        player_history_df = player_history_df.rename(columns={'element': 'player_id', 'round': 'event_id'})
        columns_to_drop = ['was_home', 'bps', 'kickoff_time', 'influence', 'creativity', 'threat', 'ict_index', 'starts',
                           'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded',
                           'value', 'transfers_balance', 'selected', 'transfers_in', 'transfers_out', 'fixture',
                           'team_h_score', 'team_a_score', 'event_id']

        # Use the drop method to remove the specified columns
        player_history_df.drop(columns=columns_to_drop, inplace=True)
        player_history_df = player_history_df.merge(elements, on='player_id')
        player_history_df = player_history_df.merge(filtered_teams, on=['team_id'])
        player_history_df['player_fixture_id'] = player_history_df['player_id'].astype(str) + '_' + player_history_df['event_id'].astype(str)

        # Perform any necessary data cleaning and feature engineering here
        player_history_df['kickoff_time'] = pd.to_datetime(player_history_df['kickoff_time'])
        player_history_df.set_index('kickoff_time', inplace=True)
        player_history_df = player_history_df.fillna(method='ffill')
        player_history_df['is_home'] = player_history_df['is_home'].astype(int)
        labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

        player_future_fxt = player_history_df[player_history_df['started'] == False].head(3)
        player_future_fxt.drop(columns=labels, inplace=True)
        player_history_df = player_history_df[player_history_df['started'] == True]

        # Return the processed data for the player
        return player_history_df, player_future_fxt
    else:
        # Handle the case when the request fails (e.g., return None or raise an exception)
        return None, None

num_top_features = 8  # Set it to the total number of features

def train_model(X_train, y_train):
    # Define a scikit-learn pipeline
    pipeline = Pipeline([
        ('feature_selector', SelectKBest(score_func=f_regression, k=num_top_features)),
        ('scaler', StandardScaler(with_mean=False)),
        ('model', tf.keras.Sequential([
            tf.keras.layers.Input(shape=(num_top_features,)),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(1)  # Output layer with 1 neuron for regression
        ]))
    ])

    # Compile the model within the pipeline with run_eagerly=True
    pipeline.named_steps['model'].compile(optimizer='adam', loss='mean_squared_error', run_eagerly=True)

    # Train the model within the pipeline on the provided training data
    pipeline.named_steps['model'].fit(X_train, y_train, epochs=100, batch_size=32)

    return pipeline

# Define a function to process a player
def process_player(player_id):
    Don_Team = pd.DataFrame()
    Don_Team_list = []    # Load data for the specified player using player_id
    player_history_df, player_future_fxt = load_data_for_player(player_id)

    if player_history_df is not None and player_future_fxt is not None:
        # Define features based on the data for the player
        features = ['fixture_difficulty', 'is_home', 'Attack_Strength', 'team_score', 'Numerical_Form', 'Win_percentage',
                    'Strength', 'Defence_Strength']
        labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(player_history_df[features], player_history_df[labels],
                                            test_size=0.2, random_state=42)

        # Create an empty dictionary to store model results for each label
        model_results = {}

        for label_to_predict in labels:
            # Train a separate model for each label
            model = train_model(X_train, y_train[label_to_predict])

            # Make predictions for player_future_fxt for the current label
            label_features = player_future_fxt[features]
            label_predictions = model.named_steps['model'].predict(label_features)

            # Create a DataFrame for the player's data
            player_data = player_history_df.copy()

            # Add predictions to player_future_fxt
            player_future_fxt[label_to_predict] = label_predictions

        # Append the combined data to the Don_Team DataFrame
        Don_Team = pd.concat([Don_Team, player_future_fxt], ignore_index=True)
        # Reset the index of Don_Team
        Don_Team.reset_index(drop=True, inplace=True)

        Don_Team = Don_Team.drop_duplicates(subset=['opponent_team', 'total_points', 'player_fixture_id'])

        return Don_Team

    else:
        # Handle the case when data loading fails
        print(f"Data loading failed for player ID: {player_id}")




In [76]:
# List of player IDs to process
player_ids = FPL_team.element  # Add the IDs you want to process



In [77]:
Salah = process_player(308)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [78]:
Salah

Unnamed: 0,player_id,opponent_team,total_points,minutes,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,bonus,...,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength,player_fixture_id,goals_scored,assists,clean_sheets,goals_conceded,saves
0,308,7,5,76,0,0,0,0,0,0,...,40.0,0.47251,0.473029,0.524037,308_10,0.742086,0.321618,0.266624,0.702026,0.14339
1,308,7,5,76,0,0,0,0,0,0,...,40.0,0.47251,0.473029,0.524037,308_11,0.578072,0.730108,0.130467,0.901504,0.147308
2,308,7,5,76,0,0,0,0,0,0,...,40.0,0.47251,0.473029,0.524037,308_12,0.768586,0.19662,0.233874,0.731696,-0.064293


# iv. Points

In [None]:
Manager_team = pd.DataFrame()

for i in player_ids:
  print("Player id: ", i)
  Manager_team = pd.concat([Manager_team, process_player(i)], ignore_index=True)


In [80]:
Manager_team

Unnamed: 0,player_id,opponent_team,total_points,minutes,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,bonus,...,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength,player_fixture_id,goals_scored,assists,clean_sheets,goals_conceded,saves
0,230,17,6,90,0,0,0,0,0,0,...,40.0,0.426241,0.431128,0.436514,230_10,-0.007701,0.001549,0.457146,0.849772,2.456955
1,230,17,6,90,0,0,0,0,0,0,...,40.0,0.426241,0.431128,0.436514,230_11,0.033916,-0.00913,0.256731,0.846586,1.593165
2,230,17,6,90,0,0,0,0,0,0,...,40.0,0.426241,0.431128,0.436514,230_12,0.004966,-0.019679,0.367869,0.879416,2.060438
3,519,4,1,90,0,0,0,0,0,0,...,60.0,0.696533,0.700346,0.761284,519_9,-0.08663,0.157007,0.603476,0.610443,0.036099
4,519,4,1,90,0,0,0,0,0,0,...,60.0,0.696533,0.700346,0.761284,519_10,-0.02764,0.202238,0.614783,0.526133,0.207931
5,519,4,1,90,0,0,0,0,0,0,...,60.0,0.696533,0.700346,0.761284,519_11,-0.02914,0.361221,0.503685,0.744303,0.037251
6,36,15,-1,86,0,0,0,1,0,0,...,40.0,0.486158,0.485251,0.551009,36_10,0.119666,0.191138,0.085806,1.180479,-0.056776
7,36,15,-1,86,0,0,0,1,0,0,...,40.0,0.486158,0.485251,0.551009,36_11,0.118407,0.26674,0.168683,1.24334,-0.07986
8,36,15,-1,86,0,0,0,1,0,0,...,40.0,0.486158,0.485251,0.551009,36_12,0.119666,0.191138,0.085806,1.180479,-0.056776
9,20,16,5,90,0,0,0,0,0,0,...,75.0,0.8043,0.805405,0.841514,20_10,0.086803,0.089247,0.443085,0.762828,-0.053233


In [81]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
