<a href="https://colab.research.google.com/github/donald-okara/FPL_point_predictor/blob/main/fpl_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Importing libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests

# Confirmation message so the cell output shows that the imports above completed.
print("Libraries imported successfully......")


Libraries imported successfully......


In [None]:
# Mount Google Drive at /content/drive so the CSV files this notebook reads from
# /content/drive/MyDrive/FPL/ are reachable.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime — confirm
# before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# i. Player data Current Season


In [None]:
### Function to return player url
def format_player_url(player_id):
    """Build the FPL ``element-summary`` API URL for a single player.

    Args:
        player_id: Identifier interpolated as-is into the URL path.

    Returns:
        The URL string ``https://fantasy.premierleague.com/api/element-summary/<player_id>/``.
    """
    return f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'

# Example usage: build and print the element-summary URL for one sample player.
player_id = 447  # Sample player ID; replace with the ID you want to inspect
formatted_url = format_player_url(player_id)
print(formatted_url)


https://fantasy.premierleague.com/api/element-summary/447/


In [None]:
# Fetch the FPL bootstrap-static payload; its 'elements' entry is the player list.

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout keeps the cell from hanging forever if the API does not answer.
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a JSON/KeyError further down.
r.raise_for_status()
# NOTE(review): `json` shadows the standard-library module name; the name is kept
# because other cells may still read it.
json = r.json()


#Convert to df

elements_df = pd.DataFrame(json['elements'])
# Keep only the identity columns and rename the keys to the names used by the
# merges in load_data_for_player ('player_id', 'team_id').
elements = (
    elements_df.loc[:, ['id', 'team', 'web_name', 'first_name', 'second_name']]
    .rename(columns={'id': 'player_id', 'team': 'team_id'})
)



In [None]:
# Team-level table prepared outside this notebook view; load_data_for_player merges it
# onto each player's history on 'team_id'.
# NOTE(review): columns that load_data_for_player reads after that merge ('event_id',
# 'kickoff_time', 'is_home', 'started') and the model features presumably come from
# this file — confirm against the CSV.
filtered_teams = pd.read_csv('/content/drive/MyDrive/FPL/filtered_teams.csv')


# ii. Feature Engineering

Features_engineered
>['Minutes_per_game', 'Player_Strength', 'XA', 'XG', 'XS']

intended_features
>['fixture_difficulty','kickoff_time', 'started',
       'is_home', 'Attack_Strength', 'team_score', 'Form', 'Numerical_Form',
       'Win_percentage', 'Strength', 'Defence_Strength', 'Home_Form',
       'Home_Numerical_Form', 'Home_Win_percentage', 'Home_Strength',
       'Home_Defence_Strength', 'Home_Attack_Strength', 'Away_Form',
       'Away_Numerical_Form', 'Away_Win_percentage', 'Away_Strength',
       'Away_Defence_Strength', 'Away_Attack_Strength']

intended_labels
>['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

# iii. Model Selection

In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import requests
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression

# Used by train_model as the `k` of SelectKBest and as the network input width.
# 8 equals the length of the `features` list in process_player.
num_top_features = 8  # Set it to the total number of features

def load_data_for_player(player_id):
    """Fetch one player's element-summary data and split it into played and upcoming rows.

    The player's ``history`` records are merged with the notebook-level ``elements``
    table (on ``player_id``) and ``filtered_teams`` table (on ``team_id``), indexed by
    ``kickoff_time`` and forward-filled.

    Args:
        player_id: ID handed to ``format_player_url`` to build the request URL.

    Returns:
        A tuple ``(player_history_df, player_future_fxt)``:

        * ``player_history_df`` - rows where ``started == True``.
        * ``player_future_fxt`` - the first three rows where ``started == False``,
          with the label columns removed.

        ``(None, None)`` is returned when the request raises, the response status is
        not 200, or the payload contains no history records.
    """
    # Define the URL for the player's data
    player_url = format_player_url(player_id)

    try:
        # The timeout keeps one unresponsive request from stalling a whole batch run.
        response = requests.get(player_url, timeout=30)
    except requests.RequestException:
        # Same "no data" contract as a non-200 answer.
        return None, None

    if response.status_code != 200:
        return None, None

    player_data = response.json()
    history = pd.DataFrame(player_data.get('history', []))
    if history.empty:
        # No rows means no columns either; the rename/drop/merge steps below would fail.
        return None, None

    history = history.rename(columns={'element': 'player_id', 'round': 'event_id'})

    # These columns are removed from the API history before merging.
    # NOTE(review): 'kickoff_time' and 'event_id' are read again after the merges, so they
    # presumably come from `filtered_teams` (`elements` only carries identity columns) — confirm.
    columns_to_drop = ['was_home', 'bps', 'kickoff_time', 'influence', 'creativity', 'threat', 'ict_index', 'starts',
                       'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded',
                       'value', 'transfers_balance', 'selected', 'transfers_in', 'transfers_out', 'fixture',
                       'team_h_score', 'team_a_score', 'event_id']
    # errors='ignore' tolerates API payloads that lack some of these optional columns.
    history = history.drop(columns=columns_to_drop, errors='ignore')

    history = history.merge(elements, on='player_id')
    # NOTE(review): merging on 'team_id' alone pairs every history row with every
    # `filtered_teams` row of that team — confirm this is intended.
    history = history.merge(filtered_teams, on=['team_id'])
    history['player_fixture_id'] = history['player_id'].astype(str) + '_' + history['event_id'].astype(str)

    history['kickoff_time'] = pd.to_datetime(history['kickoff_time'])
    history = history.set_index('kickoff_time')
    # DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
    history = history.ffill()
    history['is_home'] = history['is_home'].astype(int)

    labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

    started = history['started']
    # drop() returns a new frame, so later column assignments on the result do not
    # write into a slice of `history`.
    player_future_fxt = history[started.eq(False)].head(3).drop(columns=labels)
    player_history_df = history[started.eq(True)]

    return player_history_df, player_future_fxt

num_top_features = 8  # Set it to the total number of features

def train_model(X_train, y_train, epochs=100, batch_size=32):
    """Build a pipeline around a small Keras regressor and train the regressor.

    NOTE(review): only the ``'model'`` step is fitted. The ``'feature_selector'`` and
    ``'scaler'`` steps are constructed but never fitted or applied, so the network is
    trained on ``X_train`` exactly as passed in. Callers must predict through
    ``pipeline.named_steps['model']`` on features prepared the same way (as
    ``process_player`` does). Decide whether the preprocessing should be applied
    at both train and predict time, or removed.

    Args:
        X_train: 2-D feature table (for example a DataFrame); its column count sets the
            network input width.
        y_train: Target values for a single label.
        epochs: Number of training epochs (default 100).
        batch_size: Mini-batch size passed to Keras (default 32).

    Returns:
        The ``Pipeline`` whose ``'model'`` step is the trained Keras network.
    """
    # Size the input layer from the data actually fed to the network, so a change to the
    # feature list cannot silently disagree with the global constant.
    n_inputs = np.shape(X_train)[1]

    network = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(n_inputs,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # Output layer with 1 neuron for regression
    ])

    pipeline = Pipeline([
        ('feature_selector', SelectKBest(score_func=f_regression, k=num_top_features)),
        ('scaler', StandardScaler(with_mean=False)),
        ('model', network),
    ])

    # run_eagerly=True is kept from the original configuration.
    network.compile(optimizer='adam', loss='mean_squared_error', run_eagerly=True)

    # Train the network directly; see the note in the docstring about the unused steps.
    network.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

    return pipeline

# Define a function to process a player
def process_player(player_id):
    """Train one model per label for a player and predict the player's upcoming rows.

    Data comes from ``load_data_for_player``. For every label a fresh model is trained
    with ``train_model`` on 80% of the player's played rows, and its predictions for the
    upcoming rows are stored in a column named after the label.

    Args:
        player_id: ID forwarded to ``load_data_for_player``.

    Returns:
        A DataFrame of the upcoming rows with one predicted column per label, de-duplicated
        on ``opponent_team``, ``total_points`` and ``player_fixture_id``. ``None`` is
        returned (after printing a message) when loading fails or when there is too
        little data to train or predict.
    """
    player_history_df, player_future_fxt = load_data_for_player(player_id)

    if player_history_df is None or player_future_fxt is None:
        # Handle the case when data loading fails
        print(f"Data loading failed for player ID: {player_id}")
        return None

    # train_test_split needs at least two rows to leave a non-empty training set, and
    # there is nothing to predict without upcoming rows.
    if len(player_history_df) < 2 or player_future_fxt.empty:
        print(f"Not enough data to train or predict for player ID: {player_id}")
        return None

    features = ['fixture_difficulty', 'is_home', 'Attack_Strength', 'team_score', 'Numerical_Form', 'Win_percentage',
                'Strength', 'Defence_Strength']
    labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

    # NOTE(review): the held-out 20% is never evaluated; the split is kept so the
    # training rows stay the same as before.
    X_train, _X_test, y_train, _y_test = train_test_split(player_history_df[features], player_history_df[labels],
                                                          test_size=0.2, random_state=42)

    # Work on a copy so the frame returned by load_data_for_player is not mutated.
    predictions = player_future_fxt.copy()
    future_features = predictions[features]

    for label_to_predict in labels:
        # Train a separate model for each label
        model = train_model(X_train, y_train[label_to_predict])

        label_predictions = model.named_steps['model'].predict(future_features)

        # Keras returns shape (n, 1); flatten to one value per row before assigning.
        predictions[label_to_predict] = np.asarray(label_predictions).reshape(-1)

    # Replace the kickoff_time index with a plain 0..n-1 index.
    predictions = predictions.reset_index(drop=True)

    return predictions.drop_duplicates(subset=['opponent_team', 'total_points', 'player_fixture_id'])




In [None]:
# Squad file from Drive; only its 'element' column is used in this cell.
FPL_Team = pd.read_csv('/content/drive/MyDrive/FPL/FPL_team.csv')

# Player IDs to process, taken from the 'element' column of the squad file.
# NOTE(review): presumably these are FPL element IDs, since they are later passed to
# process_player — confirm against the CSV.
player_ids = FPL_Team.element  # Add the IDs you want to process



In [None]:
# Spot check: run the fetch/train/predict flow for a single hard-coded player ID.
# NOTE(review): 308 is presumably the element ID of the player the variable is named after — confirm.
Salah = process_player(308)

In [None]:
# Display the frame returned by process_player for the spot-check player.
Salah

Unnamed: 0,player_id,opponent_team,total_points,minutes,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,bonus,...,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength,player_fixture_id,goals_scored,assists,clean_sheets,goals_conceded,saves
0,308,7,5,76,0,0,0,0,0,0,...,40.0,0.446777,0.446035,0.481458,308_9,0.453387,0.507423,0.074876,0.834734,-0.064191
1,308,7,5,76,0,0,0,0,0,0,...,40.0,0.446777,0.446035,0.481458,308_10,0.453387,0.507423,0.074876,0.834734,-0.064191
2,308,7,5,76,0,0,0,0,0,0,...,40.0,0.446777,0.446035,0.481458,308_11,0.537156,0.5406,0.133858,0.759508,-0.015694


# iv. Points

In [None]:
FPL_team = pd.read_csv('/content/drive/MyDrive/FPL/FPL_team.csv')

# Read the IDs from the frame loaded in this cell. The previous spelling (`FPL_Team`)
# only resolved if an earlier cell had already been run.
player_ids = FPL_team.element  # Add the IDs you want to process

# Collect the per-player frames and concatenate once, instead of growing a DataFrame
# inside the loop.
player_frames = []

for i in player_ids:
  print("Player id: ", i)
  player_result = process_player(i)
  # process_player returns None when it could not produce predictions for a player.
  if player_result is not None:
    player_frames.append(player_result)

# pd.concat rejects an empty list, so fall back to an empty frame when nothing was produced.
Manager_team = pd.concat(player_frames, ignore_index=True) if player_frames else pd.DataFrame()


In [None]:
# Display the combined predictions for every player processed in the loop above.
Manager_team

Unnamed: 0,player_id,opponent_team,total_points,minutes,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,bonus,...,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength,player_fixture_id,goals_scored,assists,clean_sheets,goals_conceded,saves
0,308,7,5,76,0,0,0,0,0,0,...,40.000000,0.446777,0.446035,0.481458,308_9,0.453387,0.507423,0.074876,0.834734,-0.064191
1,308,7,5,76,0,0,0,0,0,0,...,40.000000,0.446777,0.446035,0.481458,308_10,0.453387,0.507423,0.074876,0.834734,-0.064191
2,308,7,5,76,0,0,0,0,0,0,...,40.000000,0.446777,0.446035,0.481458,308_11,0.537156,0.540600,0.133858,0.759508,-0.015694
3,230,17,6,90,0,0,0,0,0,0,...,50.000000,0.534868,0.541320,0.548705,230_9,-0.025860,-0.057484,0.529517,0.663692,2.485050
4,230,17,6,90,0,0,0,0,0,0,...,50.000000,0.534868,0.541320,0.548705,230_10,-0.007288,0.026159,0.445507,0.759628,2.487953
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,139,12,0,0,0,0,0,0,0,0,...,66.666667,0.625676,0.608653,0.623195,139_10,-0.056787,0.187682,-0.033685,0.496447,-0.074930
410,139,12,0,0,0,0,0,0,0,0,...,66.666667,0.625676,0.608653,0.623195,139_11,-0.016917,0.246383,0.002964,0.446557,0.074572
411,466,1,0,0,0,0,0,0,0,0,...,20.000000,0.268099,0.274750,0.307082,466_9,-0.096516,0.027181,0.025036,-0.001536,-0.056913
412,466,1,0,0,0,0,0,0,0,0,...,20.000000,0.268099,0.274750,0.307082,466_10,0.016625,-0.047463,0.056420,0.004050,-0.020988


