<a href="https://colab.research.google.com/github/donald-okara/FPL_point_predictor/blob/main/fpl_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
#Importing libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests

print("Libraries imported successfully......")


Libraries imported successfully......


# i. Player Data (Current Season)


In [7]:
### Function to return player url
def format_player_url(player_id):
    """Return the FPL element-summary API URL for the given player.

    Args:
        player_id: Identifier substituted into the URL path.

    Returns:
        The element-summary URL as a string, with a trailing slash.
    """
    # '{}' marks where the player id is inserted.
    url_template = 'https://fantasy.premierleague.com/api/element-summary/{}/'
    return url_template.format(player_id)

# Example usage: build and print the element-summary URL for a single player.
player_id = 447  # Sample player ID; replace with the ID you want to inspect
formatted_url = format_player_url(player_id)
print(formatted_url)


https://fantasy.premierleague.com/api/element-summary/447/


In [8]:
# Fetch the FPL bootstrap-static payload and keep the player identity columns.

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a confusing JSON/KeyError below.
r.raise_for_status()
# NOTE: this name shadows the standard-library `json` module for the rest of
# the notebook; it is kept so that any cell reading `json` keeps working.
json = r.json()


# Convert the 'elements' records to a DataFrame and keep only the identity
# columns, renamed to the key names used by the merges further down.

elements_df = pd.DataFrame(json['elements'])
elements = (
    elements_df
    .loc[:, ['id', 'team', 'web_name', 'first_name', 'second_name']]
    .rename(columns={'id': 'player_id', 'team': 'team_id'})
)



In [9]:
# Table keyed by 'team_id'; load_data_for_player merges it onto each player's history.
# NOTE(review): the path looks like a Colab Google Drive mount - presumably Drive
# must be mounted before this cell runs; confirm.
filtered_teams = pd.read_csv('/content/drive/MyDrive/FPL/filtered_teams.csv')


# ii. Feature Engineering

Features_engineered
>['Minutes_per_game', 'Player_Strength', 'XA', 'XG', 'XS']

intended_features
>['fixture_difficulty','kickoff_time', 'started',
       'is_home', 'Attack_Strength', 'team_score', 'Form', 'Numerical_Form',
       'Win_percentage', 'Strength', 'Defence_Strength', 'Home_Form',
       'Home_Numerical_Form', 'Home_Win_percentage', 'Home_Strength',
       'Home_Defence_Strength', 'Home_Attack_Strength', 'Away_Form',
       'Away_Numerical_Form', 'Away_Win_percentage', 'Away_Strength',
       'Away_Defence_Strength', 'Away_Attack_Strength']

intended_labels
>['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

# iii. Model Selection

In [10]:
# Accumulators filled by process_player: each call appends merged frames to
# Don_Team_list and rebinds Don_Team to the concatenation of that list.
Don_Team = pd.DataFrame()
Don_Team_list = []



In [11]:
# Importing libraries
import pandas as pd
import numpy as np
import requests
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest, f_regression

num_top_features = 8  # k for SelectKBest and the network input width in train_model; equals the 8 features process_player passes, so no feature is filtered out


def load_data_for_player(player_id):
    """Download one player's match history and split it into past and upcoming rows.

    The history from the element-summary endpoint is merged with the
    module-level ``elements`` frame (on 'player_id') and ``filtered_teams``
    frame (on 'team_id'), indexed by kickoff time and forward-filled.

    Args:
        player_id: FPL player identifier passed to ``format_player_url``.

    Returns:
        ``(player_history_df, player_future_fxt)`` where the first frame holds
        the rows with ``started == True`` and the second holds the first three
        rows with ``started == False`` with the label columns removed.
        ``(None, None)`` is returned when the request fails, the response
        status is not 200, or the player has no history rows.
    """
    player_url = format_player_url(player_id)

    try:
        # The timeout prevents one stalled request from blocking the whole run.
        response = requests.get(player_url, timeout=30)
    except requests.RequestException:
        # Treat connection problems like a non-200 response: nothing to load.
        return None, None

    if response.status_code != 200:
        return None, None

    player_data = response.json()
    player_history_df = pd.DataFrame(player_data['history'])
    if player_history_df.empty:
        # No rows means no columns to drop or merge on.
        return None, None

    player_history_df = player_history_df.rename(columns={'element': 'player_id', 'round': 'event_id'})

    # NOTE(review): 'kickoff_time' (and the other fixture-level columns) are
    # dropped here yet read again after the merges, so they are presumably
    # supplied by filtered_teams - confirm against that file's schema.
    columns_to_drop = ['was_home', 'bps', 'kickoff_time', 'influence', 'creativity', 'threat', 'ict_index', 'starts',
                       'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded',
                       'value', 'transfers_balance', 'selected', 'transfers_in', 'transfers_out', 'fixture',
                       'team_h_score', 'team_a_score', 'event_id']

    player_history_df = (
        player_history_df
        .drop(columns=columns_to_drop)
        .merge(elements, on='player_id')
        .merge(filtered_teams, on=['team_id'])
    )

    # Index by kickoff time, then forward-fill gaps (DataFrame.ffill replaces
    # the deprecated fillna(method='ffill')).
    player_history_df['kickoff_time'] = pd.to_datetime(player_history_df['kickoff_time'])
    player_history_df = player_history_df.set_index('kickoff_time').ffill()
    player_history_df['is_home'] = player_history_df['is_home'].astype(int)

    labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

    # Upcoming fixtures: first three not-yet-started rows, without the label
    # columns. A non-inplace drop returns an independent frame, so callers can
    # add columns to it without SettingWithCopyWarning.
    not_started = player_history_df['started'] == False
    player_future_fxt = player_history_df[not_started].head(3).drop(columns=labels)

    # Past fixtures: rows already started, copied for the same reason.
    player_history_df = player_history_df[player_history_df['started'] == True].copy()

    return player_history_df, player_future_fxt

def train_model(X_train, y_train):
    """Fit feature selection, scaling and a small dense network for one target.

    The selector and the scaler are fitted on ``X_train`` and the network is
    trained on their output, so every step of the returned pipeline is fitted.
    Predictions must therefore pass new data through the selector and scaler
    before calling the network (see ``_predict_with_pipeline``).

    Args:
        X_train: 2-D table of numeric training features (rows are samples).
        y_train: 1-D numeric target aligned with ``X_train``.

    Returns:
        A scikit-learn ``Pipeline`` with the steps 'feature_selector',
        'scaler' and 'model' (the trained Keras network). The Pipeline is used
        as a named container; reach the steps through ``named_steps``.
    """
    # Plain float arrays avoid feature-name mismatches between fit and transform.
    X_values = np.asarray(X_train, dtype='float32')
    y_values = np.asarray(y_train, dtype='float32').ravel()

    # SelectKBest rejects k larger than the number of columns, so cap it.
    n_selected = min(num_top_features, X_values.shape[1])

    feature_selector = SelectKBest(score_func=f_regression, k=n_selected)
    scaler = StandardScaler(with_mean=False)

    X_selected = feature_selector.fit_transform(X_values, y_values)
    X_scaled = scaler.fit_transform(X_selected)

    network = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(n_selected,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # Output layer with 1 neuron for regression
    ])

    # NOTE(review): run_eagerly=True is kept from the original; it is usually a
    # debugging aid and slows training - confirm whether it is still needed.
    network.compile(optimizer='adam', loss='mean_squared_error', run_eagerly=True)

    # Train on the selected and scaled features, not on the raw input.
    network.fit(X_scaled, y_values, epochs=100, batch_size=32)

    return Pipeline([
        ('feature_selector', feature_selector),
        ('scaler', scaler),
        ('model', network),
    ])


def _predict_with_pipeline(pipeline, X):
    """Apply the fitted selector and scaler to X, then predict with the network."""
    steps = pipeline.named_steps
    X_values = np.asarray(X, dtype='float32')
    X_scaled = steps['scaler'].transform(steps['feature_selector'].transform(X_values))
    return steps['model'].predict(X_scaled)


def process_player(player_id):
    """Train one model per label for a player and predict the upcoming fixtures.

    Loads the player's data with ``load_data_for_player``, trains a separate
    model for each label on an 80% split of the history, writes the predictions
    into the future-fixtures frame, appends the merged result to the
    module-level ``Don_Team_list`` and rebinds the module-level ``Don_Team`` to
    the concatenation of that list. Prints the first prediction per label.

    Args:
        player_id: FPL player identifier.

    Returns:
        None. Results are published through ``Don_Team_list`` / ``Don_Team``.
    """
    global Don_Team  # Rebound at the end of this function

    player_history_df, player_future_fxt = load_data_for_player(player_id)

    if player_history_df is None or player_future_fxt is None:
        # Handle the case when data loading fails
        print(f"Data loading failed for player ID: {player_id}")
        return

    features = ['fixture_difficulty', 'is_home', 'Attack_Strength', 'team_score', 'Numerical_Form', 'Win_percentage',
                'Strength', 'Defence_Strength']
    labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

    # Rows with missing inputs or targets cannot be used for fitting, and
    # fixtures with missing inputs cannot be scored. Copying the future frame
    # lets us add columns without touching the frame the loader returned.
    training_rows = player_history_df.dropna(subset=features + labels)
    player_future_fxt = player_future_fxt.dropna(subset=features).copy()

    # train_test_split needs at least two rows; predicting needs at least one fixture.
    if len(training_rows) < 2 or player_future_fxt.empty:
        print(f"Not enough data to model player ID: {player_id}")
        return

    # The held-out 20% is not evaluated anywhere yet; the split is kept so the
    # models see the same share of the history as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        training_rows[features], training_rows[labels], test_size=0.2, random_state=42)

    # Loop-invariant work: build the join key and the history copy once.
    player_data = player_history_df.copy()
    player_data['player_fixture_id'] = player_data['player_id'].astype(str) + '_' + player_data['event_id'].astype(str)
    player_future_fxt['player_fixture_id'] = player_future_fxt['player_id'].astype(str) + '_' + player_future_fxt['event_id'].astype(str)
    player_name = player_history_df['web_name'].iloc[0]

    for label_to_predict in labels:
        # Train a separate model for each label
        model = train_model(X_train, y_train[label_to_predict])

        # Predict through the same selector and scaler the network was trained with.
        label_predictions = _predict_with_pipeline(model, player_future_fxt[features])

        # Keras returns shape (n_fixtures, 1); store it as a flat column.
        player_future_fxt[label_to_predict] = label_predictions.ravel()

        # NOTE(review): history rows have started == True and future rows have
        # started == False, so this inner merge on player_fixture_id may match
        # nothing; a frame is also appended once per label with a growing set
        # of prediction columns. Behavior kept as-is - confirm the intent.
        combined_data = player_data.merge(player_future_fxt, on='player_fixture_id')
        Don_Team_list.append(combined_data)

        # Only the first upcoming fixture's prediction is printed.
        print(f"Player '{player_name}': Predictions for future fixtures ({label_to_predict}):")
        print(label_predictions[0][0])

    # Concatenate once per player instead of once per label.
    Don_Team = pd.concat(Don_Team_list, ignore_index=True)





In [12]:
# CSV whose 'element' column supplies the player IDs to process.
FPL_Team = pd.read_csv('/content/drive/MyDrive/FPL/FPL_team.csv')

# Series of player IDs, iterated by the processing loop
player_ids = FPL_Team.element  # one entry per row of FPL_Team



In [None]:
# Loop through player IDs and process the data
# Train and predict for each player in turn; the ID is printed once that player is done.
for player_id in player_ids:
    process_player(player_id)
    print('Player ID:============>',player_id)

# iv. Points