In [None]:
#Importing libraries

import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import requests

print("Libraries imported successfully......")


# i. Player Data (Current Season)


In [None]:
### Function to return player url 
def format_player_url(player_id):
    """Build the FPL element-summary API URL for one player.

    Parameters
    ----------
    player_id
        Identifier inserted into the URL path (e.g. ``447``).

    Returns
    -------
    str
        The element-summary endpoint URL for ``player_id``.
    """
    return f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'

# Example usage: build the element-summary URL for one player and print it.
player_id = 447  # Replace this with the actual player ID
formatted_url = format_player_url(player_id)
print(formatted_url)


In [None]:
# Fetch the FPL bootstrap-static payload; its 'elements' entries supply the
# player id, team and name columns used below.

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout keeps the notebook from hanging indefinitely if the API stalls.
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error rather than an obscure JSON/KeyError below.
r.raise_for_status()
# NOTE(review): `json` shadows the stdlib module name; the name is kept
# because other cells may read it.
json = r.json()


# Convert to df: keep only the identity columns and rename the keys to the
# names used for merging ('player_id', 'team_id').

elements_df = pd.DataFrame(json['elements'])
elements = (
    elements_df
    .loc[:, ['id', 'team', 'web_name', 'first_name', 'second_name']]
    .rename(columns={'id': 'player_id', 'team': 'team_id'})
)



In [None]:
# Load the team table that load_data_for_player merges onto each player's
# history using the ['team_id', 'event_id'] keys.
filtered_teams = pd.read_csv('filtered_teams.csv')


# ii. Feature Engineering

Engineered features:
>['Minutes_per_game', 'Player_Strength', 'XA', 'XG', 'XS']

Intended features:
>['fixture_difficulty','kickoff_time', 'started',
       'is_home', 'Attack_Strength', 'team_score', 'Form', 'Numerical_Form',
       'Win_percentage', 'Strength', 'Defence_Strength', 'Home_Form',
       'Home_Numerical_Form', 'Home_Win_percentage', 'Home_Strength',
       'Home_Defence_Strength', 'Home_Attack_Strength', 'Away_Form',
       'Away_Numerical_Form', 'Away_Win_percentage', 'Away_Strength',
       'Away_Defence_Strength', 'Away_Attack_Strength']

Intended labels:
>['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

# iii. Model Selection

In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin


def load_data_for_player(player_id):
    """Download one player's match history and join it with player/team data.

    Fetches the element-summary endpoint for ``player_id``, takes its
    ``history`` records, drops the columns listed in ``columns_to_drop``,
    merges the result with the module-level ``elements`` frame (on
    ``player_id``) and ``filtered_teams`` frame (on ``team_id`` and
    ``event_id``), indexes it by ``kickoff_time`` and keeps only the rows
    whose ``started`` value is True.

    Parameters
    ----------
    player_id
        FPL element id of the player (e.g. ``447``).

    Returns
    -------
    pandas.DataFrame or None
        The processed history, or ``None`` when the request fails, the
        response status is not 200, or the player has no history records.
    """
    player_url = format_player_url(player_id)

    # A timeout prevents one stalled request from hanging a whole batch run;
    # transport errors are reported as None, like the non-200 case below.
    try:
        response = requests.get(player_url, timeout=30)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    history_records = response.json()['history']
    if not history_records:
        # An empty list would give a column-less frame and make the drop and
        # merge below raise KeyError, so report "no data" instead.
        return None

    columns_to_drop = ['was_home', 'bps', 'kickoff_time', 'influence', 'creativity',
                       'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
                       'expected_goal_involvements', 'expected_goals_conceded', 'value',
                       'transfers_balance', 'selected', 'transfers_in', 'transfers_out',
                       'fixture', 'team_h_score', 'team_a_score']

    # 'team_id' is supplied by the `elements` merge, so it has to run before
    # the merge with `filtered_teams`.
    player_history_df = (
        pd.DataFrame(history_records)
        .rename(columns={'element': 'player_id', 'round': 'event_id'})
        .drop(columns=columns_to_drop)
        .merge(elements, on='player_id')
        .merge(filtered_teams, on=['team_id', 'event_id'])
    )

    # NOTE(review): the history's own 'kickoff_time' is dropped above, so this
    # column (like 'started' and 'is_home') presumably comes from
    # filtered_teams -- confirm against that file's schema.
    player_history_df['kickoff_time'] = pd.to_datetime(player_history_df['kickoff_time'])
    player_history_df = player_history_df.set_index('kickoff_time')

    # Explicit comparison kept from the original filter. The .copy() makes the
    # result an independent frame so the assignment below does not trigger
    # SettingWithCopyWarning.
    started_mask = player_history_df['started'] == True  # noqa: E712
    player_history_df = player_history_df.loc[started_mask].copy()
    player_history_df['is_home'] = player_history_df['is_home'].astype(int)

    return player_history_df


def _build_regressor(n_features):
    """Return a compiled dense network with one regression output."""
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(n_features,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1),  # Output layer with 1 neuron for regression
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


# Define a function to process data for a specific player
def process_player(player_id):
    """Train and evaluate one regression model per label for a single player.

    Loads the player's history with ``load_data_for_player``, splits it 80/20
    (``random_state=42``), scales the features, then for each label trains a
    fresh network and prints its mean squared error on the test split.

    Parameters
    ----------
    player_id
        FPL element id of the player to process.

    Returns
    -------
    None
        Results are printed. A message is printed instead when loading fails
        or when there are too few rows to form a train/test split.
    """
    player_history_df = load_data_for_player(player_id)

    if player_history_df is None:
        # Handle the case when data loading fails
        print(f"Data loading failed for player ID: {player_id}")
        return

    features = ['fixture_difficulty', 'is_home', 'Attack_Strength', 'team_score',
                'Numerical_Form', 'Win_percentage', 'Strength', 'Defence_Strength']
    labels = ['goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'saves']

    # train_test_split raises ValueError when either side of the split would
    # be empty; skip such players instead of aborting a whole batch run.
    if len(player_history_df) < 2:
        print(f"Not enough started matches to train a model for player ID: {player_id}")
        return

    X_train, X_test, y_train, y_test = train_test_split(
        player_history_df[features],
        player_history_df[labels],
        test_size=0.2,
        random_state=42,
    )

    # Fit the scaler on the training rows only and reuse it for the test rows.
    # The original wrapped the scaler in a Pipeline that was never fitted, so
    # the network was trained on raw, unscaled features.
    # with_mean=False (kept from the original) scales without centering.
    # The original SelectKBest(k=len(features)) step kept every feature, so it
    # is omitted here.
    scaler = StandardScaler(with_mean=False)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model_results = {}
    for label_to_predict in labels:
        model = _build_regressor(len(features))
        # verbose=0 keeps per-epoch logs from flooding the notebook output.
        model.fit(
            X_train_scaled,
            y_train[label_to_predict].to_numpy(),
            epochs=100,
            batch_size=32,
            verbose=0,
        )
        model_results[label_to_predict] = model.evaluate(
            X_test_scaled,
            y_test[label_to_predict].to_numpy(),
            verbose=0,
        )

    # The name does not depend on the label, so look it up once.
    name_matches = player_history_df.loc[
        player_history_df['player_id'] == player_id, 'web_name'
    ]
    player_name = name_matches.iloc[0]

    # Print the Mean Squared Error for each label
    for label, mse in model_results.items():
        print(f"Player '{player_name}': Mean Squared Error for '{label}' on the test set: {mse:.2f}")





In [None]:
# Read FPL_team.csv and use its 'element' column as the player IDs to process.
FPL_Team = pd.read_csv('FPL_team.csv')
player_ids = FPL_Team['element']



In [None]:
# Display the player IDs read from FPL_team.csv.
player_ids

In [None]:
# Loop through player IDs and process the data.
# Each call makes one HTTP request and trains one model per label, so this
# cell may take a while. The loop variable also rebinds the module-level
# `player_id` name.
for player_id in player_ids:
    process_player(player_id)


# iv. Points