In [1]:
import requests
import pandas as pd

In [2]:
# Define the player's name
player_name = 'McGinn'

class PlayerData:
    def __init__(self, data):
        self.player_data = {player['web_name']: {'id': player['id'], 'element_type': player['element_type']} for player in data}
            
    def __getattr__(self, name):
        if name in self.player_data:
            return self.player_data[name]
        else:
            raise AttributeError(f"Player '{name}' not found.")


# Function to retrieve player data from the API
def get_player_data():
    url = f'https://fantasy.premierleague.com/api/bootstrap-static/'
    response = requests.get(url)
    data = response.json()
    return data['elements']

# Function to get teams difficulties
def get_difficulty():
    url = f'https://fantasy.premierleague.com/api/bootstrap-static/'
    response = requests.get(url)
    data = response.json()
    difficulty = pd.DataFrame(data['teams'])
    difficulty.drop(columns=['code','draw','form','loss','name','played','points','position','team_division','unavailable','win','pulse_id'], inplace=True)
    return difficulty

# Function to get player's fixtures
def get_fixtures(player_id):
    url = f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'
    response = requests.get(url)
    data = response.json()
    fixtures = data['fixtures']
    return fixtures

# Function to get player's historical performance
def get_history(player_id):
    url = f'https://fantasy.premierleague.com/api/element-summary/{player_id}/'
    response = requests.get(url)
    data = response.json()
    history = pd.DataFrame(data['history'])
    history.drop(columns=['kickoff_time','element','fixture', 'team_h_score', 'team_a_score','goals_conceded','own_goals','penalties_saved','penalties_missed','yellow_cards','red_cards','bonus','bps','starts','expected_goal_involvements','expected_goals_conceded','value','transfers_balance','selected','transfers_in','transfers_out'], inplace=True)
    return history

# Player data retrieval
player_data = get_player_data()

# Create an instance of PlayerData
player_info = PlayerData(player_data)

# Retrieve player data using the variable
player_info_data = player_info.__getattr__(player_name)

# Extract player ID and element type
player_id = player_info_data['id']
element_type = player_info_data['element_type']

positions = {
    1: 'GKP',
    2: 'DEF',
    3: 'MID',
    4: 'FWD'
}

# Get position based on element type
position = positions.get(element_type, 'Unknown')


# Get teams difficulties
difficulties = get_difficulty()

# Get player's fixtures
fixtures = get_fixtures(player_id)

# Get player's historical performance
history = get_history(player_id)

# Convert fixtures and history to pandas DataFrames
fixtures_df = pd.DataFrame(fixtures)
history_df = pd.DataFrame(history)
difficulty_df = pd.DataFrame(difficulties)

history_df = pd.merge(history_df, difficulty_df[['id', 'strength', 'strength_overall_home','strength_overall_away','strength_attack_home','strength_attack_away','strength_defence_home','strength_defence_away']], 
                      left_on='opponent_team', right_on='id', 
                      suffixes=('_player', '_opponent'))

history_df.drop(columns='id', inplace=True)

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Sort the DataFrame by 'round'
history_df = history_df.sort_values(by='round')

# Display the updated DataFrame
print("\nHistory")
print(history_df)

# Extracting the first row from fixtures_df
first_fixture = fixtures_df.head(1)
first_fixture.drop(columns=['id','code','team_h_score','team_a_score','event','finished','provisional_start_time','kickoff_time','event_name','difficulty'], inplace=True)

# Display the first fixture DataFrame
print("\nFirst Fixture Data")

# Merge based on `team_h` if `is_home` is False, otherwise merge based on `team_a`
if first_fixture['is_home'].iloc[0] == False:
    merged_data = pd.merge(first_fixture, difficulty_df[['id', 'strength', 'strength_overall_home','strength_overall_away','strength_attack_home','strength_attack_away','strength_defence_home','strength_defence_away']], 
                           left_on='team_h', right_on='id', 
                           suffixes=('_player', '_opponent'))
    merged_data.drop(columns=['id', 'team_h','team_a'], inplace=True)
else:
    merged_data = pd.merge(first_fixture, difficulty_df[['id', 'strength', 'strength_overall_home','strength_overall_away','strength_attack_home','strength_attack_away','strength_defence_home','strength_defence_away']], 
                           left_on='team_a', right_on='id', 
                           suffixes=('_player', '_opponent'))
    merged_data.drop(columns=['id', 'team_a','team_h'], inplace=True)

columns_to_add = ['total_points', 'goals_scored', 'assists', 'clean_sheets', 'saves', 
                  'influence', 'creativity', 'threat', 'ict_index', 'expected_goals', 'expected_assists']

merged_data[columns_to_add] = 0
print("Merged Data:")
print(merged_data)



History
    opponent_team  total_points  was_home  round  minutes  goals_scored  \
0              15             2     False      1       63             0   
2               9            10      True      2       90             1   
4               6             2     False      3       90             0   
6              11             2     False      4       90             0   
7               8             1      True      5       70             0   
8               7             3     False      6       74             0   
9               5             5      True      7       90             0   
10             20             1     False      8       90             0   
11             19             5      True      9       90             0   
12             12             8      True     10       90             1   
13             16             2     False     11       90             0   
14             10            10      True     12       90             1   
16             1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_fixture.drop(columns=['id','code','team_h_score','team_a_score','event','finished','provisional_start_time','kickoff_time','event_name','difficulty'], inplace=True)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Define features
features = [col for col in history_df.columns if col not in ['opponent_team', 'round', 'total_points']]
def get_features(position, history_df):
    if position == 'GKP':
        return [col for col in history_df.columns if col not in ['opponent_team', 'round', 'total_points','saves']]
    elif position == 'DEF':
        return [col for col in history_df.columns if col not in ['opponent_team', 'round', 'total_points', 'saves']]
    elif position in ('MID', 'FWD'):
        return [col for col in history_df.columns if col not in ['opponent_team', 'round', 'total_points', 'saves', 'clean_sheets']]
    else:
        raise ValueError("Invalid position. Position should be one of 'GKP', 'DEF', 'MID', or 'FWD'.")

# Get features based on player's position
features = get_features(position, history_df)

# Define target variable
target = 'total_points'

split_index = int(len(history_df) * 0.8)  
test_index = split_index + 5  # Include the last 5 games for testing

X_train = history_df[features][:split_index]
y_train = history_df[target][:split_index]
X_test = history_df[features][split_index:test_index]
y_test = history_df[target][split_index:test_index]

weights_train = [1] * split_index  # Default weight of 1 for all samples
weights_train[-5:] = [2, 2, 2, 2, 2]  # Assign higher weight to the last 5 games
            
model = RandomForestRegressor(random_state=42)

model.fit(X_train, y_train, sample_weight=weights_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)

predictions_df = pd.DataFrame({'Actual Points': y_test, 'Predicted Points': y_pred})

# Display the DataFrame
print(predictions_df)

print(features)

Mean Squared Error (MSE): 1.5958999999999999
    Actual Points  Predicted Points
3               3              1.60
1               1              2.44
22              8              6.63
24              2              3.13
15              1              1.89
['was_home', 'minutes', 'goals_scored', 'assists', 'influence', 'creativity', 'threat', 'ict_index', 'expected_goals', 'expected_assists', 'strength', 'strength_overall_home', 'strength_overall_away', 'strength_attack_home', 'strength_attack_away', 'strength_defence_home', 'strength_defence_away']


In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

# Calculate R-squared (R2)
r2 = r2_score(y_test, y_pred)

# Print the metrics
print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Percentage Error (MAPE):", mape)
print("R-squared (R2):", r2)


Mean Absolute Error (MAE): 1.246
Root Mean Squared Error (RMSE): 1.2632893571941466
Mean Absolute Percentage Error (MAPE): 70.65833333333332
R-squared (R2): 0.7653088235294118


In [5]:
def preprocess_merged_data(data, position):
    # Select features based on player's position
    features = get_features(position, history_df)
    
    data['was_home'] = data['is_home']
    
    merged_df = pd.DataFrame(data)[features]
    
    merged_df.fillna(0, inplace=True)
    
    return merged_df

preprocessed_merged_data = preprocess_merged_data(merged_data, position)

predicted_total_points = model.predict(preprocessed_merged_data)

print("Predicted Total Points:", predicted_total_points)

predicted_data = pd.concat([preprocessed_merged_data, pd.DataFrame({'Predicted Total Points': predicted_total_points})], axis=1)

print("Predicted Data:")
print(predicted_data)


Predicted Total Points: [2.05]
Predicted Data:
   was_home  minutes  goals_scored  assists  influence  creativity  threat  \
0      True        0             0        0          0           0       0   

   ict_index  expected_goals  expected_assists  strength  \
0          0               0                 0         3   

   strength_overall_home  strength_overall_away  strength_attack_home  \
0                   1065                   1100                  1050   

   strength_attack_away  strength_defence_home  strength_defence_away  \
0                  1120                   1080                   1100   

   Predicted Total Points  
0                    2.05  


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

features_2 = ['strength', 'strength_overall_home', 'strength_overall_away', 'strength_attack_home', 'strength_attack_away', 'strength_defence_home', 'strength_defence_away']

# Define target variable
target_2 = ['minutes','goals_scored', 'assists', 'clean_sheets','saves','influence','creativity','threat', 'ict_index','expected_goals', 'expected_assists']
split_index = int(len(history_df) * 0.8)  
test_index = split_index + 5  
print('features' ,features_2)
print('target',target_2)

X_train = history_df[features_2][:split_index]
y_train = history_df[target_2][:split_index]
X_test = history_df[features_2][split_index:test_index]
y_test = history_df[target_2][split_index:test_index]

weights_train = [1] * split_index  
weights_train[-5:] = [3, 3, 3, 3, 3] 

model_2 = RandomForestRegressor(random_state=42)

model_2.fit(X_train, y_train, sample_weight=weights_train)

y_pred_2 = model_2.predict(X_test)

mse_2 = mean_squared_error(y_test, y_pred_2)
print("Mean Squared Error (MSE):", mse_2)

y_test_1d = np.ravel(y_test)
y_pred_1d = np.ravel(y_pred_2)

predictions_df_2 = pd.DataFrame({'Actual': y_test_1d, 'Predicted': y_pred_1d,})

print(predictions_df_2)

merged_data2 = merged_data
merged_data2.drop(columns=['expected_assists','saves','influence','creativity','threat','ict_index','expected_goals','minutes','total_points','goals_scored','assists','clean_sheets'], inplace=True)
print(merged_data2)

predictions2 = model_2.predict(merged_data2[features_2])

predictions_df2 = pd.DataFrame(predictions2, columns=target_2)

print(predictions_df2)


features ['strength', 'strength_overall_home', 'strength_overall_away', 'strength_attack_home', 'strength_attack_away', 'strength_defence_home', 'strength_defence_away']
target ['minutes', 'goals_scored', 'assists', 'clean_sheets', 'saves', 'influence', 'creativity', 'threat', 'ict_index', 'expected_goals', 'expected_assists']
Mean Squared Error (MSE): 99.10923787947432
   Actual  Predicted
0      88  88.540000
1       0   0.520000
2       0   0.040000
3       1   0.510000
4       0   0.000000
5     7.6  28.242000
6     6.9  22.617600
7     6.0  19.748000
8     2.1   7.063000
9    0.12   0.475720
10   0.23   0.137280
11     90  71.860000
12      0   0.110000
13      0   0.080000
14      0   0.180000
15      0   0.000000
16   10.2   9.262000
17    7.3  17.381000
18   14.0   5.200000
19    3.2   3.195000
20   0.20   0.047700
21   0.07   0.064900
22     70  89.740000
23      1   0.190000
24      0   0.000000
25      1   0.000000
26      0   0.000000
27   32.2  19.598714
28    2.0  30.1150

In [7]:
for column in predictions_df2.columns:
    # Check if the column exists in preprocessed_merged_data
    if column in preprocessed_merged_data.columns:
        # Iterate through each row in the column
        for index, value in predictions_df2[column].items():
            # Check if the value in preprocessed_merged_data is 0
            if preprocessed_merged_data.at[index, column] == 0:
                # Replace the value in preprocessed_merged_data with the value from predictions_df2
                preprocessed_merged_data.at[index, column] = value
    else:
        print(f"Column '{column}' does not exist in preprocessed_merged_data.")
print(preprocessed_merged_data)

Column 'clean_sheets' does not exist in preprocessed_merged_data.
Column 'saves' does not exist in preprocessed_merged_data.
   was_home  minutes  goals_scored  assists  influence  creativity  threat  \
0      True    89.54          0.24     0.01      20.01       28.79   16.39   

   ict_index  expected_goals  expected_assists  strength  \
0      6.504          0.1701            0.3575         3   

   strength_overall_home  strength_overall_away  strength_attack_home  \
0                   1065                   1100                  1050   

   strength_attack_away  strength_defence_home  strength_defence_away  
0                  1120                   1080                   1100  


In [8]:
predicted_total_points = model.predict(preprocessed_merged_data)

# Display the predicted total_points
print("Predicted Total Points:", predicted_total_points)

Predicted Total Points: [3.07]
