In [1]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split



In [39]:
def get_past_data(element_id, timeout=10):
    """Fetch the past-season history of one player from the FPL API.

    Parameters
    ----------
    element_id : int
        Player id interpolated into the element-summary URL.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block forever on a stalled connection.

    Returns
    -------
    pandas.DataFrame
        Frame built from the ``history_past`` entry of the JSON payload.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    KeyError
        If the payload has no ``history_past`` entry.
    """
    url_past = f'https://fantasy.premierleague.com/api/element-summary/{element_id}/'
    response = requests.get(url_past, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    past_data = response.json()

    return pd.DataFrame(past_data['history_past'])

# Pull the past-season history for a single player, looked up by ID
element_id = 2
history_past_df = get_past_data(element_id=element_id)

# Show every column when a frame is rendered
pd.options.display.max_columns = None
history_past_df


Unnamed: 0,season_name,element_code,start_cost,end_cost,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,2016/17,205651,90,91,67,651,7,4,3,5,0,0,0,2,0,0,12,258,335.8,165.9,436.0,92.9,0,0.0,0.0,0.0,0.0
1,2017/18,205651,105,104,126,1660,13,7,9,12,0,0,2,6,0,0,18,473,603.6,360.3,1089.0,204.9,0,0.0,0.0,0.0,0.0
2,2018/19,205651,105,99,79,1017,7,3,2,11,0,0,0,1,0,0,7,259,395.2,300.5,864.0,156.3,0,0.0,0.0,0.0,0.0
3,2019/20,205651,95,99,146,2018,14,8,12,14,0,0,1,3,0,0,17,535,757.6,447.7,1620.0,282.1,0,0.0,0.0,0.0,0.0
4,2020/21,205651,95,91,115,2056,9,5,11,17,0,0,0,2,0,0,16,453,501.0,467.5,931.0,190.0,0,0.0,0.0,0.0,0.0
5,2021/22,205651,85,86,120,1871,8,8,11,15,0,0,0,1,0,0,16,488,567.4,530.9,1126.0,222.3,0,0.0,0.0,0.0,0.0
6,2022/23,205651,80,81,125,2064,11,7,9,23,0,0,0,6,0,0,17,401,660.6,427.8,1331.0,241.9,24,14.24,2.91,17.15,23.15
7,2023/24,205651,80,77,85,1470,4,7,11,11,0,0,0,6,0,0,10,271,364.8,403.0,756.0,152.7,17,6.27,3.26,9.53,11.67


In [42]:
# Convert the metric columns to float with one astype call instead of one
# copy-pasted assignment per column.
# NOTE(review): presumably these columns arrive as strings (object dtype)
# from the API — confirm.
# NOTE(review): ict_index is not converted here although the dtypes output
# lists it as float64 — confirm this does not rely on leftover kernel state.
float_columns = [
    "influence",
    "creativity",
    "threat",
    "expected_goals",
    "expected_assists",
    "expected_goal_involvements",
    "expected_goals_conceded",
]
history_past_df = history_past_df.astype({column: float for column in float_columns})


In [43]:
# Inspect the dtype of every column
history_past_df.dtypes

season_name                    object
element_code                    int64
start_cost                      int64
end_cost                        int64
total_points                    int64
minutes                         int64
goals_scored                    int64
assists                         int64
clean_sheets                    int64
goals_conceded                  int64
own_goals                       int64
penalties_saved                 int64
penalties_missed                int64
yellow_cards                    int64
red_cards                       int64
saves                           int64
bonus                           int64
bps                             int64
influence                     float64
creativity                    float64
threat                        float64
ict_index                     float64
starts                          int64
expected_goals                float64
expected_assists              float64
expected_goal_involvements    float64
expected_goals_conceded       float64
dtype: object

In [12]:
# List the column names available in the history frame
history_past_df.columns

Index(['season_name', 'element_code', 'start_cost', 'end_cost', 'total_points',
       'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_conceded'],
      dtype='object')

In [13]:
# Keep the season label, end_cost, and the columns later used as features
selected_columns = [
    'season_name',
    'total_points',
    'minutes',
    'clean_sheets',
    'goals_conceded',
    'expected_goals_conceded',
    'own_goals',
    'penalties_saved',
    'saves',
    'starts',
    'end_cost',
]
past_history_df = history_past_df.loc[:, selected_columns]
past_history_df

Unnamed: 0,season_name,total_points,minutes,clean_sheets,goals_conceded,expected_goals_conceded,own_goals,penalties_saved,saves,starts,end_cost
0,2016/17,67,651,3,5,0.0,0,0,0,0,91
1,2017/18,126,1660,9,12,0.0,0,0,0,0,104
2,2018/19,79,1017,2,11,0.0,0,0,0,0,99
3,2019/20,146,2018,12,14,0.0,0,0,0,0,99
4,2020/21,115,2056,11,17,0.0,0,0,0,0,91
5,2021/22,120,1871,11,15,0.0,0,0,0,0,86
6,2022/23,125,2064,9,23,23.15,0,0,0,24,81
7,2023/24,85,1470,11,11,11.67,0,0,0,17,77


In [47]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression


# Target is end_cost; features are every remaining column except the season label
y = past_history_df['end_cost']
X = past_history_df.drop(['season_name', 'end_cost'], axis=1)

# Hold out part of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit a linear regression on the training rows and display the fitted estimator
model = LinearRegression().fit(X_train, y_train)
model


In [50]:
# Predict end_cost for the held-out rows
prediction = model.predict(X_test)

# Score the predictions with mean squared error and R-squared.
# NOTE(review): the frame shown above has only 8 rows, so the held-out set
# is very small and these metrics are likely unstable — confirm before
# drawing conclusions from them.
mse1 = mean_squared_error(y_test, prediction)
r21 = r2_score(y_test, prediction)

# Plain string: an f-string with no placeholders is unnecessary
print("All Features:")
print(f"mean squared error (MSE): {mse1}")
print(f"R-squared (R2): {r21}")


All Features:
mean squared error (MSE): 37.89413183860479
R-squared (R2): 0.5321712118690767
