In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
players_df = pd.read_csv("merged_players.csv")
players_df.head()

Unnamed: 0,first_name,second_name,goals_scored,assists,total_points,minutes,goals_conceded,creativity,influence,threat,bonus,bps,ict_index,clean_sheets,red_cards,yellow_cards,selected_by_percent,now_cost,element_type,season
0,Mesut,Özil,-0.439281,-0.516027,-0.815157,-0.883668,-0.863743,-0.615741,-0.803089,-0.56651,-0.559369,-0.82345,-0.745808,-0.715118,-0.229523,-0.712549,-0.261123,1.606123,2,2020-21
1,Sokratis,Papastathopoulos,-0.439281,-0.516027,-0.815157,-0.883668,-0.863743,-0.615741,-0.803089,-0.56651,-0.559369,-0.82345,-0.745808,-0.715118,-0.229523,-0.712549,-0.332518,-0.062392,1,2020-21
2,David,Luiz Moreira Marinho,-0.097047,-0.516027,0.071575,0.491433,0.314851,-0.4008,0.205476,0.019016,-0.559369,0.335255,-0.052719,0.085016,3.994786,-0.282034,-0.207577,0.464508,1,2020-21
3,Pierre-Emerick,Aubameyang,2.983057,0.824325,2.01806,1.41145,0.668429,1.048097,1.184258,2.779934,1.549486,0.980099,1.897089,1.951997,-0.229523,0.14848,0.934743,5.645687,2,2020-21
4,Cédric,Soares,-0.439281,-0.069243,-0.209584,-0.150806,-0.215516,-0.087363,-0.357163,-0.298144,0.015774,-0.193719,-0.284801,-0.181695,-0.229523,-0.282034,-0.29682,-0.238025,1,2020-21


In [3]:
X = players_df.drop(['total_points', 'first_name', 'second_name', 'season'], axis=1) 
y = players_df['total_points']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
model = RandomForestRegressor()
model.fit(X_train, y_train)

RandomForestRegressor()

In [6]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 0.014370598187245832
R-squared: 0.9865631579496101


In [7]:
players_df['predicted_score'] = model.predict(X)

In [8]:
players_df.head(20)

Unnamed: 0,first_name,second_name,goals_scored,assists,total_points,minutes,goals_conceded,creativity,influence,threat,...,bps,ict_index,clean_sheets,red_cards,yellow_cards,selected_by_percent,now_cost,element_type,season,predicted_score
0,Mesut,Özil,-0.439281,-0.516027,-0.815157,-0.883668,-0.863743,-0.615741,-0.803089,-0.56651,...,-0.82345,-0.745808,-0.715118,-0.229523,-0.712549,-0.261123,1.606123,2,2020-21,-0.815157
1,Sokratis,Papastathopoulos,-0.439281,-0.516027,-0.815157,-0.883668,-0.863743,-0.615741,-0.803089,-0.56651,...,-0.82345,-0.745808,-0.715118,-0.229523,-0.712549,-0.332518,-0.062392,1,2020-21,-0.815157
2,David,Luiz Moreira Marinho,-0.097047,-0.516027,0.071575,0.491433,0.314851,-0.4008,0.205476,0.019016,...,0.335255,-0.052719,0.085016,3.994786,-0.282034,-0.207577,0.464508,1,2020-21,0.088445
3,Pierre-Emerick,Aubameyang,2.983057,0.824325,2.01806,1.41145,0.668429,1.048097,1.184258,2.779934,...,0.980099,1.897089,1.951997,-0.229523,0.14848,0.934743,5.645687,2,2020-21,1.846553
4,Cédric,Soares,-0.439281,-0.069243,-0.209584,-0.150806,-0.215516,-0.087363,-0.357163,-0.298144,...,-0.193719,-0.284801,-0.181695,-0.229523,-0.282034,-0.29682,-0.238025,1,2020-21,-0.210881
5,Alexandre,Lacazette,4.009759,0.824325,1.974805,1.003648,0.37378,0.799097,1.621329,2.674214,...,1.453656,1.944452,1.151862,-0.229523,0.578995,0.41713,3.099005,3,2020-21,1.94431
6,Shkodran,Mustafi,-0.439281,-0.516027,-0.750274,-0.839342,-0.804813,-0.611138,-0.756403,-0.56651,...,-0.742844,-0.725284,-0.715118,-0.229523,-0.712549,-0.350367,0.113241,1,2020-21,-0.748544
7,Bernd,Leno,-0.439281,-0.516027,2.01806,2.200458,1.316655,-0.615741,2.022984,-0.558378,...,2.325203,0.364082,2.218708,3.994786,-0.712549,0.952592,0.113241,0,2020-21,2.016762
8,Granit,Xhaka,-0.097047,0.377541,0.698776,1.59762,0.727358,0.892069,1.032128,0.059677,...,1.171537,0.738256,1.951997,3.994786,2.301053,-0.278972,0.288875,2,2020-21,0.718025
9,Pablo,Marí,-0.439281,-0.516027,-0.079818,0.002858,-0.392305,-0.494232,-0.146274,-0.501452,...,0.093438,-0.420577,0.351728,-0.229523,0.14848,-0.332518,-0.413658,1,2020-21,-0.049539


In [9]:
df_this_year_players = pd.read_csv("cleaned_players24-25(with teams).csv")

In [10]:
element_type_mapping = {
    'GK': 0,
    'DEF': 1,
    'MID': 2,
    'FWD': 3, 
    'Manager': 4
}
df_this_year_players['element_type'] = df_this_year_players['element_type'].map(element_type_mapping)

In [11]:
columns_to_standardize = [
    'goals_scored', 'assists', 'total_points', 'minutes', 'goals_conceded',
    'creativity', 'influence', 'threat', 'bonus', 'bps', 'ict_index',
    'clean_sheets', 'red_cards', 'yellow_cards', 'selected_by_percent'
]
scaler = StandardScaler()
df_this_year_players[columns_to_standardize] = scaler.fit_transform(df_this_year_players[columns_to_standardize])
print(df_this_year_players.head())

  first_name           second_name  goals_scored   assists  total_points  \
0      Fábio       Ferreira Vieira     -0.399524 -0.493247     -0.788704   
1    Gabriel     Fernando de Jesus      0.914818  0.711774      0.501751   
2    Gabriel  dos Santos Magalhães      0.914818  0.711774      2.099456   
3        Kai               Havertz      3.543502  1.314284      2.160906   
4       Karl                  Hein     -0.399524 -0.493247     -0.788704   

    minutes  goals_conceded  creativity  influence    threat     bonus  \
0 -0.863838       -0.846113   -0.626442  -0.781864 -0.539701 -0.463039   
1 -0.013947       -0.436584    0.141389   0.066403  1.007692  0.956415   
2  1.824652        0.792004    0.471010   1.945335  1.110852  1.429566   
3  1.741080        0.792004    1.024877   1.784912  3.714114  2.849020   
4 -0.863838       -0.846113   -0.626442  -0.781864 -0.539701 -0.463039   

        bps  ict_index  clean_sheets  red_cards  yellow_cards  \
0 -0.782129  -0.728774     -0.660

In [12]:
df_this_year_players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 772 entries, 0 to 771
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   first_name           772 non-null    object 
 1   second_name          772 non-null    object 
 2   goals_scored         772 non-null    float64
 3   assists              772 non-null    float64
 4   total_points         772 non-null    float64
 5   minutes              772 non-null    float64
 6   goals_conceded       772 non-null    float64
 7   creativity           772 non-null    float64
 8   influence            772 non-null    float64
 9   threat               772 non-null    float64
 10  bonus                772 non-null    float64
 11  bps                  772 non-null    float64
 12  ict_index            772 non-null    float64
 13  clean_sheets         772 non-null    float64
 14  red_cards            772 non-null    float64
 15  yellow_cards         772 non-null    flo

In [13]:
X = df_this_year_players.drop(['total_points', 'first_name', 'second_name', 'team'], axis=1) 
y = df_this_year_players['total_points']

In [14]:
df_this_year_players['predicted_score'] = model.predict(X)

In [15]:
sorted_players = df_this_year_players.sort_values(by='predicted_score', ascending=False)
sorted_players.tail(20)

Unnamed: 0,first_name,second_name,goals_scored,assists,total_points,minutes,goals_conceded,creativity,influence,threat,...,bps,ict_index,clean_sheets,red_cards,yellow_cards,selected_by_percent,now_cost,element_type,team,predicted_score
457,Treymaurice,Nyoni,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,12,-0.760439
433,Ben,Doak,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,12,-0.760439
430,Bobby,Clark,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,12,-0.760439
428,Stefan,Bajčetić Maquieira,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,12,-0.760439
422,Henry,Cartwright,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,11,-0.760439
688,Min-Hyeok,Yang,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,18,-0.760439
404,Wanya,Marçal-Madivádua,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,11,-0.760439
365,Cameron,Humphreys,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,10,-0.760439
350,Willian,Borges da Silva,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,5.0,2,9,-0.760439
329,Luke,Harris,-0.399524,-0.493247,-0.788704,-0.863838,-0.846113,-0.626442,-0.781864,-0.539701,...,-0.782129,-0.728774,-0.660752,-0.192036,-0.709056,-0.328177,4.5,2,9,-0.760439


In [16]:
import pandas as pd
from collections import defaultdict

def select_fpl_team(players_df, budget=100, max_players_per_team=3, min_budget_usage=95):
    position_constraints = {0: 2, 1: 5, 2: 5, 3: 3}  # GK, DEF, MID, FWD
    total_players_needed = 15

    players_df = players_df[players_df["element_type"] != 4].copy()
    players_df = players_df.reset_index(drop=True)
    players_df["value"] = players_df["predicted_score"] / (players_df["now_cost"] / 10)
    selected = []
    used_teams = defaultdict(int)
    position_counts = defaultdict(int)
    total_cost = 0
    
    sorted_players = players_df.sort_values(by=["value", "predicted_score"], ascending=[False, False])
    
    for pos in position_constraints:
        candidates = sorted_players[sorted_players["element_type"] == pos]
        selected_pos = []
        
        for _, player in candidates.iterrows():
            if (len(selected_pos) < position_constraints[pos] and
                used_teams[player["team"]] < max_players_per_team and
                total_cost + player["now_cost"] <= budget):
                
                selected.append(player.name)
                selected_pos.append(player.name)
                total_cost += player["now_cost"]
                used_teams[player["team"]] += 1
                position_counts[pos] += 1

    remaining_slots = total_players_needed - len(selected)
    remaining_budget = budget - total_cost
    
    if remaining_slots > 0:
        remaining_candidates = players_df[~players_df.index.isin(selected)]
        remaining_candidates = remaining_candidates.sort_values(by=["now_cost", "predicted_score"], ascending=[False, False])
        
        for _, player in remaining_candidates.iterrows():
            if (remaining_budget >= player["now_cost"] and
                used_teams[player["team"]] < max_players_per_team and
                position_counts[player["element_type"]] < position_constraints[player["element_type"]]):
                
                selected.append(player.name)
                total_cost += player["now_cost"]
                remaining_budget -= player["now_cost"]
                used_teams[player["team"]] += 1
                position_counts[player["element_type"]] += 1
                
                if len(selected) == total_players_needed:
                    break
    
    final_team = players_df.loc[selected]
    remaining_budget = budget - final_team["now_cost"].sum()
    
    if remaining_budget > 0 and total_cost < min_budget_usage:
        upgrade_attempts = 10 
        while upgrade_attempts > 0 and (remaining_budget > 0 or total_cost < min_budget_usage):
            upgrade_attempts -= 1
            current_team = players_df.loc[selected]
            
            weak_player = current_team.sort_values(by=["now_cost", "value"], ascending=[True, True]).iloc[0]
            
            candidates = players_df[
                (players_df["element_type"] == weak_player["element_type"]) &
                (~players_df.index.isin(selected)) &
                (players_df["now_cost"] > weak_player["now_cost"]) &
                (players_df["now_cost"] <= remaining_budget + weak_player["now_cost"])
            ].sort_values(by="predicted_score", ascending=False)
            
            for _, candidate in candidates.iterrows():
                team_count = used_teams[candidate["team"]]
                if candidate["team"] == weak_player["team"]:
                    team_count -= 1 
                
                if team_count < max_players_per_team and weak_player.name in selected:
                    cost_diff = candidate["now_cost"] - weak_player["now_cost"]
                    
                    if cost_diff <= remaining_budget:
                        selected.remove(weak_player.name)
                        selected.append(candidate.name)
                        
                        total_cost += cost_diff
                        remaining_budget -= cost_diff
                        used_teams[weak_player["team"]] -= 1
                        used_teams[candidate["team"]] += 1
                        break  
            else:
                break  

    final_team = players_df.loc[selected].head(total_players_needed)
    total_cost = final_team["now_cost"].sum()
    
    return final_team, total_cost

In [17]:
selected_team, total_cost = select_fpl_team(df_this_year_players)
print(selected_team)
print("Total cost:", total_cost)


    first_name               second_name  goals_scored   assists  \
129      Bryan                    Mbeumo      5.734071  1.314284   
486       Amad                    Diallo      2.229160  3.724326   
94     Antoine                   Semenyo      2.667274  1.916795   
322       Alex                     Iwobi      2.667274  1.314284   
88      Justin                  Kluivert      4.419730  1.916795   
532  Alexander                      Isak      7.048413  2.519305   
717    Matheus  Santos Carneiro Da Cunha      4.419730  1.916795   
327       Raúl                   Jiménez      3.543502  0.711774   
416      Trent          Alexander-Arnold      0.038590  3.121816   
442     Virgil                  van Dijk      0.038590  0.109264   
11       David               Raya Martin     -0.399524 -0.493247   
459      Joško                  Gvardiol      1.791046 -0.493247   
415    Alisson             Ramses Becker     -0.399524 -0.493247   
2      Gabriel      dos Santos Magalhães      0.

In [18]:
def select_starting_11(final_team):
    formation = {0: 1, 1: 3, 2: 4, 3: 3}  # 1 GK, 3 DEF, 4 MID, 3 FWD
    starting_11 = []
    
    for pos, count in formation.items():
        candidates = final_team[final_team["element_type"] == pos]
        best_players = candidates.sort_values(by="predicted_score", ascending=False).head(count)
        starting_11.extend(best_players.index.tolist())
    
    return final_team.loc[starting_11]

select_starting_11(selected_team)

Unnamed: 0,first_name,second_name,goals_scored,assists,total_points,minutes,goals_conceded,creativity,influence,threat,...,ict_index,clean_sheets,red_cards,yellow_cards,selected_by_percent,now_cost,element_type,team,predicted_score,value
11,David,Raya Martin,-0.399524,-0.493247,2.068731,2.195772,0.955816,-0.626442,1.979398,-0.539701,...,0.387241,2.619636,-0.192036,0.339586,3.762758,5.5,0,1,1.61384,2.934255
416,Trent,Alexander-Arnold,0.03859,3.121816,2.498882,1.590932,0.546286,3.870029,2.102462,0.431212,...,2.341931,3.029684,-0.192036,1.388228,4.872702,7.4,1,12,2.43915,3.296148
442,Virgil,van Dijk,0.03859,0.109264,1.976556,2.068288,0.87391,0.201787,2.067301,0.819578,...,1.20151,3.439733,-0.192036,0.339586,3.287068,6.4,1,12,2.103273,3.286364
459,Joško,Gvardiol,1.791046,-0.493247,1.945831,2.000297,1.774874,1.590309,2.378259,1.86331,...,2.197715,1.389491,-0.192036,-0.184735,2.636958,5.9,1,13,1.944526,3.295807
129,Bryan,Mbeumo,5.734071,1.314284,3.789337,2.188689,2.430121,3.884807,3.700104,3.416771,...,4.090281,0.979442,-0.192036,0.339586,5.26911,7.9,2,4,3.786333,4.792827
88,Justin,Kluivert,4.41973,1.916795,2.990484,1.300553,0.628192,1.776645,2.301343,3.143702,...,2.696926,3.439733,-0.192036,2.43687,2.747953,5.9,2,3,2.541881,4.308273
322,Alex,Iwobi,2.667274,1.314284,2.498882,1.996047,1.447251,3.599521,2.372765,2.518676,...,3.12292,1.389491,-0.192036,-0.184735,0.63906,5.8,2,9,2.52739,4.35757
486,Amad,Diallo,2.22916,3.724326,2.498882,1.389791,1.201533,2.922287,2.057412,2.354835,...,2.703582,1.799539,-0.192036,0.863907,3.302924,5.6,2,14,2.491921,4.449859
532,Alexander,Isak,7.048413,2.519305,3.881512,1.685837,1.365345,2.353,3.590225,4.952028,...,4.063656,2.209587,-0.192036,-0.184735,9.217339,9.5,3,15,4.143622,4.361707
717,Matheus,Santos Carneiro Da Cunha,4.41973,1.916795,2.990484,1.792073,2.83965,3.020595,2.946333,3.25293,...,3.424665,0.979442,-0.192036,0.863907,0.845192,6.8,3,20,2.898088,4.261893
