## Model 2

Improved form and fixture difficulty calculation.

In this model, I created two new features for form and fixture difficulty. Form is a rolling average of gameweek points across the last 3 gameweeks, and Fixture Difficulty is calculated separately for attacking and defending players as a combination of team xG, xGA and FPL points (see the analysis folder and the article on Medium).

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so library warnings that could point at
# real problems are hidden as well — consider narrowing it.
warnings.filterwarnings("ignore")

# Never truncate columns when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [2]:
# Enter current gameweek
# This value controls how many GW_<n>.csv files are loaded, which rows count as
# "current", and which upcoming fixture columns are kept.
gameweek = 8

## Collect data

In [3]:
# Read one player CSV per gameweek, from gameweek 1 up to and including `gameweek`
all_gameweeks = [
    pd.read_csv(rf'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Players\GW_{i}.csv')
    for i in range(1, gameweek + 1)
]

# Stack the per-gameweek frames into a single table with a fresh 0..n-1 index
data = pd.concat(all_gameweeks, axis=0, ignore_index=True)

## Calculate form

In [4]:
# Length of the rolling window, in gameweeks, used for the form average
number_of_games = 3

# Order rows by player and then by gameweek so each player's window runs
# over consecutive gameweeks
final_data = data.sort_values(by=['Player ID', 'Gameweek'])

# Per-player rolling mean of gameweek points, rounded to 3 decimals.
# Rows without a full window of history are left as NaN.
points_by_player = final_data.groupby("Player ID")["GW Points"]
final_data["Form"] = points_by_player.transform(
    lambda points: points.rolling(window=number_of_games).mean().round(3)
)

# Keep only the columns used by the rest of the notebook
columns = [
    'Player ID', 'Name', 'Last_Name', 'Team', 'Position', 'Cost_Today',
    'GW Points', 'Form', 'Gameweek', 'Avail'
]
final_data = final_data[columns]

In [5]:
# Rescale form onto the interval [1, 2]
scaler = MinMaxScaler(feature_range=(1,2))

# Rows that actually carry a form value
mask = final_data['Form'].notna()

# Fit on, and overwrite, only the non-null entries so NaN rows stay NaN
form_values = final_data.loc[mask, 'Form'].values.reshape(-1, 1)
final_data.loc[mask, 'Form'] = scaler.fit_transform(form_values)

## Add upcoming fixtures 

In [6]:
# Load the season fixture list
# (assumes one row per team with one column per gameweek — TODO confirm)
fixtures = pd.read_csv(r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Fixtures\Schedule\Fixtures.csv')

# Attach each team's fixtures to every player row.
# how='inner' spells out the default: players whose team is missing from the
# fixture list are dropped.
# validate='many_to_one' raises if a team appears more than once in `fixtures`,
# which would otherwise silently duplicate every player of that team.
final_data = final_data.merge(fixtures, on='Team', how='inner', validate='many_to_one')

# Drop unneeded gameweek columns
def drop_gw_columns(final_data, gameweek, horizon=5, season_length=38):
    """Return a copy of ``final_data`` keeping only the upcoming fixture columns.

    Fixture columns are named ``GW1`` .. ``GW<season_length>``. Only the
    columns for the ``horizon`` gameweeks after ``gameweek`` are kept
    (``GW<gameweek+1>`` .. ``GW<gameweek+horizon>``); every other ``GW<n>``
    column is removed. Columns that do not follow the ``GW<n>`` pattern are
    left alone, and ``GW<n>`` columns that are absent are ignored.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Table containing the fixture columns. It is not modified.
    gameweek : int
        Current gameweek; its own column and all earlier ones are dropped.
    horizon : int, default 5
        Number of upcoming gameweeks to keep.
    season_length : int, default 38
        Highest gameweek number that can appear as a column.

    Returns
    -------
    pandas.DataFrame
        New frame without the unneeded gameweek columns.
    """
    first_kept = gameweek + 1
    last_kept = gameweek + horizon

    columns_to_drop = [
        f'GW{i}'
        for i in range(1, season_length + 1)
        if not first_kept <= i <= last_kept
    ]

    # errors='ignore' skips labels that are not present in the frame
    return final_data.drop(columns=columns_to_drop, errors='ignore')

# Keep only the upcoming fixture columns.
# Note: this rebinds `data`, which until now held the raw concatenated gameweek
# table, to the merged and trimmed player table.
data = drop_gw_columns(final_data, gameweek)

## Current gameweek

In [7]:
# Keep only the rows for the current gameweek
today = data['Gameweek'] == gameweek

# Take an explicit copy: later cells add and overwrite columns on this frame,
# and writing into a filtered selection is ambiguous (view vs copy) in pandas.
data = data[today].copy()

## Fixture Difficulty

In [8]:
# Import improved fixture difficulty ratings
difficulty = pd.read_csv(r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Fixtures\Difficulty_ratings\FD_Improved\Current_FD_Improved.csv', index_col=0)

# Look-up table: (opponent label, player position) -> combined fixture difficulty
# (presumably the opponent labels match the fixture cells, e.g. "CRY (H)" — verify)
mapping = difficulty.set_index(['Opponent', 'Position'])['FD_combined'].to_dict()

# Map difficulty for the next five gameweeks (NGW1..NGW5).
# Columns are addressed by name rather than position, so the look-up does not
# depend on column order and cannot pick up a previously added NGW column when
# fewer than five fixture columns remain.
for i in range(1, 6):  # NGW1 to NGW5
    fixture_col = f'GW{gameweek + i}'
    if fixture_col in data.columns:
        data[f'NGW{i}'] = [
            mapping.get((opponent, position), np.nan)
            for opponent, position in zip(data[fixture_col], data['Position'])
        ]
    else:
        # No fixture column for that gameweek (e.g. past the end of the schedule)
        data[f'NGW{i}'] = np.nan

In [14]:
# Rescale every upcoming-fixture difficulty column onto the interval [1, 2]
scaler_2 = MinMaxScaler(feature_range=(1,2))

for col in [f'NGW{k}' for k in range(1, 6)]:
    # Only rows with a mapped difficulty are scaled; NaN rows stay NaN
    mask = data[col].notna()
    raw_difficulty = data.loc[mask, col].values.reshape(-1, 1)
    # fit_transform refits the scaler, so each column is scaled on its own min/max
    data.loc[mask, col] = scaler_2.fit_transform(raw_difficulty)

In [15]:
# F_i = accumulated fixture difficulty over the next i gameweeks (NGW1 + ... + NGWi).
# DataFrame.sum skips NaN by default, so a missing fixture contributes 0; a row
# with no mapped fixtures at all therefore gets F_i == 0.
ngw_columns = [f'NGW{j}' for j in range(1, 6)]
for i in range(1, 6):
    data[f'F_{i}'] = data[ngw_columns[:i]].sum(axis=1)

# FDI_i = form divided by the accumulated difficulty of the next i gameweeks.
# Columns are addressed by name so the result does not depend on column order.
# F_i == 0 produces +/-inf here; a later cell replaces those with NaN.
for i in range(1, 6):
    data[f'FDI_{i}'] = (data['Form'] / data[f'F_{i}']).round(4)

In [48]:
# Export to csv for website
# The frame is written as it stands at this point, i.e. before the later cell
# that replaces +/-inf FDI values with NaN and drops incomplete rows.
# NOTE(review): the path contains a doubled backslash before "Website" inside a
# raw string — looks like a typo; confirm the file lands where intended.
data.to_csv(r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\\Website\Current_form\Current_Form_M2.csv')

## Optimization

In [16]:
import pandas as pd
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Model_1
# NOTE(review): this header says Model_1 although the notebook is titled Model 2 —
# confirm which is intended.

# This model uses the FD_index to choose the best players. The FD_index is a 
# simple calculation of current form/upcoming fixture difficulty. The fixture
# difficulty can be planned for up to 5 weeks. The model will choose the players
# that have the highest form per lowest fixture difficulty, and optimally select
# a team given the constraints of budget, position and team limit. 

# Define constants
BUDGET = 800 # Choose your budget (1000 = £100m)
WEEKS = 1 # Choose how many weeks you want to prepare for between 1 and 5 (selects the FDI_<WEEKS> column)
GK = 1 # Goalkeepers required (Choose between 0 and 2)
DEF = 3 # Defenders required (Choose between 0 and 5)
MID = 4 # Midfielders required (Choose between 0 and 5)
FWD = 3 # Forwards required (Choose between 0 and 3)

# Work on an explicit copy so the clean-up below never writes into a filtered
# selection of an earlier frame.
data = data.copy()

# Dividing form by a zero accumulated difficulty yields +/-inf. Treat those as
# missing, then drop every player that lacks a usable index for any of the five
# planning horizons.
fdi_columns = [f'FDI_{i}' for i in range(1, 6)]
data[fdi_columns] = data[fdi_columns].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=fdi_columns)

In [12]:
# Show the cleaned player table (the cell's last expression is rendered as its output)
data

Unnamed: 0,Player ID,Name,Last_Name,Team,Position,Cost_Today,GW Points,Form,Gameweek,Avail,GW9,GW10,GW11,GW12,GW13,NGW1,NGW2,NGW3,NGW4,NGW5,F_1,F_2,F_3,F_4,F_5,FDI_1,FDI_2,FDI_3,FDI_4,FDI_5
7,1,David,Raya Martín,Arsenal,GK,57.0,6.0,1.400030,8.0,a,CRY (H),BUR (A),SUN (A),TOT (H),CHE (A),1.714286,1.285714,1.428571,1.714286,1.571429,1.714286,3.000000,4.428571,6.142857,7.714286,0.8167,0.4667,0.3161,0.2279,0.1815
15,2,Kepa,Arrizabalaga Revuelta,Arsenal,GK,43.0,0.0,1.050022,8.0,a,CRY (H),BUR (A),SUN (A),TOT (H),CHE (A),1.714286,1.285714,1.428571,1.714286,1.571429,1.714286,3.000000,4.428571,6.142857,7.714286,0.6125,0.3500,0.2371,0.1709,0.1361
23,3,Karl,Hein,Arsenal,GK,40.0,0.0,1.050022,8.0,u,CRY (H),BUR (A),SUN (A),TOT (H),CHE (A),1.714286,1.285714,1.428571,1.714286,1.571429,1.714286,3.000000,4.428571,6.142857,7.714286,0.6125,0.3500,0.2371,0.1709,0.1361
31,4,Tommy,Setford,Arsenal,GK,40.0,0.0,1.050022,8.0,a,CRY (H),BUR (A),SUN (A),TOT (H),CHE (A),1.714286,1.285714,1.428571,1.714286,1.571429,1.714286,3.000000,4.428571,6.142857,7.714286,0.6125,0.3500,0.2371,0.1709,0.1361
39,5,Gabriel,dos Santos Magalhães,Arsenal,DEF,63.0,12.0,1.899955,8.0,a,CRY (H),BUR (A),SUN (A),TOT (H),CHE (A),1.714286,1.285714,1.428571,1.714286,1.571429,1.714286,3.000000,4.428571,6.142857,7.714286,1.1083,0.6333,0.4290,0.3093,0.2463
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5796,738,Lukasz,Fabianski,West Ham,GK,45.0,0.0,1.050022,8.0,a,LEE (A),NEW (H),BUR (H),BOU (A),LIV (H),1.285714,1.142857,1.285714,1.142857,1.857143,1.285714,2.428571,3.714286,4.857143,6.714286,0.8167,0.4324,0.2827,0.2162,0.1564
5801,739,Divine,Mukasa,Man City,MID,45.0,0.0,1.050022,8.0,a,AVL (A),BOU (H),LIV (H),NEW (A),LEE (H),1.571429,1.000000,1.714286,1.857143,1.285714,1.571429,2.571429,4.285714,6.142857,7.428571,0.6682,0.4083,0.2450,0.1709,0.1413
5806,740,Stephen,Mfuni,Man City,DEF,40.0,0.0,1.050022,8.0,a,AVL (A),BOU (H),LIV (H),NEW (A),LEE (H),1.000000,1.714286,1.857143,1.571429,1.142857,1.000000,2.714286,4.571429,6.142857,7.285714,1.0500,0.3869,0.2297,0.1709,0.1441
5810,741,Harry,Gray,Leeds,FWD,45.0,0.0,1.050022,8.0,d,WHU (H),BHA (A),NFO (A),AVL (H),MCI (A),1.428571,1.428571,1.000000,1.428571,1.571429,1.428571,2.857143,3.857143,5.285714,6.857143,0.7350,0.3675,0.2722,0.1987,0.1531


In [17]:
# Name of the index column for the chosen planning horizon, e.g. 'FDI_1'
column_name = f'FDI_{WEEKS}'

# Keep only players whose index for that horizon is strictly positive
has_positive_index = data[column_name] > 0
data = data[has_positive_index]

# Parallel lists, one entry per remaining player, in row order
names = data['Last_Name'].tolist()
teams = data['Team'].tolist()
positions = data['Position'].tolist()
prices = data['Cost_Today'].tolist()
FD_index = data[column_name].tolist()

# Set up a maximisation problem
prob = LpProblem("FPL_Player_Choices", LpMaximize)

# One binary decision variable per candidate player (1 = selected)
players = [LpVariable(f"player_{i}", cat="Binary") for i in range(len(data))]

# Objective: maximise the summed FD_index of the selected players
prob += lpSum(pick * score for pick, score in zip(players, FD_index))

# Spend ceiling: total price of the selected players must not exceed BUDGET
prob += lpSum(pick * price for pick, price in zip(players, prices)) <= BUDGET

# Spend floor: total price must be at least BUDGET - 75
prob += lpSum(pick * price for pick, price in zip(players, prices)) >= (BUDGET - 75)

# Position constraints: exactly the requested number of players per position
for position_code, required in (('GK', GK), ('DEF', DEF), ('MID', MID), ('FWD', FWD)):
    prob += lpSum(
        pick for pick, position in zip(players, positions) if position == position_code
    ) == required

# Club constraint: at most 3 players from any one team
for club in data.Team.unique():
    prob += lpSum(pick for pick, team in zip(players, teams) if team == club) <= 3

# Solve the problem and refuse to report a team unless the solver proved optimality.
# Without this check an infeasible budget/position combination would still
# print whatever values the solver left in the variables.
from pulp import LpStatus

status = prob.solve()
if LpStatus[status] != 'Optimal':
    raise RuntimeError(
        f"No optimal team found (solver status: {LpStatus[status]}). "
        "Check BUDGET and the position requirements."
    )

# Create a list of selected players
selected_players = []
for v in prob.variables():
    # Variable values come back as floats (or None if unset); treat anything
    # above 0.5 as "selected" instead of relying on an exact comparison with 0.
    if v.varValue is not None and v.varValue > 0.5:
        # Variable names are "player_<row position>"; recover the position
        index = int(v.name.split("_")[1])
        player_info = {
            'Name': names[index],
            'Team': teams[index],
            'Position': positions[index],
            'FD_Index': FD_index[index],
            'Price': prices[index],
        }
        selected_players.append(player_info)

# Convert selected players to a DataFrame for a better display.
# Explicit columns keep the frame well-formed even when nobody is selected.
selected_players_df = pd.DataFrame(
    selected_players,
    columns=['Name', 'Team', 'Position', 'FD_Index', 'Price'],
)

# Display the DataFrame
print(selected_players_df)

# Display the total cost and index
print('Total Team Cost:', sum(selected_players_df.Price))
print('Total Team Index', sum(selected_players_df.FD_Index))

                         Name         Team Position  FD_Index  Price
0                       James      Chelsea      DEF    1.4656   54.0
1                       Nunes     Man City      DEF    1.6500   53.0
2                     Haaland     Man City      FWD    1.2568  146.0
3   Guimarães Rodriguez Moura    Newcastle      MID    1.3125   65.0
4      Trindade da Costa Neto       Wolves      MID    1.3500   54.0
5                     Munetsi       Wolves      MID    1.3750   54.0
6                Senesi Barón  Bournemouth      DEF    1.3250   50.0
7                   Woltemade    Newcastle      FWD    1.3781   73.0
8                  Donnarumma     Man City       GK    1.4000   57.0
9                     Semenyo  Bournemouth      MID    1.7750   80.0
10                     Kroupi  Bournemouth      FWD    1.5000   45.0
Total Team Cost: 731.0
Total Team Index 15.788
