In [33]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
from pulp import LpMaximize, LpProblem, LpStatus, LpVariable, lpSum

In [34]:
# Folder expected to hold one CSV per completed gameweek (GW_1.csv, GW_2.csv, ...)
players_dir = r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Players'

# Count only the gameweek CSVs. Counting every directory entry would let a
# stray file or folder (e.g. .ipynb_checkpoints) inflate the gameweek number
# and make the loader request a GW_n.csv that does not exist.
files = [
    entry for entry in os.listdir(players_dir)
    if entry.startswith('GW_') and entry.endswith('.csv')
]

if not files:
    raise FileNotFoundError(f'No GW_*.csv files found in {players_dir}')

# Read the files in gameweek order by name (GW_1 .. GW_n); files are assumed
# to be numbered consecutively starting at 1.
all_gameweeks = [
    pd.read_csv(os.path.join(players_dir, f'GW_{week}.csv'))
    for week in range(1, len(files) + 1)
]

data = pd.concat(all_gameweeks, ignore_index=True)

# The most recent gameweek equals the number of gameweek files available
gameweek = len(files)

In [35]:
# Order rows by player and then by gameweek so each player's rolling window
# runs over consecutive gameweeks
final_data = data.sort_values(by=['Player ID', 'Gameweek'])

# Number of most recent games averaged to produce "Form"
number_of_games = 3

# Form = rolling mean of "GW Points" per player, rounded to 3 decimals.
# With the default min_periods, rows with fewer than `number_of_games`
# observations so far are NaN.
points_by_player = final_data.groupby("Player ID")["GW Points"]
final_data["Form"] = points_by_player.transform(
    lambda points: points.rolling(window=number_of_games).mean().round(3)
)

# Columns carried forward into the rest of the notebook
columns = [
    'Player ID',
    'Name',
    'Last_Name',
    'Team',
    'Position',
    'Cost_Today',
    'GW Points',
    'Form',
    'Gameweek',
    'Avail',
]

final_data = final_data.loc[:, columns]

In [36]:
# Load the fixture schedule
fixtures_path = r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Fixtures\Schedule\Fixtures.csv'
fixtures = pd.read_csv(fixtures_path)

# Attach each team's fixtures to its players (default inner join on 'Team',
# so players whose team is absent from the fixture file are dropped)
final_data = pd.merge(final_data, fixtures, on='Team')

# Drop unneeded gameweek columns
def drop_gw_columns(final_data, gameweek):
    """Return a copy of ``final_data`` keeping only the next three GW columns.

    Columns ``GW1`` .. ``GW{gameweek}`` and ``GW{gameweek + 4}`` .. ``GW38``
    are removed when present; ``GW{gameweek + 1}`` .. ``GW{gameweek + 3}``
    and every non-GW column are left untouched. The input frame is not
    modified.
    """
    # Week numbers outside the three-gameweek look-ahead window
    unwanted_weeks = [*range(1, gameweek + 1), *range(gameweek + 4, 39)]

    # Only ask pandas to drop the columns that actually exist in the frame
    existing = set(final_data.columns)
    to_remove = [f'GW{week}' for week in unwanted_weeks if f'GW{week}' in existing]

    return final_data.drop(columns=to_remove, errors='ignore')

# Keep only the fixture columns for the three gameweeks after the current one
data = drop_gw_columns(final_data=final_data, gameweek=gameweek)

In [37]:
# Load the improved fixture-difficulty ratings for the current gameweek
difficulty_path = fr'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\2025_26\Fixtures\Difficulty_ratings\FD_Improved\FD_{gameweek}.csv'
difficulty = pd.read_csv(difficulty_path, index_col=0)

# Lookup table: (opponent, position) -> difficulty rating
mapping = difficulty.set_index(['Opponent', 'Position'])['Difficulty'].to_dict()

# For each of the next three gameweeks, look up the difficulty of the player's
# opponent for the player's position; pairs missing from the table give None
for offset in (1, 2, 3):
    opponent_col = f'GW{gameweek + offset}'
    data[f'Diff{offset + gameweek}'] = data.apply(
        lambda row, col=opponent_col: mapping.get((row[col], row['Position'])),
        axis=1,
    )

In [38]:
# Accumulated difficulty: Acc{gameweek + h} is the row-wise sum of
# Diff{gameweek + 1} .. Diff{gameweek + h} for h = 1, 2, 3.
# NOTE(review): sum(axis=1) skips NaN, so a missing difficulty counts as 0
# towards the accumulated value - confirm this is intended.
for horizon in range(1, 4):
    diff_cols = [f'Diff{step + gameweek}' for step in range(1, horizon + 1)]
    data[f'Acc{horizon + gameweek}'] = data[diff_cols].sum(axis=1)

# FD index: form divided by accumulated difficulty, rounded to 3 decimals
for horizon in range(1, 4):
    form_per_difficulty = data['Form'] / data[f'Acc{horizon + gameweek}']
    data[f'FD{horizon + gameweek}'] = form_per_difficulty.round(3)

In [39]:
# Keep only the rows belonging to the most recent gameweek
is_current_gameweek = data['Gameweek'].eq(gameweek)
data = data[is_current_gameweek]

## Optimization

In [55]:
# Model_1

# This model uses the FD_index to choose the best players. The FD_index is a
# simple calculation of current form/upcoming fixture difficulty. The fixture
# difficulty is available for up to 3 weeks ahead (FD columns exist for the
# next three gameweeks only). The model will choose the players that have the
# highest form per lowest fixture difficulty, and optimally select a team
# given the constraints of budget, position and team limit.

# Define constants
BUDGET = 500  # Choose your budget (1000 = £100m)
weeks = 1  # Choose how many weeks you want to prepare for between 1 and 3
GK = 0  # Goalkeepers required (Choose between 0 and 2)
DEF = 2  # Defenders required (Choose between 0 and 5)
MID = 4  # Midfielders required (Choose between 0 and 5)
FWD = 0  # Forwards required (Choose between 0 and 3)

# Number of look-ahead gameweeks for which an FD column has been computed
MAX_WEEKS = 3

# Fail early with a clear message instead of a KeyError on a missing FD column
if not 1 <= weeks <= MAX_WEEKS:
    raise ValueError(f'weeks must be between 1 and {MAX_WEEKS}, got {weeks}')

# Work on an independent copy: `data` may be a filtered slice of a larger
# frame, and assigning columns into a slice triggers SettingWithCopyWarning
data = data.copy()

cols = [f'FD{gameweek + offset}' for offset in range(1, MAX_WEEKS + 1)]

# A zero accumulated difficulty yields +/-inf; treat those as missing and
# drop every player lacking a usable FD value for any of the three horizons
data[cols] = data[cols].replace([np.inf, -np.inf], np.nan)
data = data.dropna(subset=cols)

In [56]:
# Dynamically create the column name based on the number of weeks
column_name = f'FD{gameweek + weeks}'

# Keep only players with a strictly positive FD index (removes zero and
# negative values) so they cannot be selected
data = data[data[column_name] > 0]

# Create lists of key variables (all aligned by position in `data`)
names = data.Last_Name.tolist()
teams = data.Team.tolist()
positions = data.Position.tolist()
prices = data.Cost_Today.tolist()
FD_index = data[column_name].tolist()

player_ids = range(len(data))

# The squad must spend at least BUDGET - MIN_SPEND_GAP
MIN_SPEND_GAP = 75

# Initialize the problem
prob = LpProblem("FPL_Player_Choices", LpMaximize)

# Create binary variables for players (1 = selected, 0 = not selected)
players = [LpVariable(f"player_{i}", cat="Binary") for i in player_ids]

# Define the objective function: maximize the sum of FD_index for selected players
prob += lpSum(players[i] * FD_index[i] for i in player_ids)

# Budget constraint: the sum of selected players' prices must be <= BUDGET
prob += lpSum(players[i] * prices[i] for i in player_ids) <= BUDGET

# Minimum-spend constraint: the sum of selected players' prices must be
# >= BUDGET - MIN_SPEND_GAP
prob += lpSum(players[i] * prices[i] for i in player_ids) >= (BUDGET - MIN_SPEND_GAP)

# Position constraints: enforce exact counts for each position
required_by_position = {'GK': GK, 'DEF': DEF, 'MID': MID, 'FWD': FWD}
for position, required in required_by_position.items():
    prob += lpSum(players[i] for i in player_ids if positions[i] == position) == required

# Club constraint: each team can have at most 3 players
for club in data.Team.unique():
    prob += lpSum(players[i] for i in player_ids if teams[i] == club) <= 3

# Solve the problem
prob.solve()

# Stop if the solver did not reach an optimum: variable values are then
# meaningless (or None) and would otherwise be reported as a valid team
status = LpStatus[prob.status]
if status != 'Optimal':
    raise RuntimeError(
        f'No optimal team found (solver status: {status}). '
        'Check the budget, minimum spend and position requirements.'
    )

# Create a list of selected players. Iterating over `players` keeps the index
# aligned with the data lists; the > 0.5 test guards against None and against
# floating-point residue on binary variables.
selected_players = [
    {
        'Name': names[i],
        'Team': teams[i],
        'Position': positions[i],
        'FD_Index': FD_index[i],
        'Price': prices[i],
    }
    for i, variable in enumerate(players)
    if variable.varValue is not None and variable.varValue > 0.5
]

# Convert selected players to a DataFrame for a better display; fixing the
# columns keeps the sort below valid even when nobody is selected
selected_players_df = pd.DataFrame(
    selected_players,
    columns=['Name', 'Team', 'Position', 'FD_Index', 'Price'],
)

print(selected_players_df.sort_values(by='FD_Index', ascending=False))

# Display the total cost and index
print('Total Team Cost:', sum(selected_players_df.Price))
print('Total Team Index', sum(selected_players_df.FD_Index))

               Name         Team Position  FD_Index  Price
3  Borges Fernandes      Man Utd      MID     5.500   90.0
0      Merino Zazón      Arsenal      MID     3.834   60.0
4            Romero        Spurs      DEF     3.834   50.0
2             Foden     Man City      MID     3.667   84.0
1              Cash  Aston Villa      DEF     3.166   47.0
5              Saka      Arsenal      MID     2.500  101.0
Total Team Cost: 432.0
Total Team Index 22.501
