In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
# Current gameweek. This value bounds the per-gameweek CSV load, selects the
# rows kept for the analysis, and decides which fixture columns are retained.
gameweek = 12

## Collect data

In [3]:
# Read one CSV per played gameweek (GW_1 .. GW_<gameweek>) into a list of frames
all_gameweeks = [
    pd.read_csv(rf'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\Players\Seperate_GW\GW_{week}.csv')
    for week in range(1, gameweek + 1)
]

# Stack the weekly frames under a fresh 0..n-1 index, then discard the
# 'Unnamed: 0' column (presumably the index saved with each CSV - TODO confirm)
data = pd.concat(all_gameweeks, axis=0, ignore_index=True).drop(columns=['Unnamed: 0'])

## Calculate form (over past 3 games)

In [4]:
# Sort by player, then gameweek, so every player's rows are in chronological
# order before the rolling window is applied (IMPORTANT).
final_data = data.sort_values(by=['Player ID', 'Gameweek'])

# Number of most recent games that make up a player's form
number_of_games = 3

# Form = mean of 'GW Points' over each player's last `number_of_games` games.
# The window is computed per player (groupby) so it can never reach back into
# the previous player's rows. A player with fewer than `number_of_games` rows
# gets NaN, which the later `> 0` filter on the FD index excludes.
final_data['Form'] = (
    final_data.groupby('Player ID')['GW Points']
    .transform(lambda points: points.rolling(window=number_of_games).mean())
    .round(3)
)

# Choose important columns
columns = ['Player ID', 'Name', 'Last_Name', 'Team', 'Position', 'Cost_Today',
       'GW Points','Form', 'Gameweek']

final_data = final_data[columns]

## Add upcoming fixtures 

In [5]:
# Load the fixture schedule (joined below on its 'Team' column)
fixtures = pd.read_csv(r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\Fixtures\Schedule\Fixtures.csv')

# Attach each team's fixture columns to every player row of that team.
# NOTE(review): merge() defaults to an inner join, so player rows whose 'Team'
# value has no match in the fixture file are dropped silently - confirm the
# team names agree between the two sources.
final_data = final_data.merge(fixtures, on= 'Team')

# Keep only the fixture columns for the next five gameweeks
def drop_gw_columns(final_data, gameweek):
    """Return a copy of ``final_data`` without the irrelevant ``GW<n>`` columns.

    Columns ``GW1`` .. ``GW<gameweek>`` and ``GW<gameweek + 6>`` .. ``GW38``
    are removed, leaving ``GW<gameweek + 1>`` .. ``GW<gameweek + 5>``.
    Columns that are not present are simply skipped; every non-``GW`` column
    is kept. The input frame is not modified.
    """
    already_played = range(1, gameweek + 1)
    beyond_horizon = range(gameweek + 6, 39)
    unwanted = {f'GW{week}' for week in (*already_played, *beyond_horizon)}

    # Only ask pandas to drop labels that actually exist in the frame
    present = [label for label in final_data.columns if label in unwanted]
    return final_data.drop(columns=present)

# Apply the helper: drop the fixture columns outside the next five gameweeks
# and continue with the result as `data`
data = drop_gw_columns(final_data, gameweek)

## Current gameweek

In [6]:
# Keep only the rows for the current gameweek.
# Take an explicit copy: later cells add columns to `data`, and assigning into
# a boolean-mask slice of another frame triggers pandas' SettingWithCopyWarning.
today = data['Gameweek'] == gameweek
data = data[today].copy()

## Fixture Difficulty

In [7]:
# Import the improved fixture-difficulty ratings
difficulty = pd.read_csv(r'C:\Users\thoma\Code\Projects\Fantasy-Premier-League\Data\Fixtures\Difficulty_ratings\Model_3_FD.csv', index_col=0)

# Lookup table: (Opponent, Position) -> Difficulty
mapping = difficulty.set_index(['Opponent', 'Position'])['Difficulty'].to_dict()

# Map difficulty for NGWs (next gameweeks) using the fixture and the player's
# position. Columns are addressed by name (GW<gameweek + i>, 'Position')
# instead of by position, so the lookup does not depend on column order.
for i in range(1, 6):  # NGW1 to NGW5
    fixture_column = f'GW{gameweek + i}'
    if fixture_column not in data.columns:
        # No fixture column that far ahead (e.g. end of season): no rating
        data[f'NGW{i}'] = float('nan')
        continue
    # Pairs without a rating yield None, which pandas stores as a missing value
    data[f'NGW{i}'] = [
        mapping.get(key)
        for key in zip(data[fixture_column], data['Position'])
    ]

# Calculate accumulated fixture difficulty for up to the next 5 gameweeks.
# sum() skips missing ratings, so an unrated fixture contributes 0.
for gw in range(1, 6):
    data[f'Diff_Score_{gw}_GW'] = data[[f'NGW{i}' for i in range(1, gw + 1)]].sum(axis=1)

# Convert specified columns to float
columns_to_convert = ['Form'] + [f'Diff_Score_{gw}_GW' for gw in range(1, 6)]
data[columns_to_convert] = data[columns_to_convert].astype(float)

## FD Index (from improved form and fixture difficulty)

In [8]:
# Calculate the FD index (form / accumulated fixture difficulty) for horizons
# of 1 to 5 gameweeks. Columns are addressed by name rather than by position.
for i in range(1, 6):
    # A zero accumulated difficulty (no rated fixture in the horizon) would give
    # an infinite index; turn it into NaN so the player is not ranked at all.
    accumulated_difficulty = data[f'Diff_Score_{i}_GW'].replace(0, float('nan'))
    data[f'FDI_{i}'] = (data['Form'] / accumulated_difficulty).round(3)

In [9]:
# Show the current-gameweek table with its fixture, difficulty and FDI columns
data

Unnamed: 0,Player ID,Name,Last_Name,Team,Position,Cost_Today,GW Points,Form,Gameweek,GW13,...,Diff_Score_1_GW,Diff_Score_2_GW,Diff_Score_3_GW,Diff_Score_4_GW,Diff_Score_5_GW,FDI_1,FDI_2,FDI_3,FDI_4,FDI_5
10,1,Fábio,Fábio Vieira,Arsenal,MID,54,0,0.000,12,WHU (A),...,3.0,8.0,10.0,13.0,16.0,0.000,0.000,0.000,0.000,0.000
22,2,Gabriel,G.Jesus,Arsenal,FWD,68,1,1.000,12,WHU (A),...,3.0,8.0,10.0,13.0,16.0,0.333,0.125,0.100,0.077,0.062
34,3,Gabriel,Gabriel,Arsenal,DEF,61,6,3.333,12,WHU (A),...,3.0,5.0,10.0,12.0,14.0,1.111,0.667,0.333,0.278,0.238
46,4,Kai,Havertz,Arsenal,FWD,80,0,0.667,12,WHU (A),...,3.0,8.0,10.0,13.0,16.0,0.222,0.083,0.067,0.051,0.042
58,5,Karl,Hein,Arsenal,GK,40,0,0.000,12,WHU (A),...,3.0,5.0,10.0,12.0,14.0,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7828,660,Vítezslav,Jaros,Liverpool,GK,40,0,0.000,12,MCI (H),...,5.0,7.0,9.0,13.0,18.0,0.000,0.000,0.000,0.000,0.000
7836,661,Tony,Yogane,Brentford,MID,45,0,0.000,12,LEI (H),...,2.0,6.0,9.0,12.0,16.0,0.000,0.000,0.000,0.000,0.000
7843,662,Josh,Nichols,Arsenal,DEF,40,0,0.000,12,WHU (A),...,3.0,5.0,10.0,12.0,14.0,0.000,0.000,0.000,0.000,0.000
7850,663,Imari,Samuels,Brighton,DEF,40,0,0.000,12,SOU (H),...,2.0,7.0,11.0,14.0,17.0,0.000,0.000,0.000,0.000,0.000


## Optimization

In [10]:
import pandas as pd
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Model_1

# This model uses the FD_index to choose the best players. The FD_index is a
# simple calculation of current form/upcoming fixture difficulty. The fixture
# difficulty can be planned for up to 5 weeks. The model will choose the players
# that have the highest form per lowest fixture difficulty, and optimally select
# a team given the constraints of budget, position and team limit.

# Define constants
Gameweek = gameweek  # Same gameweek that `data` was built for, so the two cannot drift apart
BUDGET = 800 # Choose your budget (1000 = £100m)
WEEKS = 1 # Choose how many weeks you want to prepare for between 1 and 5
GK = 1 # Goalkeepers required (Choose between 0 and 2)
DEF = 3 # Defenders required (Choose between 0 and 5)
MID = 4 # Midfielders required (Choose between 0 and 5)
FWD = 3 #  Forwards required (Choose between 0 and 3)

# The optimisation below works directly on `data`, the current-gameweek table
# built in the cells above.

In [11]:
# Dynamically create the column name based on the number of weeks
column_name = f'FDI_{WEEKS}'

# Keep only players with a strictly positive FD index (this also removes rows
# whose index is missing). NOTE: this narrows `data` itself, so after changing
# WEEKS the upstream cells must be re-run to get the full player list back.
data = data[data[column_name] > 0]

# Create lists of key variables (parallel lists, one entry per candidate player)
names = data.Last_Name.tolist()
teams = data.Team.tolist()
positions = data.Position.tolist()
prices = data.Cost_Today.tolist()
FD_index = data[column_name].tolist()
n_players = len(data)

# Initialize the problem
prob = LpProblem("FPL_Player_Choices", LpMaximize)

# Create binary variables for players: players[i] == 1 means player i is picked
players = [LpVariable(f"player_{i}", cat="Binary") for i in range(n_players)]

# Define the objective function: maximize the sum of FD_index for selected players
prob += lpSum(players[i] * FD_index[i] for i in range(n_players))

# Budget constraint: the sum of selected players' prices must be <= BUDGET
prob += lpSum(players[i] * prices[i] for i in range(n_players)) <= BUDGET

# Minimum-spend constraint: the squad must cost at least BUDGET - 75,
# i.e. at most 75 price units of the budget may be left unspent
prob += lpSum(players[i] * prices[i] for i in range(n_players)) >= (BUDGET - 75)

# Position constraints: enforce exact limits for each position
prob += lpSum(players[i] for i in range(n_players) if positions[i] == 'GK') == GK
prob += lpSum(players[i] for i in range(n_players) if positions[i] == 'DEF') == DEF
prob += lpSum(players[i] for i in range(n_players) if positions[i] == 'MID') == MID
prob += lpSum(players[i] for i in range(n_players) if positions[i] == 'FWD') == FWD

# Club constraint: each team can have at most 3 players
for club in data.Team.unique():
    prob += lpSum(players[i] for i in range(n_players) if teams[i] == club) <= 3

# Solve the problem and stop if no optimal squad exists; otherwise the variable
# values below would be meaningless. Status code 1 is PuLP's "Optimal".
prob.solve()
if prob.status != 1:
    raise RuntimeError(
        f"No optimal squad found (PuLP status code {prob.status}); "
        "check BUDGET, the minimum spend and the position counts."
    )

# Create a list of selected players. Iterate the `players` list directly (it is
# index-aligned with the lists above) and treat a binary variable as selected
# when its value is above 0.5, which tolerates solver round-off.
selected_players = []
for index, variable in enumerate(players):
    if variable.varValue is not None and variable.varValue > 0.5:
        selected_players.append({
            'Name': names[index],
            'Team': teams[index],
            'Position': positions[index],
            'FD_Index': FD_index[index],
            'Price': prices[index],
        })

# Convert selected players to a DataFrame for a better display
selected_players_df = pd.DataFrame(selected_players)

# Display the DataFrame
print(selected_players_df)

# Display the total cost and index
print('Total Team Cost:', sum(selected_players_df.Price))
print('Total Team Index', sum(selected_players_df.FD_Index))

           Name       Team Position  FD_Index  Price
0          Saka    Arsenal      MID     1.889    101
1       M.Salah  Liverpool      MID     2.400    130
2   B.Fernandes    Man Utd      MID     3.222     84
3         Onana    Man Utd       GK     2.334     51
4   Wan-Bissaka   West Ham      DEF     2.444     44
5         Cunha     Wolves      FWD     5.666     68
6       J.Gomes     Wolves      MID     4.166     49
7          Toti     Wolves      DEF     2.000     43
8      Mazraoui    Man Utd      DEF     2.000     46
9         Wissa  Brentford      FWD     2.834     61
10   João Pedro   Brighton      FWD     3.834     55
Total Team Cost: 732
Total Team Index 32.789
