In [1]:
#Import libraries.
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [2]:
pd.set_option('display.max_columns', None)

## Model Testing

In [3]:
#Import libraries for Model Testing. 
from sklearn.ensemble import RandomForestRegressor
from itertools import permutations
from sklearn.metrics import mean_squared_error

In [4]:
gameweek = 18

In [5]:
toRun = pd.read_csv('data/fpl_data.csv')
toRun.drop(['kickoff_time'], axis = 1, inplace = True)

toRun = pd.get_dummies(toRun, columns = ['position'], drop_first=True)

testSet = toRun.loc[((toRun['season_x'] == '2021-22') & (toRun['GW'] == gameweek))]
trainingSet = toRun.loc[((toRun['season_x'] == '2021-22') & (toRun['GW'] != gameweek))
                        |(toRun['season_x'] == '2016-17')|(toRun['season_x'] == '2017-18')
                       |(toRun['season_x'] == '2018-19')|(toRun['season_x'] == '2019-20')|(toRun['season_x'] == '2020-21')]

x_test = testSet.loc[:,[i for i in list(testSet.columns) if i not in ['total_points', 'name', 'season_x','opp_team_name','team_x']]]
y_test = pd.DataFrame(testSet.loc[:, testSet.columns == 'total_points'])

x_train = trainingSet.loc[:,[i for i in list(trainingSet.columns) if i not in ['total_points','name', 'season_x','opp_team_name','team_x']]]
y_train = pd.DataFrame(trainingSet.loc[:, trainingSet.columns == 'total_points'])

In [6]:
#Random Forest
model = RandomForestRegressor(n_estimators = 300, min_samples_split = 5)
model.fit(x_train, y_train)

In [7]:
#Random Forest MSE & Accuracy
pred_y = model.predict(x_test)
mse = mean_squared_error(y_test, pred_y)
print('MSE: ', mse)
print('Accuracy: ', (100-mse))
final = testSet.copy(deep = True)
final['score'] = pred_y

MSE:  0.05866747266029221
Accuracy:  99.94133252733971


In [8]:
def teams_limit(cntTeam):
    cntTeamSet = list(set(cntTeam))
    for i in cntTeamSet:
        counter = 0
        for j in cntTeam:
            if i == j:
                counter += 1
        if counter > 3:
            return False
    return True

def optimal_lineup(scoresDF, threshold = [3, 8, 9, 6], budget = 850):
    formations = [[4, 4, 2], [4, 3, 3], [3, 4, 3], [3, 5, 2], [4, 5, 1], [5, 2, 3],[5,4,1]]
    scoresDF = scoresDF.sort_values(by='score', ascending=False)
    weekPointsDF = scoresDF.sort_values(by='total_points', ascending=False)
    expected=[]
    actual=[]
    lineup=[]
    price=[]  
    
    pvInd = scoresDF.columns.get_loc("value")
    pnInd = scoresDF.columns.get_loc("name")
    wpInd = scoresDF.columns.get_loc("total_points")
    sInd = scoresDF.columns.get_loc("score")
    ptInd = scoresDF.columns.get_loc("team_x")
    

    def updateCnt(i, df, cntPrice, cntFor, cntActual, cntScore, line, cntTeam, cntMax):
        cntPrice += df[i, pvInd]
        cntFor.append(df[i, pnInd])
        cntActual += df[i, wpInd]
        cntScore += df[i, sInd]
        cntMax = max(cntMax, df[i, sInd])
        cntTeam.append(df[i, ptInd])
        return cntPrice, cntFor, cntActual, cntScore, cntTeam, cntMax
    
    GKs = (scoresDF[scoresDF.position_GK == 1]).values
    FWs = (scoresDF[scoresDF.position_FWD == 1]).values
    MDs = (scoresDF[scoresDF.position_MID == 1]).values
    DFs = (scoresDF[(scoresDF.position_GK == 0) & (scoresDF.position_MID == 0) & (scoresDF.position_FWD == 0)]).values

    GKs2 = weekPointsDF[weekPointsDF.position_GK == 1]
    FWs2 = weekPointsDF[weekPointsDF.position_FWD == 1]
    MDs2 = weekPointsDF[weekPointsDF.position_MID == 1]
    DFs2 = weekPointsDF[(weekPointsDF.position_GK == 0) & (weekPointsDF.position_MID == 0) & (weekPointsDF.position_FWD == 0)]
    
    DreamTeam = []
    for i in range(threshold[0]):
        DreamTeam.append(GKs2.iloc[i].name)
    for i in range(threshold[1]):
        DreamTeam.append(DFs2.iloc[i].name)
    for i in range(threshold[2]):
        DreamTeam.append(MDs2.iloc[i].name)
    for i in range(threshold[3]):
        DreamTeam.append(FWs2.iloc[i].name)
    
    for formation in formations:
        maxi = 0 
        bestFor = 0
        bestPrice = 0 
        bestActual = 0 
        
        gkStr = '1' + '0' * (threshold[0] - 1)
        gks = list(set([''.join(p) for p in permutations(gkStr)]))

        dfStr = '1' * formation[0] + '0' * (threshold[1] - formation[0])
        dfs = list(set([''.join(p) for p in permutations(dfStr)]))

        mdStr = '1' * formation[1] + '0' * (threshold[2] - formation[1])
        mds = list(set([''.join(p) for p in permutations(mdStr)]))

        fwStr = '1' * formation[2] + '0' * (threshold[3] - formation[2])
        fws = list(set([''.join(p) for p in permutations(fwStr)]))
        
        #Try all permutations of players
        for gk in gks:
            for df in dfs:
                for md in mds:
                    for fw in fws:
                        #variables to store cnt permutation (Price - Expected Score - Lineup Names - Actual Score)
                        cntPrice = 0
                        cntScore = 0
                        cntFor = []
                        cntActual = 0
                        cntMax = 0 # to know maximum expected points lineup to make the highest one as the captain 
                        cntTeam = []
                        for i in range(len(gk)):
                            if gk[i] == '1':
                                cntPrice, cntFor, cntActual, cntScore, cntTeam, cntMax = updateCnt(i, GKs, cntPrice, cntFor, cntActual, cntScore, 'GKP', cntTeam, cntMax)
                        for i in range(len(df)):
                            if df[i] == '1':
                                cntPrice, cntFor, cntActual, cntScore, cntTeam, cntMax = updateCnt(i, DFs, cntPrice, cntFor, cntActual, cntScore, 'DEF', cntTeam, cntMax)
                        for i in range(len(md)):
                            if md[i] == '1':
                                cntPrice, cntFor, cntActual, cntScore, cntTeam, cntMax = updateCnt(i, MDs, cntPrice, cntFor, cntActual, cntScore, 'MID', cntTeam, cntMax)
                        for i in range(len(fw)):
                            if fw[i] == '1':
                                cntPrice, cntFor, cntActual, cntScore, cntTeam, cntMax = updateCnt(i, FWs, cntPrice, cntFor, cntActual, cntScore, 'FWD', cntTeam, cntMax)
                        
                        cntScore = cntMax + cntScore # Captain Score is doubled
                        if cntPrice <= budget and cntScore > maxi and teams_limit(cntTeam): #Check Budget - Maximum Score - 3 players max from each team 
                            maxi = cntScore
                            bestFor = cntFor
                            bestPrice = cntPrice
                            bestActual = cntActual
        counter = 0
        for p in bestFor:
            if p in DreamTeam:
                counter += 1
        bestPercent = counter / 11.0 * 100
        
        lineup.append(bestFor)
        expected.append(maxi)
        actual.append(bestActual)
        price.append(bestPrice)

    return formations,lineup,expected,actual,price

In [9]:
formations, lineup, expected, actual, price = optimal_lineup(final)

In [10]:
formations=['4-4-2','4-3-3','3-4-3','3-5-2','4-5-1','5-2-3','5-4-1']

In [12]:
best_teams = pd.DataFrame(list(zip(formations,price,expected,actual,lineup)),columns=['Formations','Price','Predicted','Actual','Lineup'])

In [13]:
best_teams

Unnamed: 0,Formations,Price,Predicted,Actual,Lineup
0,4-4-2,801,121.187382,105,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
1,4-3-3,792,117.508688,102,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
2,3-4-3,790,115.075628,100,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
3,3-5-2,810,118.659577,103,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
4,4-5-1,783,122.966462,107,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
5,5-2-3,745,118.85002,103,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."
6,5-4-1,773,124.441619,108,"[José Malheiro de Sá, João Pedro Cavaco Cancel..."


In [14]:
best_teams.to_csv('data/best_predicted_line up.csv')