In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pulp 
import glob
import os
import Setup

  # None disables column-width truncation. The old -1 sentinel has been deprecated since pandas 1.0 and is rejected by newer releases.
  pd.set_option('display.max_colwidth', None)


In [2]:
# Raise pandas' display limits so wide/long frames are shown in full.
for _option_name, _option_value in (('display.max_rows', 500),
                                    ('display.max_columns', 500),
                                    ('display.width', 100)):
    pd.set_option(_option_name, _option_value)

# Turn off the chained-assignment warning (pandas' default is 'warn').
pd.set_option('mode.chained_assignment', None)

In [3]:
# Bind the project-wide settings exposed by the local Setup module to
# notebook-level names.
data_path, season_folders, season_names, this_season = (
    Setup.github_address,
    Setup.season_folders,
    Setup.season_names,
    Setup.this_season,
)

# Rules

## Positions
To join the game, select a fantasy football squad of 15 players, consisting of:

- 2 Goalkeepers
- 5 Defenders
- 5 Midfielders
- 3 Forwards

## Budget
The total value of your initial squad must not exceed £100 million.

## Players Per Team
You can select up to 3 players from a single Premier League team.

# Pull data

In [14]:
# Find the most recent export in the input folder.
# NOTE(review): os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix, so "latest" may not follow the dates in the
# file names after a fresh checkout or copy - confirm this is intended.
list_of_file_addresses = glob.glob('Data/Input_data/*')
if not list_of_file_addresses:
    # Without this guard max() fails with an opaque "arg is an empty sequence".
    raise FileNotFoundError("No input files found matching 'Data/Input_data/*'")
latest_file_address = max(list_of_file_addresses, key=os.path.getctime)
print('Opening ', latest_file_address)

# Sort by player and then by fixture date so that the per-player head()/tail()
# calls below pick fixtures in date order.
input_df = pd.read_csv(latest_file_address).sort_values(['second_name','first_name','FIX_Fixture_date'])

# Rows with forecast == 0 are treated as games already played; every other row
# (including a missing flag) is treated as a forecast fixture.
is_played = input_df['forecast'] == 0

games_played = input_df.loc[is_played]
last_four_games = games_played.groupby(['second_name','first_name']).tail(4)

upcoming_games = input_df.loc[~is_played]
next_four_games = upcoming_games.groupby(['second_name','first_name']).head(4)
next_game = upcoming_games.groupby(['second_name','first_name']).head(1)

Opening  Data/Input_data/2020-12-03.csv


In [20]:
# One row per (player, season) carrying the player's position code and cost.
data = input_df.groupby(['second_name','first_name','FIX_season']).agg({'position' : 'mean',
                                                    'cost_today' : 'mean'}).reset_index()

# One-hot encode the position code. dtype=int keeps the indicators as 0/1
# integers (pandas 2.0 changed the default to booleans), which is what the
# optimiser constraints and the summary sums expect.
data = pd.concat([data, pd.get_dummies(data['position'], dtype=int)], axis = 1)


data = data.rename(columns = {1.0 : 'keeper'
                             ,2.0 : 'defender'
                             ,3.0 : 'mid-field'
                             ,4.0 : 'striker'})

# Index by player so the per-player aggregates below align on
# (second_name, first_name).
# NOTE(review): until the season filter at the end of this cell, a player who
# appears in several seasons has several rows sharing the same index label.
data = data.set_index(['second_name','first_name'])

# Group each source frame by player once and reuse the groupings below.
played_by_player = games_played.groupby(['second_name','first_name'])
recent_by_player = last_four_games.groupby(['second_name','first_name'])
all_rows_by_player = input_df.groupby(['second_name','first_name'])
next_four_by_player = next_four_games.groupby(['second_name','first_name'])
next_game_by_player = next_game.groupby(['second_name','first_name'])

# Make columns for teams
# The team of the player's most recent played fixture, then one 0/1 indicator
# column per team (players with no played fixture get all zeros).
data['Current_team'] = played_by_player['FIX_team'].last()
data = pd.concat([data, pd.get_dummies(data['Current_team'], prefix = "TEAM_TRANSFORM", dtype=int)], axis = 1)

# Data from games already played
# NOTE(review): games_played is not restricted to this_season, so these TSS_*
# figures cover every season present in the input file, and TSS_rank is
# computed before the season filter below - confirm that is intended.
data['TSS_games_played'] = played_by_player['FIX_Fixture_date'].count()
data['TSS_average_points_per_game'] = played_by_player['total_points'].mean()
data['TSS_total_points_this_season'] = played_by_player['total_points'].sum()
# data['TSS_average_minutes_per_game'] = games_played.groupby(['second_name','first_name'])['minutes'].mean()
data['TSS_rank'] = data['TSS_total_points_this_season'].rank(ascending = False)

# Data from recent games (each player's last four played fixtures)
data['L4W_games_played'] = recent_by_player['FIX_Fixture_date'].count()
data['L4W_average_points_per_game'] = recent_by_player['total_points'].mean()
data['L4W_total_points'] = recent_by_player['total_points'].sum()
# data['L4W_average_minutes_per_game'] = last_four_games.groupby(['second_name','first_name'])['minutes'].mean()


# Points forecasts
# The "for_season" figures aggregate forecast_points over every row of
# input_df for the player, played and forecast alike.
data['forecast_points_for_season_sum'] = all_rows_by_player['forecast_points'].sum()
data['forecast_points_for_season_mean'] = all_rows_by_player['forecast_points'].mean()

data['forecast_points_N4G_sum'] = next_four_by_player['forecast_points'].sum()
data['forecast_points_N4G_mean'] = next_four_by_player['forecast_points'].mean()

data['forecast_points_next_game'] = next_game_by_player['forecast_points'].mean()

# Keep only the current season's row for each player.
data = data.loc[data['FIX_season'] == this_season]

data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,FIX_season,position,cost_today,keeper,defender,mid-field,striker,Current_team,TEAM_TRANSFORM_Arsenal,TEAM_TRANSFORM_Aston Villa,TEAM_TRANSFORM_Bournemouth,TEAM_TRANSFORM_Brighton,TEAM_TRANSFORM_Burnley,TEAM_TRANSFORM_Chelsea,TEAM_TRANSFORM_Crystal Palace,TEAM_TRANSFORM_Everton,TEAM_TRANSFORM_Fulham,TEAM_TRANSFORM_Leeds,TEAM_TRANSFORM_Leicester,TEAM_TRANSFORM_Liverpool,TEAM_TRANSFORM_Man City,TEAM_TRANSFORM_Man Utd,TEAM_TRANSFORM_Newcastle,TEAM_TRANSFORM_Norwich,TEAM_TRANSFORM_Sheffield Utd,TEAM_TRANSFORM_Southampton,TEAM_TRANSFORM_Spurs,TEAM_TRANSFORM_Watford,TEAM_TRANSFORM_West Brom,TEAM_TRANSFORM_West Ham,TEAM_TRANSFORM_Wolves,TSS_games_played,TSS_average_points_per_game,TSS_total_points_this_season,TSS_rank,L4W_games_played,L4W_average_points_per_game,L4W_total_points,forecast_points_for_season_sum,forecast_points_for_season_mean,forecast_points_N4G_sum,forecast_points_N4G_mean,forecast_points_next_game
second_name,first_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
Aarons,Rolando,2020-21,3.0,48.0,0,0,1,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,0.0,0.0,1145.0,4,0.0,0.0,3.554589,0.12695,0.596454,0.149113,0.10727
Abraham,Tammy,2020-21,4.0,72.0,0,0,0,1,Chelsea,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,4.042553,190.0,44.5,4,6.0,24.0,117.787417,4.206693,15.648669,3.912167,4.200414
Adams,Che,2020-21,4.0,59.0,0,0,0,1,Southampton,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,47,2.680851,126.0,180.5,4,5.25,21.0,122.318754,4.368527,17.329065,4.332266,4.604894
Adarabioyo,Tosin,2020-21,2.0,45.0,0,1,0,0,Fulham,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,2.2,11.0,843.5,4,2.5,10.0,75.929504,2.711768,9.795768,2.448942,1.565611
Agosto RamÃ­rez,Fabricio,2020-21,1.0,40.0,1,0,0,0,Fulham,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0.0,0.0,1145.0,4,0.0,0.0,3.515821,0.125565,0.439323,0.109831,0.109332


# Exclusions

In [41]:
# Inspect the row labels; `data` is indexed by (second_name, first_name).
data.index

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [42]:
# Reset the flag first so re-running this cell never keeps stale exclusions.
data['Exclude'] = 0

# Teams and surnames the optimiser must not pick from; edit these lists to
# change the exclusions.
excluded_teams = ['Aston Villa', 'Newcastle']
excluded_second_names = ['Kane']

data.loc[data['Current_team'].isin(excluded_teams), 'Exclude'] = 1

# Match surnames on the `second_name` index level explicitly. A bare
# data.loc['Kane', 'Exclude'] is ambiguous on a two-level index and raises
# KeyError when no player has that surname; this form is simply a no-op then.
second_names = data.index.get_level_values('second_name')
data.loc[second_names.isin(excluded_second_names), 'Exclude'] = 1

# Show everything that has been excluded.
data.loc[data['Exclude'] == 1]


Unnamed: 0_level_0,Unnamed: 1_level_0,FIX_season,position,cost_today,keeper,defender,mid-field,striker,Current_team,TEAM_TRANSFORM_Arsenal,TEAM_TRANSFORM_Aston Villa,TEAM_TRANSFORM_Bournemouth,TEAM_TRANSFORM_Brighton,TEAM_TRANSFORM_Burnley,TEAM_TRANSFORM_Chelsea,TEAM_TRANSFORM_Crystal Palace,TEAM_TRANSFORM_Everton,TEAM_TRANSFORM_Fulham,TEAM_TRANSFORM_Leeds,TEAM_TRANSFORM_Leicester,TEAM_TRANSFORM_Liverpool,TEAM_TRANSFORM_Man City,TEAM_TRANSFORM_Man Utd,TEAM_TRANSFORM_Newcastle,TEAM_TRANSFORM_Norwich,TEAM_TRANSFORM_Sheffield Utd,TEAM_TRANSFORM_Southampton,TEAM_TRANSFORM_Spurs,TEAM_TRANSFORM_Watford,TEAM_TRANSFORM_West Brom,TEAM_TRANSFORM_West Ham,TEAM_TRANSFORM_Wolves,TSS_games_played,TSS_average_points_per_game,TSS_total_points_this_season,TSS_rank,L4W_games_played,L4W_average_points_per_game,L4W_total_points,forecast_points_for_season_sum,forecast_points_for_season_mean,forecast_points_N4G_sum,forecast_points_N4G_mean,forecast_points_next_game,Exclude,Player_selected
second_name,first_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
Aarons,Rolando,2020-21,3.0,48.0,0,0,1,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,0.0,0.0,1145.0,4,0.0,0.0,3.554589,0.12695,0.596454,0.149113,0.10727,1,0.0
AlmirÃ³n,Miguel,2020-21,3.0,57.0,0,0,1,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,2.680851,126.0,180.5,4,2.25,9.0,66.244519,2.365876,10.33406,2.583515,2.329564,1,0.0
ApolinÃ¡rio de Lira,Joelinton CÃ¡ssio,2020-21,4.0,57.0,0,0,0,1,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,2.06383,97.0,293.0,4,3.5,14.0,85.400476,3.050017,11.609024,2.902256,2.728802,1,0.0
Atsu,Christian,2020-21,3.0,52.0,0,0,1,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,0.787234,37.0,645.0,4,0.0,0.0,4.408335,0.157441,0.758835,0.189709,0.149749,1,0.0
Barkley,Ross,2020-21,3.0,59.0,0,0,1,0,Aston Villa,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44,2.045455,90.0,318.0,4,2.5,10.0,80.917923,2.889926,12.650914,3.162729,3.331021,1,0.0
Bree,James,2020-21,2.0,45.0,0,1,0,0,Aston Villa,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0.0,0.0,1145.0,4,0.0,0.0,4.507108,0.160968,0.798682,0.199671,0.220902,1,0.0
Carroll,Andy,2020-21,4.0,52.0,0,0,0,1,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,0.957447,45.0,581.0,4,0.75,3.0,16.498098,0.589218,2.932099,0.733025,0.655327,1,0.0
Cash,Matthew,2020-21,2.0,50.0,0,1,0,0,Aston Villa,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,2.875,23.0,739.5,4,2.25,9.0,77.495421,2.767694,12.028433,3.007108,3.252824,1,0.0
Clark,Ciaran,2020-21,2.0,45.0,0,1,0,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,1.191489,56.0,521.5,4,1.75,7.0,71.161411,2.541479,10.270832,2.567708,2.35953,1,0.0
Darlow,Karl,2020-21,1.0,50.0,1,0,0,0,Newcastle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47,0.93617,44.0,595.0,4,3.5,14.0,99.681174,3.560042,14.96184,3.74046,3.467562,1,0.0


## Choose team from scratch 

In [43]:
def the_draft(data, objective_func, budget=1000, max_per_team=3, squad_shape=None):
    """Choose the squad that maximises ``objective_func`` under the game rules.

    Builds and solves a binary linear programme with one decision variable per
    row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per candidate player. Must contain the ``objective_func``
        column, ``cost_today``, ``Exclude``, the position indicator columns
        named in ``squad_shape`` and the ``TEAM_TRANSFORM*`` team indicator
        columns. Index labels identify the players.
    objective_func : str
        Name of the column whose total over the selected players is maximised.
        Missing values in this column are treated as 0.
    budget : number, default 1000
        Upper bound on the summed ``cost_today`` of the selected players, in
        the same units as ``cost_today`` (the default is the notebook's £100M
        limit).
    max_per_team : int, default 3
        Upper bound on the selected total of each ``TEAM_TRANSFORM*`` column.
    squad_shape : dict or None, default None
        Maps a position indicator column to the exact number of players
        required. ``None`` means 2 keepers, 5 defenders, 5 mid-fielders and
        3 strikers.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` (the index labels of ``data``) with one column,
        ``Player_selected``, holding the solver's value for each player.
    """
    if squad_shape is None:
        squad_shape = {'keeper': 2, 'defender': 5, 'mid-field': 5, 'striker': 3}

    players = list(data.index)

    problem = pulp.LpProblem("the_draft", pulp.LpMaximize)

    # Decision variable: 1 if the player is in the squad, 0 otherwise.
    player_selected = pulp.LpVariable.dicts("selected", players, cat='Binary')

    def selected_total(weights):
        # Linear expression: sum of weight * decision variable over all players.
        return pulp.lpSum([weight * player_selected[name]
                           for name, weight in zip(players, weights)])

    # Objective function. Each column is read once as a plain Python list
    # instead of doing a .loc lookup per player per constraint. A NaN
    # coefficient would make the model invalid, so a player without a value
    # contributes 0.
    problem += selected_total(data[objective_func].fillna(0).tolist())

    # Constraints
    # Exact head-count per position (2 GK / 5 DEF / 5 MID / 3 FOR by default).
    for position_col, required in squad_shape.items():
        problem += selected_total(data[position_col].tolist()) == required

    # Total cost within budget
    problem += selected_total(data['cost_today'].tolist()) <= budget

    # No more than max_per_team from each team. str(c) keeps this safe when a
    # column label is not a string.
    for col in [c for c in data.columns if str(c).startswith('TEAM_TRANSFORM')]:
        problem += selected_total(data[col].tolist()) <= max_per_team

    # No one who's been excluded
    problem += selected_total(data['Exclude'].tolist()) == 0

    problem.solve()
    print("Optimiser status: ", pulp.LpStatus[problem.status])

    output = [{'Name': name,
               'Player_selected': player_selected[name].varValue}
              for name in players]

    output_df = pd.DataFrame.from_records(output).set_index(['Name'])

    return output_df
                     

In [44]:
# Solve for the squad with the highest forecast total over the next four games.
optimiser_selection = the_draft(data=data, objective_func='forecast_points_N4G_sum')

Optimiser status:  Optimal


In [45]:
# Copy the optimiser's pick flag onto the player table (aligned on the index).
data['Player_selected'] = optimiser_selection.loc[:, 'Player_selected']

# The team

In [46]:
# Player_selected holds the solver's float values, so test against a threshold
# rather than `== True`: a value such as 0.9999999 still counts as selected,
# and NaN (no solver value) does not. Copy so the column added below is
# written to an independent frame, not to a slice of `data`.
team = data.loc[data['Player_selected'] > 0.5].copy()
team = team.sort_values('forecast_points_next_game', ascending = False)
# Record which input file this team was built from.
team['Forecast_file'] = latest_file_address
team[['position','L4W_average_points_per_game','forecast_points_N4G_mean']]

Unnamed: 0_level_0,Unnamed: 1_level_0,position,L4W_average_points_per_game,forecast_points_N4G_mean
second_name,first_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Calvert-Lewin,Dominic,4.0,7.0,6.528023
Son,Heung-Min,3.0,4.5,6.317149
Mahrez,Riyad,3.0,6.5,5.813713
Chilwell,Benjamin,2.0,7.25,5.834718
James,Reece,2.0,5.5,5.765249
Borges Fernandes,Bruno Miguel,3.0,10.0,5.996441
Zouma,Kurt,2.0,6.75,5.291801
Vardy,Jamie,4.0,6.25,5.233678
Fabianski,Lukasz,1.0,7.25,5.250856
Adams,Che,4.0,5.25,4.332266


In [None]:
# NOTE(review): this is the gameweek of the first row of next_game (the first
# player in name order), not necessarily the earliest upcoming gameweek -
# confirm that all players share the same next gameweek.
GW = next_game['FIX_GW'].unique()[0]

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs('Teams', exist_ok=True)
team.to_csv('Teams/GW_' + str(GW) + '.csv')

In [None]:
# How each column is summarised across the selected team.
team_summary_spec = {'cost_today':'sum',
                     'keeper': 'sum',
                     'defender': 'sum',
                     'mid-field': 'sum',
                     'striker': 'sum',
                     'TSS_average_points_per_game' : 'sum',
                     'TSS_average_minutes_per_game' : 'mean',
                     'L4W_average_points_per_game' : 'sum',
                     'L4W_average_minutes_per_game' : 'mean',
                     'forecast_points_for_season_sum' : 'sum',
                     'forecast_points_for_season_mean': 'sum',
                     'forecast_points_N4G_sum' : 'sum',
                     'forecast_points_N4G_mean' : 'sum',
                     'forecast_points_next_game': 'sum'}

# Aggregate only the columns that exist: the *_average_minutes_per_game columns
# are commented out where the player table is built, and agg() raises KeyError
# for any column that is missing.
team.agg({column: how for column, how in team_summary_spec.items() if column in team.columns})

In [None]:
# Count how many selected players come from each team.
team['Current_team'].value_counts()

# Look at recent performance of the team

In [None]:
# Pick the newest file in the input folder (by os.path.getctime).
list_of_files = glob.glob('Data/Input_data/*')
latest_file = max(list_of_files, key=lambda path: os.path.getctime(path))
print('Opening ', latest_file)

# Read it and keep only the rows flagged as already played (forecast == 0).
games_to_date = pd.read_csv(latest_file).loc[lambda frame: frame['forecast'] == 0]

In [None]:
def plot_player_performance(df, player_list,season = '2019-20', smooth = False):
    """Draw one points-per-gameweek chart for each listed player in a season.

    Each chart shows the player's ``total_points`` per ``FIX_GW`` as crosses,
    an exponentially weighted mean (halflife 10) as a line, and the season
    average as a dotted horizontal line.

    Parameters
    ----------
    df : pandas.DataFrame
        Fixture-level rows with ``name``, ``FIX_season``, ``FIX_GW`` and
        ``total_points`` columns.
    player_list : list
        Values matched against ``df['name']``; players not found are skipped.
    season : str, default '2019-20'
        Value of ``FIX_season`` to plot.
    smooth : bool, default False
        Currently has no effect: the smoothed line is always drawn.
        NOTE(review): decide whether this flag should toggle the line.

    Returns
    -------
    None
        The charts are shown with ``plt.show()``.
    """
    season_rows = df.loc[(df['name'].isin(player_list)) & (df['FIX_season'] == season)]

    for player in season_rows['name'].unique():

        # Order by gameweek (stable sort) so the smoothing runs through the
        # games in sequence and the line is drawn left to right.
        # sort_values returns a new frame, so nothing is written to a slice.
        player_df = season_rows.loc[season_rows['name'] == player].sort_values('FIX_GW', kind='mergesort')

        # Kept as a local Series instead of being added as a column.
        points_smoothed = player_df['total_points'].ewm(halflife=10).mean()

        season_av = player_df['total_points'].mean()

        plt.title(str(player) + ' - season av. =' + str(season_av))
        plt.scatter(player_df['FIX_GW'],player_df['total_points'], marker = 'x')
        plt.plot(player_df['FIX_GW'],points_smoothed)
        plt.axhline(season_av, c = 'k', linestyle = ":")

        plt.grid()
        plt.show()

In [None]:
# Plot last season's points for every player in the selected team.
# NOTE(review): team.index holds (second_name, first_name) tuples while the
# function matches against the 'name' column - confirm these values line up,
# otherwise no player is plotted.
plot_player_performance(games_to_date, team.index.to_list())

In [None]:
# Mean points per game for each player in the 2019-20 season, highest first.
ranks_2019 = (
    input_df[input_df['FIX_season'] == '2019-20']
    .groupby('name')['total_points']
    .mean()
    .sort_values(ascending=False)
    .to_frame()
)
# Dense rank of that average (1 = best).
ranks_2019['2019_rank'] = ranks_2019['total_points'].rank(ascending=False, method='dense')
# 1 when the player's name is among the selected team's index labels, else 0.
ranks_2019['selected'] = np.where(ranks_2019.index.isin(team.index.to_list()), 1, 0)
# Last position and cost per player after ordering rows by season; cost is divided by 10.
ranks_2019['position'] = input_df.sort_values(by='FIX_season').groupby('name')['position'].agg('last')
ranks_2019['cost'] = input_df.sort_values(by='FIX_season').groupby('name')['cost_today'].agg('last').div(10)

In [None]:
# ranks_2019.head(20)

In [None]:
# Sum of the 2019-20 per-game averages over the selected players.
print("Average per game with this team", ranks_2019.loc[ranks_2019['selected'].eq(1), 'total_points'].sum())
# The selected players (at most 50), ordered by position.
ranks_2019[ranks_2019['selected'].eq(1)].head(50).sort_values('position')

In [None]:
# Path of the current-team CSV (presumably used by later cells - TODO confirm).
team_file = 'Teams/Current_team.csv'
