In [1]:
# import libraries

import requests
import pandas as pd
import numpy as np
import time

In [2]:
# use FPL API to access FPL data

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [3]:
# create dataframes for teams, elements, and element_type data

teams_df = pd.DataFrame(json['teams'])

elements_df = pd.DataFrame(json['elements'])

elements_types_df = pd.DataFrame(json['element_types'])


In [4]:
# inspect each dataframe

In [5]:
teams_df.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1200,1250,1130,1150,1220,1210,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1100,1160,1120,1140,1080,1100,2
2,94,0,,3,0,Brentford,0,0,0,BRE,...,,False,0,1010,1020,1020,1030,1020,1030,130
3,36,0,,4,0,Brighton,0,0,0,BHA,...,,False,0,1100,1130,1150,1190,1100,1130,131
4,90,0,,5,0,Burnley,0,0,0,BUR,...,,False,0,1060,1070,1040,1090,1040,1080,43


In [6]:
elements_df.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text
0,,,80201,0,0,0,0,0,1,2.6,...,230,44,230,44,,,,,,
1,,,115918,0,0,0,0,0,1,1.0,...,16,10,16,10,,,,,,
2,,,47431,0,0,0,0,0,3,2.2,...,460,184,460,184,1.0,,2.0,,,
3,25.0,25.0,54694,0,0,0,0,0,4,0.8,...,521,67,521,67,,,,,1.0,
4,,,58822,0,0,0,0,0,2,1.7,...,61,32,61,32,,,5.0,,,


In [7]:
elements_types_df

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],56
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],181
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],223
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],70


In [8]:
# further inspect columns of elements dataframe

elements_df.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [9]:
# filter out irrelevent columns from elements_df

slim_elements_df = elements_df[['id','first_name','second_name','team','element_type','selected_by_percent','now_cost','minutes','transfers_in','value_season','total_points', 'status']]

# change index

slim_elements_df = slim_elements_df.set_index('id')

# map position from element_types_df into slim_elements_df

slim_elements_df['position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)

# map team name from teams_df into slim_elements_df

slim_elements_df['team'] = slim_elements_df.team.map(teams_df.set_index('id').name)

# ensure all value info is of type int

slim_elements_df['value_season'] = slim_elements_df.value_season.astype(float)

# add points per 90 minutes

slim_elements_df['ppg'] = (slim_elements_df['value_season'] / slim_elements_df['minutes']) * 90

# remove players with less than 18 games played minutes

slim_elements_df = slim_elements_df.loc[slim_elements_df.minutes > 1620]

# add new value metric: points per minute per cost

slim_elements_df['value'] = slim_elements_df['ppg'] / slim_elements_df['now_cost']

In [10]:
# view best value players

slim_elements_df.sort_values(by = 'value', ascending = False).head(10)

Unnamed: 0_level_0,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,status,position,ppg,value
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1


In [11]:
# calculate mean value per position

pivot=slim_elements_df.pivot_table(index='position',values='value_season',aggfunc=np.mean).reset_index()
pivot.sort_values('value',ascending=False)

KeyError: 'value'

In [None]:
# calculate mean value per team

team_pivot = slim_elements_df.pivot_table(index='team',values='value_season',aggfunc=np.mean).reset_index()
team_pivot.sort_values('value',ascending=False)

In [None]:
# create new dataframes for each position

fwd_df = slim_elements_df.loc[slim_elements_df.position == 'Forward']
mid_df = slim_elements_df.loc[slim_elements_df.position == 'Midfielder']
def_df = slim_elements_df.loc[slim_elements_df.position == 'Defender']
goal_df = slim_elements_df.loc[slim_elements_df.position == 'Goalkeeper']

In [None]:
# histogram of goalkeeper values

goal_df.value.hist()

In [None]:
# most valuable goalkeepers

goal_df.sort_values('value',ascending=False).head(10)

In [None]:
# histogram of defenders

def_df.value.hist()

In [None]:
# most valuable defenders

def_df.sort_values('value',ascending=False).head(10)

In [None]:
# histogram of midfielders

mid_df.value.hist()

In [None]:
# most valuable midfielders

mid_df.sort_values('value',ascending=False).head(10)

In [None]:
# histogram of forwards

fwd_df.value.hist()

In [None]:
# most valuable forwards

fwd_df.sort_values('value',ascending=False).head(10)

In [None]:
def get_money_team(data = slim_elements_df, budget = 1000, num_goal = 2, num_def = 5, num_mid = 5, num_fwd = 3):
    money_team = []
    budget = budget
    positions = {'Goalkeeper':num_goal, 'Defender':num_def, 'Midfielder':num_mid, 'Forward':num_fwd}
    teams = {'Arsenal':3, 'Aston Villa':3, 'Brighton':3, 'Burnley':3, 'Chelsea':3, 
             'Crystal Palace':3, 'Everton':3, 'Leicester':3, 'Leeds':3, 'Liverpool':3, 
             'Man City':3, 'Man Utd':3, 'Newcastle':3, 'Norwich':3, 'Southampton':3, 
             'Spurs':3, 'Watford':3, 'West Ham':3, 'Wolves':3, 'Brentford':3, }
    for idx, player in slim_elements_df.sort_values(by = 'value', ascending = False).iterrows():
        if len(money_team) <= 15 and budget >= player.now_cost and positions[player.position] > 0 and teams[player.team] > 0 and player.status == 'a':    
            money_team.append(player)
            budget -= player.now_cost
            positions[player.position] -= 1
            teams[player.team] -= 1
        else:
            continue      
    
    money_team_df = pd.DataFrame(money_team)
    
    time_start = time.time()
    timeout = 5
    while budget > 0:
        for idx1, player in money_team_df.sort_values(by = 'ppg').iterrows():
            position = player.position
            new_budget = budget + player.now_cost
            for idx2, new_player in slim_elements_df.sort_values(by = 'value', ascending = False).iterrows():
                if new_player.name not in money_team_df.index and new_budget >= new_player.now_cost and new_player.position == player.position and new_player.status == 'a' and (teams[new_player.team] > 0 or new_player.team == player.team) and new_player['ppg'] > player['ppg']:   
                    teams[player.team] += 1
                    money_team_df.drop(pd.DataFrame(money_team_df.loc[player.name]), inplace = True)
                    money_team_df = money_team_df.append(pd.DataFrame(slim_elements_df.loc[new_player.name]).T)
                    budget += player.now_cost
                    budget -= new_player.now_cost
                    teams[new_player.team] -= 1

                    break
                else:
                    continue
        if time.time() > time_start + timeout:
            break
    return money_team_df

In [None]:
def calc_budget(num_def = 5, num_mid = 5, num_fwd = 3):
    
    goal_cost = 40
    def_cost = 40
    mid_cost = 45
    fwd_cost = 45
    
    budget = 1000 - def_cost*(5-num_def) - mid_cost*(5-num_mid) - fwd_cost*(3-num_fwd) - goal_cost
    return budget

In [None]:
money_team_2553 = get_money_team(data = slim_elements_df, budget = 1000, num_goal = 2, num_def = 5, num_mid = 5, num_fwd = 3)
money_team_541 = get_money_team(data = slim_elements_df, budget = 825, num_goal = 1, num_def = 5, num_mid = 4, num_fwd = 1)
money_team_532 = get_money_team(data = slim_elements_df, budget = 825, num_goal = 1, num_def = 5, num_mid = 3, num_fwd = 2)
money_team_451 = get_money_team(data = slim_elements_df, budget = 830, num_goal = 1, num_def = 4, num_mid = 5, num_fwd = 1)
money_team_433 = get_money_team(data = slim_elements_df, budget = 830, num_goal = 1, num_def = 4, num_mid = 3, num_fwd = 3)
money_team_352 = get_money_team(data = slim_elements_df, budget = 835, num_goal = 1, num_def = 3, num_mid = 5, num_fwd = 2)
money_team_343 = get_money_team(data = slim_elements_df, budget = 835, num_goal = 1, num_def = 3, num_mid = 4, num_fwd = 3)

In [None]:
print("2553 total points: " + str(sum(money_team_2553['total_points'])*(11/15)))
print("541 total points: " + str(sum(money_team_541['total_points'])))
print("532 total points: " + str(sum(money_team_532['total_points'])))
print("451 total points: " + str(sum(money_team_451['total_points'])))
print("433 total points: " + str(sum(money_team_433['total_points'])))
print("352 total points: " + str(sum(money_team_352['total_points'])))
print("343 total points: " + str(sum(money_team_343['total_points'])))

In [None]:
print("2553 points per game: " + str(sum(money_team_2553['ppg'])*(11/15)))
print("541 points per game: " + str(sum(money_team_541['ppg'])))
print("532 points per game: " + str(sum(money_team_532['ppg'])))
print("451 points per game: " + str(sum(money_team_451['ppg'])))
print("433 points per game: " + str(sum(money_team_433['ppg'])))
print("352 points per game: " + str(sum(money_team_352['ppg'])))
print("343 points per game: " + str(sum(money_team_343['ppg'])))

In [None]:
money_team_2553

In [None]:
money_team_541

In [None]:
money_team_532

In [None]:
money_team_451

In [None]:
money_team_433

In [None]:
money_team_352

In [None]:
money_team_343