In [13]:
%config IPCompleter.greedy=True

import import_ipynb
from functions import get_raw_data

import glob
import os
import pandas
import sys
import requests

In [14]:
import plotly.graph_objs as go
import plotly.offline

import chart_studio
import chart_studio.plotly as py
# One-time setup (never commit a real API key): chart_studio.tools.set_credentials_file(username='user', api_key='***')

# Functions

In [15]:
def to_pretty_print(input_str):
    """Turn a snake_case identifier into a human-readable label.

    Every underscore becomes a space and the result goes through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased).
    """
    spaced = ' '.join(input_str.split('_'))
    return spaced.capitalize()


def from_pretty_print(input_str):
    """Turn a display label back into a lower-case snake_case identifier.

    The text is lower-cased and every space is replaced by an underscore.
    """
    lowered = input_str.lower()
    return '_'.join(lowered.split(' '))

In [16]:
def get_aggregate_functions():
    """Return the aggregate prefixes used in summary column names.

    A new list is built on every call, so callers may mutate the result.
    """
    prefixes = ('average', 'median', 'sum', 'count', 'min', 'max')
    return list(prefixes)


def get_features_for_aggregation():
    """Return the per-gameweek columns that get aggregated per player.

    A new list is built on every call, so callers may mutate the result.
    """
    columns = (
        'assists',
        'bonus',
        'bps',
        'clean_sheets',
        'cost',
        'creativity',
        'goals_conceded',
        'goals_scored',
        'ict_index',
        'influence',
        'minutes',
        'own_goals',
        'penalties_missed',
        'penalties_saved',
        'red_cards',
        'saves',
        'selected',
        'threat',
        'total_points',
        'transfers_balance',
        'transfers_in',
        'transfers_out',
        'yellow_cards',
    )
    return list(columns)


def get_aggregate_features():
    """Return the column names of the aggregated player table.

    The list starts with the identifying columns ``name_id``, ``id`` and
    ``name`` and continues with ``<aggregate>_<feature>`` for every feature
    (outer order) and every aggregate function (inner order).
    """
    aggregate_names = get_aggregate_functions()
    identifying_columns = ['name_id', 'id', 'name']
    summary_columns = [
        aggregate_name + "_" + feature_name
        for feature_name in get_features_for_aggregation()
        for aggregate_name in aggregate_names
    ]
    return identifying_columns + summary_columns


def get_detailed_aggregate_data(base_path, season):
    """Aggregate every player's per-gameweek history into one summary row.

    Reads each ``<base_path>data/<season>/players/<name_id>/gw.csv`` file,
    turns its ``value`` column into ``cost`` (divided by 10) and computes the
    average, median, sum, count, min and max of every column listed by
    ``get_features_for_aggregation``.

    Args:
        base_path: Path prefix; it is concatenated directly with ``'data/'``,
            so it has to end with a path separator.
        season: Name of the season directory, e.g. ``'2019-20'``.

    Returns:
        pandas.DataFrame with one row per player directory, indexed by the
        directory name (``name_id``), holding the columns from
        ``get_aggregate_features`` plus ``cost``. ``cost`` is the value of
        the last row of the player's file. Missing values are replaced by 0.
        Files that cannot be processed are reported on stdout and skipped.
    """
    features_in = get_features_for_aggregation()
    features_out = get_aggregate_features()
    features_out.append('cost')

    # Column-name prefix -> name of the pandas Series reduction behind it.
    reductions = (
        ('average', 'mean'),
        ('median', 'median'),
        ('sum', 'sum'),
        ('count', 'count'),
        ('min', 'min'),
        ('max', 'max'),
    )

    # Rows are collected first and the frame is built once at the end;
    # growing a DataFrame row by row re-allocates it on every insert.
    rows_by_name_id = {}

    for file in glob.glob(base_path + 'data/' + season + '/players/*/gw.csv'):
        try:
            df_in = pandas.read_csv(file, encoding='latin_1')
            df_in = df_in.rename(columns={'value': 'cost'})
            df_in['cost'] = df_in['cost'] / 10

            # The player directory is named "<first>_<last>_<id>".
            name_id = os.path.basename(os.path.dirname(file))
            head, separator, _ = name_id.rpartition('_')
            name = (head if separator else name_id).replace('_', ' ')

            row = {
                'name_id': name_id,
                'id': df_in['element'].iloc[0],
                'name': name,
                # A single scalar per player: the cost in the last row read.
                'cost': df_in['cost'].iloc[-1],
            }
            for feature in features_in:
                column = df_in[feature]
                for prefix, method_name in reductions:
                    row[prefix + '_' + feature] = getattr(column, method_name)()
        except Exception as error:
            # Best effort: one unreadable file must not abort the whole run,
            # but the reason is reported so it can be investigated.
            print(file, '-', repr(error))
            continue

        rows_by_name_id[name_id] = row

    df_out = pandas.DataFrame(
        list(rows_by_name_id.values()),
        index=list(rows_by_name_id.keys()),
        columns=features_out,
    )
    return df_out.fillna(0)

In [17]:
def get_agg_features(features, aggregates):
    """Combine features and aggregates into ``<aggregate>_<feature>`` names.

    Features form the outer iteration and aggregates the inner one, so all
    aggregate variants of a feature are adjacent in the returned list.
    """
    return [
        aggregate_name + '_' + feature_name
        for feature_name in features
        for aggregate_name in aggregates
    ]


def get_trace(df, x_metrics, y_metrics, color):
    """Build one bar trace plotting column ``y_metrics`` against ``x_metrics``.

    Despite the plural names, both arguments are single column names. The
    ``name`` column of ``df`` is used as hover text, and the hover box shows
    that text followed by the y value with two decimals.
    """
    hover_pieces = (
        "<b>%{text}</b><br><br>",
        y_metrics,
        ": %{y:.2f}</br>",
        "<extra></extra>",
    )
    return go.Bar(
        x=df[x_metrics],
        y=df[y_metrics],
        text=df['name'],
        marker={'color': color},
        hovertemplate="".join(hover_pieces),
    )


def get_data(df, x_metrics, y_metrics, color):
    """Create one bar trace per (x, y) metric pair.

    Traces are ordered with x as the outer and y as the inner iteration.
    Only the first trace starts visible; every other one is set to
    ``'legendonly'``.
    """
    traces = [
        get_trace(df, x_name, y_name, color)
        for x_name in x_metrics
        for y_name in y_metrics
    ]

    for position, trace in enumerate(traces):
        if position > 0:
            trace['visible'] = 'legendonly'

    return traces


def get_layout(df, x_metrics, y_metrics, title, show_dropdown=True):
    """Build the transparent, white-axis layout with a metric dropdown.

    The dropdown gets one button per (x, y) metric pair, in the same order in
    which ``get_data`` creates the traces. Each button shows exactly one
    trace and is labelled with the display name of the y metric.

    Args:
        df: Unused; kept so the signature matches the other figure helpers.
        x_metrics: Mapping of x column name to display name.
        y_metrics: Mapping of y column name to display name.
        title: Unused; kept for signature compatibility.
        show_dropdown: When falsy, the layout has no dropdown.

    Returns:
        plotly.graph_objs.Layout
    """
    updatemenus = None
    if show_dropdown:
        # The visibility mask needs one flag per trace, i.e. per (x, y) pair.
        # Sizing it by len(y_metrics) alone raised IndexError as soon as
        # there was more than one x metric.
        trace_count = len(x_metrics) * len(y_metrics)

        buttons = []
        for _ in x_metrics.keys():
            for y in y_metrics.keys():
                visibility = [False] * trace_count
                visibility[len(buttons)] = True
                buttons.append(dict(label=y_metrics[y],
                                    method='update',
                                    args=[{'visible': visibility}]))

        updatemenus = [
            dict(active=0,
                 bgcolor='rgba(255,255,255,100)',
                 pad={'r': 0, 't': 10},
                 x=0,
                 y=1.18,
                 xanchor='left',
                 buttons=buttons)]

    layout = go.Layout(
        hovermode='closest',
        showlegend=False,
        updatemenus=updatemenus,
        modebar={'bgcolor': 'rgba(0,0,0,0)'},
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=go.layout.XAxis(
            color='white',
            title=go.layout.xaxis.Title(
                text='',
                font=dict(
                    size=18
                )
            )
        ),
        yaxis=go.layout.YAxis(
            color='white',
            title=go.layout.yaxis.Title(
                text='',
                font=dict(
                    size=18
                )
            )
        )
    )
    return layout


def get_figure(df, x_metrics, y_metrics, title = '', color='rgba(101,255,71, 0.4)', show_dropdown=True):
    """Assemble a figure from the traces of ``get_data`` and ``get_layout``."""
    return go.Figure(
        data=get_data(df, x_metrics, y_metrics, color),
        layout=get_layout(df, x_metrics, y_metrics, title, show_dropdown),
    )


def generate_performance_plots(points_dict, df, position, aggregate="average"):
    """Score the players of one position and publish three bar charts.

    Works on a deep copy of ``df``. Adds ``achievements``, ``errors`` and
    ``value`` (achievements minus errors), keeps only players whose
    ``sum_minutes`` is positive, and sends three figures to Chart Studio via
    ``chart_studio.plotly.plot``. They are named ``<position>value``,
    ``<position>achievements`` and ``<position>errors``.

    Args:
        points_dict: Mapping of metric name to points weight; positive
            weights count as achievements, negative ones as errors.
        df: Player table with a ``web_name`` column, ``sum_minutes`` and one
            ``<aggregate>_<metric>`` column per weighted metric.
        position: Prefix for the Chart Studio file names.
        aggregate: Aggregate prefix of the metric columns, with or without a
            trailing underscore. An empty string plots unprefixed metric
            columns; the totals then fall back to the "average" columns.

    Returns:
        The filtered copy of ``df`` including the three added columns.
    """
    column_prefix = aggregate
    if column_prefix != "" and column_prefix[-1] != "_":
        column_prefix += "_"

    # get_achievements/get_errors insert the underscore themselves. Passing
    # the requested aggregate keeps the totals on the same columns as the
    # per-metric bars, instead of always using "average".
    totals_aggregate = column_prefix[:-1] if column_prefix else "average"

    df = df.copy(deep=True)

    df['achievements'] = get_achievements(df, points_dict, totals_aggregate)
    df['errors'] = get_errors(df, points_dict, totals_aggregate)
    df['value'] = df['achievements'] - df['errors']
    df = df[df['sum_minutes'] > 0]

    achievement_metrics = {'achievements': 'Achievements'}
    for metric in filter_achievements(points_dict).keys():
        achievement_metrics[column_prefix + metric] = to_pretty_print(column_prefix + metric)

    error_metrics = {'errors': 'Errors'}
    for metric in filter_errors(points_dict).keys():
        error_metrics[column_prefix + metric] = to_pretty_print(column_prefix + metric)

    charts = (
        ('value', {'value': 'Value'}),
        ('achievements', achievement_metrics),
        ('errors', error_metrics),
    )
    for chart_name, y_metrics in charts:
        figure = get_figure(df, {'web_name': 'Name'}, y_metrics, chart_name, 'white', show_dropdown=True)
        # Use plotly.offline.iplot(figure) instead to render inline.
        chart_studio.plotly.plot(figure, filename=(position + chart_name))

    return df

In [18]:
def filter_achievements(points_dict):
    """Return the entries of ``points_dict`` with a strictly positive weight."""
    rewarded = {}
    for metric, weight in points_dict.items():
        if weight > 0:
            rewarded[metric] = weight
    return rewarded


def filter_errors(points_dict):
    """Return the entries of ``points_dict`` with a strictly negative weight."""
    penalised = {}
    for metric, weight in points_dict.items():
        if weight < 0:
            penalised[metric] = weight
    return penalised


def get_errors(df, points_dict, aggregate="average"):
    """Add up the penalty contribution of every negatively weighted metric.

    For each metric in ``points_dict`` with a negative weight, the column
    ``<aggregate>_<metric>`` of ``df`` is multiplied by the negated weight
    and the products are summed. A missing value counts as 0 once another
    metric has a value for that row.

    Args:
        df: DataFrame with one ``<aggregate>_<metric>`` column per
            negatively weighted metric.
        points_dict: Mapping of metric name to points weight.
        aggregate: Column prefix without the trailing underscore.

    Returns:
        pandas.Series of floats indexed by the labels of ``df``. If no
        metric has a negative weight, the series is all zeros rather than
        empty, so that subtracting it does not produce NaN.
    """
    penalties = filter_errors(points_dict)
    if not penalties:
        return pandas.Series(0.0, index=df.index)

    # Explicit dtype: a bare pandas.Series() is deprecated and becomes an
    # object-dtype series on newer pandas versions.
    errors = pandas.Series(dtype=float)
    for metric, weight in penalties.items():
        contribution = df[aggregate + "_" + metric] * (0 - weight)
        errors = errors.add(contribution, fill_value=0)
    return errors


def get_achievements(df, points_dict, aggregate="average"):
    """Add up the points contribution of every positively weighted metric.

    For each metric in ``points_dict`` with a positive weight, the column
    ``<aggregate>_<metric>`` of ``df`` is multiplied by that weight and the
    products are summed. A missing value counts as 0 once another metric has
    a value for that row.

    Args:
        df: DataFrame with one ``<aggregate>_<metric>`` column per
            positively weighted metric.
        points_dict: Mapping of metric name to points weight.
        aggregate: Column prefix without the trailing underscore.

    Returns:
        pandas.Series of floats indexed by the labels of ``df``. If no
        metric has a positive weight, the series is all zeros rather than
        empty, so that downstream arithmetic does not produce NaN.
    """
    rewards = filter_achievements(points_dict)
    if not rewards:
        return pandas.Series(0.0, index=df.index)

    # Explicit dtype: a bare pandas.Series() is deprecated and becomes an
    # object-dtype series on newer pandas versions.
    achievements = pandas.Series(dtype=float)
    for metric, weight in rewards.items():
        contribution = df[aggregate + '_' + metric] * weight
        achievements = achievements.add(contribution, fill_value=0)
    return achievements

In [19]:
# Metric groups used to assemble the per-position feature lists below.
# 'clean_sheets' was previously misspelled 'clean_sheetes', which produced
# feature names that match no column of the aggregated data.
points_metrics = ['goals_scored','assists','own_goals','clean_sheets','goals_conceded',
                  'penalties_missed','penalties_saved','minutes','yellow_cards']

defensive_metrics = ['clean_sheets','saves','penalties_saved','recoveries','clearances_blocks_interceptions','tackles',
                     'goals_conceded','own_goals','penalties_conceded','errors_leading_to_goal','errors_leading_to_goal_attempt']

creativity_metrics=['assists','big_chances_created','big_chances_missed',
                    'attempted_passes','completed_passes','key_passes','dribbles','open_play_crosses']

attack_metrics=['goals_scored','winning_goals','penalties_missed','target_missed','tackled','offside']

general_metrics=['minutes','red_cards','yellow_cards','fouls',
               'bonus','bps','total_points','ea_index','ict_index','influence','creativity','threat']

other_metrics=['cost','selected','loaned_in','loaned_out','transfers_in','transfers_out','transfers_balance']

# Position-independent points weights; each position cell copies this dict
# and adds its own weights. Positive weights are treated as achievements and
# negative ones as errors.
points = {'assists':3,
          'own_goals':-2,
          'penalties_missed':-2,
          'minutes':1/45,
          'yellow_cards':-1,
          'red_cards':-3}

# Main

In [20]:
URL = "https://fantasy.premierleague.com/api/bootstrap-static/"
# The timeout stops an unreachable API from hanging the kernel, and
# raise_for_status reports an HTTP error directly instead of as a confusing
# JSON or KeyError failure further down.
_RESPONSE = requests.get(URL, timeout=30)
_RESPONSE.raise_for_status()
DATA = _RESPONSE.json()
# Use the gameweek flagged as current; if none is flagged, fall back to the
# last event in the list.
CURR_GW_OBJS = [event for event in DATA['events'] if event['is_current']]
if not CURR_GW_OBJS:
    CURR_GW_OBJS = DATA['events']
CURR_GW = CURR_GW_OBJS[-1]['id']
SEASON = '2019-20'
BASE_PATH = '../scraper/'

In [21]:
# Per-player aggregates built from the scraped gw.csv files; the path of any
# file that could not be processed is printed below the cell.
agg_data = get_detailed_aggregate_data(BASE_PATH, SEASON)
# Raw player table loaded by the helper imported from the local `functions`
# notebook.
raw_data = get_raw_data(BASE_PATH, SEASON)

../scraper/data/2019-20/players\Muhamed_Bešić_522\gw.csv


In [22]:
# Drop the raw 'name' column so it does not clash with the 'name' column of
# agg_data in the merge. errors='ignore' and rebinding (instead of an
# in-place drop) keep the cell re-runnable: the second run no longer raises
# KeyError.
raw_data = raw_data.drop(columns=['name'], errors='ignore')

# Outer merge keeps players that appear in only one of the two tables. The
# lower-cased web name is added only to get a case-insensitive sort.
df = (
    pandas.merge(agg_data, raw_data, on='id', how='outer')
    .assign(
        minutes_points=lambda frame: frame['minutes'] / 45,
        web_name_lower=lambda frame: frame['web_name'].str.lower(),
    )
    .sort_values(by="web_name_lower")
)

In [23]:
# Split the merged table into one frame per playing position.
goalkeepers, defenders, midfielders, forwards = (
    df[df['position'] == role]
    for role in ('Goalkeeper', 'Defender', 'Midfielder', 'Forward')
)

In [26]:
# De-duplicate the metric groups relevant for goalkeepers. The names are
# sorted because iterating a set of strings gives a different order in every
# kernel session, which made the feature list non-reproducible.
game_metrics_gkp = get_agg_features(
    sorted(set(points_metrics + general_metrics + defensive_metrics)),
    ['sum', 'average'])

# Goalkeeper weights: the shared weights plus the position-specific ones.
points_gkp = dict(points,
                  goals_scored=6,
                  saves=0.5,
                  penalties_saved=5,
                  clean_sheets=4,
                  goals_conceded=-1)

gkp_df = generate_performance_plots(points_gkp, goalkeepers, 'Goalkeeper')

In [27]:
# Recompute the goalkeeper scores on a fresh copy (assign returns a new
# frame) and show the row of a single player as a spot check.
goalkeepers = goalkeepers.assign(
    achievements=lambda frame: get_achievements(frame, points_gkp),
    errors=lambda frame: get_errors(frame, points_gkp),
    value=lambda frame: frame['achievements'] - frame['errors'],
)
goalkeepers[goalkeepers['web_name'] == 'Alisson']

Unnamed: 0,name_id,id,name,average_assists,median_assists,sum_assists,count_assists,min_assists,max_assists,average_bonus,...,value_form,value_season,web_name,yellow_cards,position,minutes_points,web_name_lower,achievements,errors,value
26,Alisson_Ramses Becker_189,189,Alisson Ramses Becker,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.2,Alisson,0,Goalkeeper,0.844444,alisson,0.461111,0.0,0.461111


In [28]:
# De-duplicate the metric groups relevant for defenders. The names are
# sorted because iterating a set of strings gives a different order in every
# kernel session, which made the feature list non-reproducible.
game_metrics_def = get_agg_features(
    sorted(set(points_metrics + general_metrics + defensive_metrics)),
    ['sum', 'average'])

# Defender weights: the shared weights plus the position-specific ones.
points_def = dict(points,
                  goals_scored=6,
                  clean_sheets=4,
                  goals_conceded=-1)

generate_performance_plots(points_def, defenders, 'Defender')

Unnamed: 0,name_id,id,name,average_assists,median_assists,sum_assists,count_assists,min_assists,max_assists,average_bonus,...,value_form,value_season,web_name,yellow_cards,position,minutes_points,web_name_lower,achievements,errors,value
364,Maximillian_Aarons_274,274,Maximillian Aarons,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.1,0.4,Aarons,2,Defender,8.000000,aarons,2.000000,3.000000,-1.000000
10,Adam_Smith_63,63,Adam Smith,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.3,1.1,Adam Smith,1,Defender,6.888889,adam smith,1.722222,2.000000,-0.277778
390,Nathan_Aké_59,59,Nathan Aké,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.2,1.0,Aké,1,Defender,8.000000,aké,2.000000,2.250000,-0.250000
494,Toby_Alderweireld_331,331,Toby Alderweireld,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.3,1.1,Alderweireld,0,Defender,8.000000,alderweireld,2.000000,1.500000,0.500000
500,Trent_Alexander-Arnold_182,182,Trent Alexander-Arnold,0.750000,1.0,3.0,4.0,0.0,1.0,0.50,...,0.8,3.1,Alexander-Arnold,1,Defender,8.000000,alexander-arnold,5.250000,1.000000,4.250000
334,Marcos_Alonso_103,103,Marcos Alonso,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.0,0.2,Alonso,0,Defender,0.022222,alonso,0.005556,0.000000,0.005556
110,César_Azpilicueta_105,105,César Azpilicueta,0.250000,0.0,1.0,4.0,0.0,1.0,0.00,...,0.3,1.2,Azpilicueta,0,Defender,8.000000,azpilicueta,2.750000,2.250000,0.500000
168,Fabián_Balbuena_382,382,Fabián Balbuena,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.0,-0.2,Balbuena,1,Defender,2.000000,balbuena,0.500000,1.500000,-1.000000
185,George_Baldock_294,294,George Baldock,0.250000,0.0,1.0,4.0,0.0,1.0,0.75,...,0.8,3.3,Baldock,1,Defender,8.000000,baldock,3.750000,1.500000,2.250000
98,Chris_Basham_423,423,Chris Basham,0.000000,0.0,0.0,4.0,0.0,0.0,0.00,...,0.6,2.2,Basham,0,Defender,7.355556,basham,2.838889,1.250000,1.588889


In [29]:
# De-duplicate the metric groups relevant for midfielders. The names are
# sorted because iterating a set of strings gives a different order in every
# kernel session, which made the feature list non-reproducible.
game_metrics_mid = get_agg_features(
    sorted(set(points_metrics + general_metrics + creativity_metrics + attack_metrics)),
    ['sum', 'average'])

# Midfielder weights: the shared weights plus the position-specific ones.
# A weight of 0 makes the metric neither an achievement nor an error.
points_mid = dict(points,
                  goals_scored=5,
                  clean_sheets=1,
                  goals_conceded=0)

generate_performance_plots(points_mid, midfielders, 'Midfielder')

Unnamed: 0,name_id,id,name,average_assists,median_assists,sum_assists,count_assists,min_assists,max_assists,average_bonus,...,value_form,value_season,web_name,yellow_cards,position,minutes_points,web_name_lower,achievements,errors,value
338,Marc_Albrighton_174,174,Marc Albrighton,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.1,0.6,Albrighton,1,Midfielder,1.933333,albrighton,0.483333,0.25,0.233333
58,Bamidele_Alli_344,344,Bamidele Alli,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.0,0.1,Alli,0,Midfielder,0.666667,alli,0.166667,0.00,0.166667
375,Miguel_Almirón_266,266,Miguel Almirón,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.3,1.4,Almirón,1,Midfielder,8.000000,almirón,2.250000,0.25,2.000000
207,Ibrahim_Amadou_507,507,Ibrahim Amadou,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.0,0.2,Amadou,0,Midfielder,1.200000,amadou,0.300000,0.00,0.300000
36,André Filipe_Tavares Gomes_422,422,André Filipe Tavares Gomes,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.3,1.1,André Gomes,2,Midfielder,7.000000,andré gomes,2.000000,0.50,1.500000
373,Michail_Antonio_389,389,Michail Antonio,0.25,0.0,1.0,4.0,0.0,1.0,0.00,...,0.2,0.9,Antonio,0,Midfielder,2.777778,antonio,1.444444,0.00,1.444444
484,Stuart_Armstrong_323,323,Stuart Armstrong,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.1,0.4,Armstrong,1,Midfielder,1.044444,armstrong,0.261111,0.25,0.011111
91,Christian_Atsu_267,267,Christian Atsu,0.25,0.0,1.0,4.0,0.0,1.0,0.00,...,0.4,1.5,Atsu,0,Midfielder,3.422222,atsu,1.855556,0.00,1.855556
450,Ross_Barkley_117,117,Ross Barkley,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.2,0.7,Barkley,0,Midfielder,4.577778,barkley,1.144444,0.00,1.144444
199,Harvey_Barnes_172,172,Harvey Barnes,0.00,0.0,0.0,4.0,0.0,0.0,0.00,...,0.4,1.5,Barnes,0,Midfielder,2.777778,barnes,1.944444,0.00,1.944444


In [30]:
# De-duplicate the metric groups relevant for forwards. The names are sorted
# because iterating a set of strings gives a different order in every kernel
# session, which made the feature list non-reproducible.
game_metrics_fwd = get_agg_features(
    sorted(set(points_metrics + general_metrics + creativity_metrics + attack_metrics)),
    ['sum', 'average'])

# Forward weights: the shared weights plus the position-specific ones.
# A weight of 0 makes the metric neither an achievement nor an error.
points_fwd = dict(points,
                  goals_scored=4,
                  clean_sheets=0,
                  goals_conceded=0)

generate_performance_plots(points_fwd, forwards, 'Forward')

Unnamed: 0,name_id,id,name,average_assists,median_assists,sum_assists,count_assists,min_assists,max_assists,average_bonus,...,value_form,value_season,web_name,yellow_cards,position,minutes_points,web_name_lower,achievements,errors,value
488,Tammy_Abraham_460,460,Tammy Abraham,0.0,0.0,0.0,4.0,0.0,0.0,1.5,...,1.0,3.9,Abraham,1,Forward,5.577778,abraham,5.394444,0.25,5.144444
90,Che_Adams_437,437,Che Adams,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.3,1.4,Adams,0,Forward,6.2,adams,1.55,0.0,1.55
471,Sergio_Agüero_210,210,Sergio Agüero,0.25,0.0,1.0,4.0,0.0,1.0,1.5,...,0.8,3.3,Agüero,0,Forward,5.911111,agüero,8.227778,0.0,8.227778
174,Florin_Andone_46,46,Florin Andone,0.0,0.0,0.0,4.0,0.0,0.0,0.25,...,0.2,1.0,Andone,0,Forward,1.577778,andone,1.394444,0.75,0.644444
423,Pierre-Emerick_Aubameyang_11,11,Pierre-Emerick Aubameyang,0.25,0.0,1.0,4.0,0.0,1.0,1.0,...,0.6,2.4,Aubameyang,1,Forward,8.0,aubameyang,5.75,0.25,5.5
268,Jordan_Ayew_468,468,Jordan Ayew,0.0,0.0,0.0,4.0,0.0,0.0,1.5,...,1.0,4.0,Ayew,0,Forward,4.955556,ayew,3.238889,0.0,3.238889
50,Ashley_Barnes_90,90,Ashley Barnes,0.0,0.0,0.0,4.0,0.0,0.0,1.0,...,1.0,4.0,Barnes,1,Forward,7.155556,barnes,5.788889,0.25,5.538889
374,Michy_Batshuayi_461,461,Michy Batshuayi,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.1,Batshuayi,0,Forward,0.155556,batshuayi,0.038889,0.0,0.038889
92,Christian_Benteke_129,129,Christian Benteke,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.3,1.0,Benteke,0,Forward,4.244444,benteke,1.061111,0.0,1.061111
80,Callum_Wilson_67,67,Callum Wilson,0.75,1.0,3.0,4.0,0.0,1.0,0.0,...,0.6,2.6,Callum Wilson,1,Forward,7.488889,callum wilson,5.122222,0.25,4.872222
