In [52]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model

In [53]:
pd.get_option("display.max_columns")
pd.set_option("display.max_columns", None)
pd.options.mode.chained_assignment = None

In [54]:
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()

In [55]:
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])

In [154]:
elements_types_df.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],78
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],254
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],322
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],92


In [56]:
slim_elements_df = elements_df[['id','first_name','second_name','team','element_type'
                                ,'selected_by_percent','now_cost','minutes','transfers_in'
                                ,'value_season','total_points'
                               , 'influence','creativity','threat']]
slim_elements_df['position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)
slim_elements_df['team_name'] = slim_elements_df.team.map(teams_df.set_index('id').name)
slim_elements_df['value'] = slim_elements_df.value_season.astype(float)
slim_elements_df['selected_by_percent'] = slim_elements_df.selected_by_percent.astype(float)
slim_elements_df = slim_elements_df.loc[slim_elements_df.value > 0]
slim_elements_df.sort_values('selected_by_percent',ascending=False).head(15)

Unnamed: 0,id,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,influence,creativity,threat,position,team_name,value
470,318,Erling,Haaland,13,4,85.2,122,1860,5583326,15.0,183,978.4,228.5,1300.0,Forward,Man City,15.0
529,357,Kieran,Trippier,15,2,65.8,61,2017,6544905,23.9,146,591.4,1185.4,81.0,Defender,Newcastle,23.9
494,335,Marcus,Rashford,14,3,52.3,73,1954,7703064,20.7,151,660.0,292.4,973.0,Midfielder,Man Utd,20.7
4,7,Martin,Ødegaard,1,3,37.8,70,1868,5146033,18.7,131,572.6,744.3,600.0,Midfielder,Arsenal,18.7
645,427,Harry,Kane,18,4,37.3,118,2146,5353588,13.1,155,797.2,515.4,1273.0,Forward,Spurs,13.1
10,13,Bukayo,Saka,1,3,35.5,84,1984,4561484,15.7,132,656.4,721.6,750.0,Midfielder,Arsenal,15.7
455,301,Kevin,De Bruyne,13,3,34.9,124,1860,6059203,10.1,125,636.4,1141.3,500.0,Midfielder,Man City,10.1
21,26,William,Saliba,1,2,34.4,53,2055,5269745,18.3,97,452.4,69.2,105.0,Defender,Arsenal,18.3
15,19,Gabriel,Martinelli Silva,1,3,34.0,65,1825,5733475,17.5,114,470.0,539.2,858.0,Midfielder,Arsenal,17.5
116,80,Ivan,Toney,4,4,28.9,77,1881,8221388,15.8,122,632.4,290.4,880.0,Forward,Brentford,15.8


In [57]:
pivot = slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()
pivot.sort_values('value',ascending=False)

Unnamed: 0,position,value
2,Goalkeeper,12.28
0,Defender,7.214286
3,Midfielder,6.415918
1,Forward,5.597059


In [58]:
team_pivot = slim_elements_df.pivot_table(index='team_name',values='value',aggfunc=np.mean).reset_index()
team_pivot.sort_values('value',ascending=False)

Unnamed: 0,team_name,value
0,Arsenal,10.156522
14,Newcastle,9.654167
12,Man City,8.609524
3,Brentford,8.388
4,Brighton,8.047826
13,Man Utd,7.938462
6,Crystal Palace,7.373913
8,Fulham,7.317241
1,Aston Villa,7.134783
17,Spurs,6.888889


In [59]:
fwd_df = slim_elements_df.loc[slim_elements_df.position == 'Forward']
mid_df = slim_elements_df.loc[slim_elements_df.position == 'Midfielder']
def_df = slim_elements_df.loc[slim_elements_df.position == 'Defender']
goal_df = slim_elements_df.loc[slim_elements_df.position == 'Goalkeeper']

In [60]:
goal_df.sort_values('value',ascending=False).head(15)

Unnamed: 0,id,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,influence,creativity,threat,position,team_name,value
117,81,David,Raya Martin,4,1,9.9,47,2070,1491656,22.8,107,721.4,20.1,0.0,Goalkeeper,Brentford,22.8
304,2,Bernd,Leno,9,1,3.0,45,1980,384444,21.6,97,730.4,10.0,0.0,Goalkeeper,Fulham,21.6
486,327,David,De Gea Quintana,14,1,10.3,50,2160,1268161,19.4,97,528.8,10.0,0.0,Goalkeeper,Man Utd,19.4
548,376,Nick,Pope,15,1,22.7,55,2001,3651542,19.1,105,416.4,0.0,0.0,Goalkeeper,Newcastle,19.1
348,254,Danny,Ward,10,1,27.8,40,2070,2248493,19.0,76,544.2,0.0,0.0,Goalkeeper,Leicester,19.0
229,152,Vicente,Guaita,7,1,2.0,44,2070,486856,18.9,83,600.2,0.0,0.0,Goalkeeper,Crystal Palace,18.9
711,478,José,Malheiro de Sá,20,1,5.6,50,2070,868619,18.4,92,525.2,0.0,0.0,Goalkeeper,Wolves,18.4
12,15,Aaron,Ramsdale,1,1,12.1,49,2070,1379433,18.2,89,418.4,0.0,0.0,Goalkeeper,Arsenal,18.2
424,281,Alisson,Ramses Becker,12,1,15.1,54,1980,1408577,18.1,98,638.4,10.1,0.0,Goalkeeper,Liverpool,18.1
579,398,Dean,Henderson,16,1,6.6,46,1620,1417234,17.8,82,474.4,0.0,0.0,Goalkeeper,Nott'm Forest,17.8


In [61]:
def_df.sort_values('total_points',ascending=False).head(15)

Unnamed: 0,id,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,influence,creativity,threat,position,team_name,value
529,357,Kieran,Trippier,15,2,65.8,61,2017,6544905,23.9,146,591.4,1185.4,81.0,Defender,Newcastle,23.9
538,366,Fabian,Schär,15,2,15.5,52,1964,3046251,19.6,102,425.2,157.7,350.0,Defender,Newcastle,19.6
21,26,William,Saliba,1,2,34.4,53,2055,5269745,18.3,97,452.4,69.2,105.0,Defender,Arsenal,18.3
140,526,Ben,Mee,4,2,12.5,50,1928,2155727,19.0,95,484.8,35.4,183.0,Defender,Brentford,19.0
13,16,Gabriel,dos Santos Magalhães,1,2,13.5,52,2070,2046751,17.7,92,475.6,62.0,264.0,Defender,Arsenal,17.7
530,358,Dan,Burn,15,2,3.6,46,1944,619541,20.0,92,334.4,143.2,164.0,Defender,Newcastle,20.0
7,10,Benjamin,White,1,2,12.9,47,1768,2073921,19.1,90,348.2,299.2,61.0,Defender,Arsenal,19.1
491,332,Luke,Shaw,14,2,26.0,54,1661,3551145,16.7,90,375.2,347.7,91.0,Defender,Man Utd,16.7
549,377,Sven,Botman,15,2,5.2,45,1777,926649,19.8,89,349.8,85.4,187.0,Defender,Newcastle,19.8
307,201,Tim,Ream,9,2,3.1,46,2157,419466,18.3,84,463.6,32.6,72.0,Defender,Fulham,18.3


In [62]:
mid_df.sort_values('total_points',ascending=False).head(15)

Unnamed: 0,id,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,influence,creativity,threat,position,team_name,value
494,335,Marcus,Rashford,14,3,52.3,73,1954,7703064,20.7,151,660.0,292.4,973.0,Midfielder,Man Utd,20.7
10,13,Bukayo,Saka,1,3,35.5,84,1984,4561484,15.7,132,656.4,721.6,750.0,Midfielder,Arsenal,15.7
4,7,Martin,Ødegaard,1,3,37.8,70,1868,5146033,18.7,131,572.6,744.3,600.0,Midfielder,Arsenal,18.7
455,301,Kevin,De Bruyne,13,3,34.9,124,1860,6059203,10.1,125,636.4,1141.3,500.0,Midfielder,Man City,10.1
541,369,Miguel,Almirón Rejala,15,3,26.8,56,1879,5606852,22.3,125,512.6,284.1,701.0,Midfielder,Newcastle,22.3
426,283,Mohamed,Salah,12,3,24.6,126,1956,3855262,9.7,122,498.6,511.4,992.0,Midfielder,Liverpool,9.7
492,333,Bruno,Borges Fernandes,14,3,10.0,98,2069,2040586,11.6,114,644.8,935.3,589.0,Midfielder,Man Utd,11.6
15,19,Gabriel,Martinelli Silva,1,3,34.0,65,1825,5733475,17.5,114,470.0,539.2,858.0,Midfielder,Arsenal,17.5
155,107,Solly,March,5,3,5.9,51,1900,1145832,20.6,105,505.6,549.9,648.0,Midfielder,Brighton,20.6
355,261,James,Maddison,10,3,4.8,81,1362,3167164,12.1,98,532.8,516.3,498.0,Midfielder,Leicester,12.1


In [63]:
fwd_df.sort_values('total_points',ascending=False).head(15)

Unnamed: 0,id,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,influence,creativity,threat,position,team_name,value
470,318,Erling,Haaland,13,4,85.2,122,1860,5583326,15.0,183,978.4,228.5,1300.0,Forward,Man City,15.0
645,427,Harry,Kane,18,4,37.3,118,2146,5353588,13.1,155,797.2,515.4,1273.0,Forward,Spurs,13.1
116,80,Ivan,Toney,4,4,28.9,77,1881,8221388,15.8,122,632.4,290.4,880.0,Forward,Brentford,15.8
44,40,Ollie,Watkins,2,4,4.7,72,1795,935315,13.1,94,422.4,257.4,745.0,Forward,Aston Villa,13.1
316,210,Aleksandar,Mitrović,9,4,21.9,69,1636,7602721,12.6,87,471.0,207.6,1030.0,Forward,Fulham,12.6
131,95,Bryan,Mbeumo,4,4,3.2,58,1747,891682,14.1,82,335.8,418.3,511.0,Forward,Brentford,14.1
575,394,Brennan,Johnson,16,4,2.2,57,1885,409038,13.3,76,302.6,240.2,492.0,Forward,Nott'm Forest,13.3
616,411,Che,Adams,17,4,1.3,62,1574,506105,11.8,73,277.0,277.9,477.0,Forward,Southampton,11.8
528,356,Callum,Wilson,15,4,3.1,70,1153,2438112,10.1,71,307.6,158.9,593.0,Forward,Newcastle,10.1
422,279,Roberto,Firmino,12,4,3.0,80,969,1820663,8.9,71,410.2,267.5,642.0,Forward,Liverpool,8.9


# Data Engineering

### Fixed Variables

In [137]:
gameweek_gone = 24
time = 6
future_gameweeks = 6

### Completed Fixtures, Minutes, and Points 

In [98]:
df = []
for j,k in enumerate(range(gameweek_gone)):
        url_gameweek = 'https://fantasy.premierleague.com/api/event/'+str(k+1)+'/live/'
        r = requests.get(url_gameweek)
        data = r.json()
        
        explain = [data['elements'][i]['explain'] for i in range(len(data['elements']))]
        game_count = [len(games) for games in explain]
        player_ids = [ele['id'] for ele in data['elements']]
        
        id_repeats = []
        for i in range(len(game_count)):
            id_repeats.append([player_ids[i]] * game_count[i])

        player_ids = [val for sublist in id_repeats for val in sublist]

        fix_ids = [ele['fixture'] for elem in explain for ele in elem]
        minutes = [ele['stats'][0]['value'] for elem in explain for ele in elem]
        points = [ele['stats'] for elem in explain for ele in elem]

        points_fix = []
        for i in points:
            total_points = 0
            for j in i:
                total_points += j['points']
            points_fix.append(total_points)

        fix_df = list(zip(player_ids, fix_ids, minutes, points_fix))
        df.append(fix_df)
        
tuples = [ele for elem in df for ele in elem]
df_final = pd.DataFrame(tuples, columns=['player_id','fix_id','minutes','points'])

In [99]:
df_final.head()

Unnamed: 0,player_id,fix_id,minutes,points
0,1,1,0,0
1,2,4,0,0
2,3,1,90,2
3,4,1,0,0
4,5,1,0,0


### Fixture Events

In [100]:
url_fixtures = 'https://fantasy.premierleague.com/api/fixtures/'
r_fixtures = requests.get(url_fixtures)
fixtures = r_fixtures.json()

In [101]:
match_events = [ele['stats'] for ele in fixtures]

def match_events_search(variable):
    events_list = []
    for num, match in enumerate(match_events):
        for event in match:
            if event['identifier'] == variable:
                events = []
                for k in event['a']:
                    events.append((fixtures[num]['id'], k['element'], k['value'], variable))
                for p in event['h']:
                    events.append((fixtures[num]['id'], p['element'], p['value'], variable))
                if events != []:
                    events_list.append(events)
    return [ele for elem in events_list for ele in elem]

events = ['goals_scored', 'assists', 'red_cards', 'yellow_cards', 'bonus', 'saves']
event_data = []

for i in events:
    event_data += match_events_search(i)
    
event_df_long = pd.DataFrame(event_data, columns = ['fix_id', 'player_id', 'count', 'variable'])

In [102]:
event_df = event_df_long.pivot(index=['player_id', 'fix_id'], columns='variable', values='count').reset_index().fillna(0)

### Useful Functions

In [103]:
def label_team_join (row, team):
    if row['home_team'] != team :
        return row['home_team']
    return row['away_team']

def home_or_away (row, team):
    if row['home_team'] != team :
        return 'away'
    return 'home'

def is_home (row, team):
    if row['home_team'] != team :
        return 1
    return 0

def below_half (row):
    if row['game_diff'] > 10 :
        return 1
    return 0

def brace_or_more (row):
    if row['goals_scored'] >= 2:
        return 1
    return 0

def lag_avg_shift(col, time=6):
    play_df[str(time) + 'G_avg_' + col] = list(play_df.groupby(['player_id']).rolling(time)[col].mean())
    play_df[str(time) + 'G_avg_' + col] = play_df.groupby(['player_id'])[str(time) + 'G_avg_' + col].shift(1)

def lag_sum_shift(col, time=6):
    play_df[str(time) + 'G_sum_' + col] = list(play_df.groupby(['player_id']).rolling(time)[col].sum())
    play_df[str(time) + 'G_sum_' + col] = play_df.groupby(['player_id'])[str(time) + 'G_sum_' + col].shift(1)
    
def lag_avg(col, time=6):
    upcom_df[str(time) + 'G_avg_' + col] = list(upcom_df.groupby(['player_id']).rolling(time)[col].mean())

def lag_sum(col, time=6):
    upcom_df[str(time) + 'G_sum_' + col] = list(upcom_df.groupby(['player_id']).rolling(time)[col].sum())
    
def goal_diff (row, team):
    if row['Home Team'] != team :
        return row['Away Goals'] - row['Home Goals']
    return row['Home Goals'] - row['Away Goals']

def goals_for (row, team):
    if row['Home Team'] != team :
        return row['Away Goals']
    return row['Home Goals']

def points (row):
    if row['Goal Diff'] > 0 :
        return 3
    elif row['Goal Diff'] == 0:
        return 1
    return 0

### League Table Construction

In [104]:
results = [[i['team_h'], i['team_a'],i['event'],i['team_h_score'],i['team_a_score']] for i in fixtures if i['started'] == True]
results = pd.DataFrame(results,columns=['Home Team','Away Team','Game Week','Home Goals','Away Goals'])
season_results = pd.DataFrame(columns=['Team','Game Week','Goals For','Goal Diff'])

for i in range(1,21,1):
    team = i
    team_results = results[(results['Home Team'] == team) | (results['Away Team'] == team)]
    if team_results.shape[0] == 0:
        continue
    team_results['Team'] = team
    team_results['Goal Diff'] = team_results.apply(lambda row: goal_diff(row, team), axis=1)
    team_results['Goals For'] = team_results.apply(lambda row: goals_for(row, team), axis=1)
    loop_dat = team_results[['Team', 'Game Week', 'Goals For','Goal Diff']]
    season_results = pd.concat([season_results, loop_dat])
    

season_results['Goals Against'] = season_results['Goals For'] - season_results['Goal Diff']
season_results['Points'] = season_results.apply(lambda row: points(row), axis=1)
season_results[["Goals For", "Goal Diff", 'Goals Against']] = season_results[["Goals For", "Goal Diff", 'Goals Against']].apply(pd.to_numeric)

In [105]:
team_names = teams_df[['id','name']]
league_table = season_results.groupby(by=['Team']).sum()
league_table = pd.merge(league_table, team_names, left_on='Team', right_on='id', how='left')
league_table = league_table.rename({'name': 'Team'}, axis=1)
league_table = league_table.sort_values(['Points', 'Goal Diff', 'Goals For'], ascending=[False, False, False])

In [106]:
league_table

Unnamed: 0,Goals For,Goal Diff,Goals Against,Points,id,Team
0,51,28,23,54,1,Arsenal
12,60,36,24,52,13,Man City
13,41,13,28,49,14,Man Utd
17,44,9,35,42,18,Spurs
14,35,20,15,41,15,Newcastle
8,35,5,30,38,9,Fulham
4,39,10,29,35,5,Brighton
11,38,10,28,35,12,Liverpool
3,37,7,30,35,4,Brentford
5,23,0,23,31,6,Chelsea


In [107]:
team_diff_curr_season = pd.DataFrame({'Team_id':league_table.id,'Current Season Diff': range(1,21,1)
                                      , 'team_name':league_table.Team})

## Modelling Data

In [108]:
ids = [(ele['id'], ele['kickoff_time'], ele['team_h'], ele['team_a']) for ele in fixtures]
fix_start_df = pd.DataFrame(ids, columns = ['fix_id', 'start_time','home_team','away_team'])
player_info = slim_elements_df[['id', 'first_name', 'second_name', 'position'
                                , 'team_name', 'team'
                               , 'influence','creativity','threat']]
player_info[['influence','creativity','threat']] = player_info[['influence','creativity','threat']].apply(pd.to_numeric)

play_df = df_final.merge(fix_start_df, on='fix_id')
play_df = play_df.merge(player_info, left_on='player_id', right_on='id').drop('id', axis=1)
play_df['playing_team'] = play_df.apply(lambda row: label_team_join(row, row['team']), axis=1).astype(object)
play_df['home_or_away'] = play_df.apply(lambda row: home_or_away(row, row['team']), axis=1)
play_df = play_df.merge(team_diff_curr_season, left_on='playing_team', right_on='Team_id').drop('Team_id', axis=1).rename({'team_name_y':'playing_team_name'
                                                                                                                           ,'team_name_x':'team_name'
                                                                                                                           ,'Current Season Diff': "game_diff"},axis=1)
play_df = play_df.merge(team_diff_curr_season, left_on='team', right_on='Team_id').drop(['Team_id','team_name_y'], axis=1).rename({'team_name_x':'team_name'
                                                                                                                           ,'Current Season Diff': "team_qual"},axis=1)
play_df = play_df.merge(event_df, how='left', on=['player_id','fix_id']).fillna(0)
play_df['start_time'] = pd.to_datetime(play_df['start_time'])

In [109]:
play_build_df = play_df.set_index('start_time')
play_build_df.columns

Index(['player_id', 'fix_id', 'minutes', 'points', 'home_team', 'away_team',
       'first_name', 'second_name', 'position', 'team_name', 'team',
       'influence', 'creativity', 'threat', 'playing_team', 'home_or_away',
       'game_diff', 'playing_team_name', 'team_qual', 'assists', 'bonus',
       'goals_scored', 'red_cards', 'saves', 'yellow_cards'],
      dtype='object')

In [110]:
play_df = play_build_df.sort_values(by = ['player_id', 'start_time'], ascending = [True, True])

play_df['below_half'] = play_df.apply(lambda row: below_half(row), axis=1)
play_df['brace_or_more'] = play_df.apply(lambda row: brace_or_more(row), axis=1)

    
play_df = pd.get_dummies(play_df, columns=['position'])
play_df = play_df.drop('position_Goalkeeper', axis=1)

play_df = pd.get_dummies(play_df, columns=['home_or_away'])
play_df = play_df.drop('home_or_away_away', axis=1)
play_df = play_df.rename(columns={'home_or_away_home': 'is_home_game'})

avg_columns = ['points', 'minutes', 'bonus', 'goals_scored', 'assists', 'saves', 'game_diff']
time = 6
for i in avg_columns:   
    lag_avg_shift(i, time)
    
sum_columns = ['below_half', 'is_home_game', 'brace_or_more']
for i in sum_columns:
    lag_sum_shift(i, time)
    
qual_cols = [str(time)+'G_avg_goals_scored', str(time)+'G_avg_assists']

for i in qual_cols:
    play_df[i + '_qual'] = play_df[i] / (play_df[str(time)+'G_sum_below_half'] + 1)

play_df['qual_diff'] = play_df['game_diff'] - play_df['team_qual']

play_df.head()

Unnamed: 0_level_0,player_id,fix_id,minutes,points,home_team,away_team,first_name,second_name,team_name,team,influence,creativity,threat,playing_team,game_diff,playing_team_name,team_qual,assists,bonus,goals_scored,red_cards,saves,yellow_cards,below_half,brace_or_more,position_Defender,position_Forward,position_Midfielder,is_home_game,6G_avg_points,6G_avg_minutes,6G_avg_bonus,6G_avg_goals_scored,6G_avg_assists,6G_avg_saves,6G_avg_game_diff,6G_sum_below_half,6G_sum_is_home_game,6G_sum_brace_or_more,6G_avg_goals_scored_qual,6G_avg_assists_qual,qual_diff
start_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2022-08-05 19:00:00+00:00,1,1,0,0,7,1,Cédric,Alves Soares,Fulham,9,2.8,1.7,0.0,7,12,Crystal Palace,6,0.0,0.0,0.0,0.0,0.0,0.0,1,0,1,0,0,0,,,,,,,,,,,,,6
2022-08-13 14:00:00+00:00,1,11,0,0,1,10,Cédric,Alves Soares,Fulham,9,2.8,1.7,0.0,1,1,Arsenal,6,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,,,,,,,,,,,,,-5
2022-08-20 16:30:00+00:00,1,21,0,0,3,1,Cédric,Alves Soares,Fulham,9,2.8,1.7,0.0,3,17,Bournemouth,6,0.0,0.0,0.0,0.0,0.0,0.0,1,0,1,0,0,0,,,,,,,,,,,,,11
2022-08-27 16:30:00+00:00,1,31,0,0,1,9,Cédric,Alves Soares,Fulham,9,2.8,1.7,0.0,1,1,Arsenal,6,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,,,,,,,,,,,,,-5
2022-08-31 18:30:00+00:00,1,41,0,0,1,2,Cédric,Alves Soares,Fulham,9,2.8,1.7,0.0,1,1,Arsenal,6,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0,0,,,,,,,,,,,,,-5


In [111]:
model_data = play_df[['player_id', 'fix_id','points', 'qual_diff', 'game_diff', 'team_qual','below_half'
                     ,'influence', 'creativity', 'threat'
                     ,'position_Defender', 'position_Forward', 'position_Midfielder'
                     ,'is_home_game', str(time)+'G_avg_points', str(time)+'G_avg_minutes', str(time)+'G_avg_bonus'
                     , str(time)+'G_avg_goals_scored', str(time)+'G_avg_assists', str(time)+'G_avg_goals_scored_qual'
                     , str(time)+'G_avg_assists_qual', str(time)+'G_sum_brace_or_more']].dropna()

## Model Training

In [112]:
mids_fwds = model_data
X = mids_fwds.drop(['points'
                    ,'position_Midfielder','position_Defender', 'game_diff', 'team_qual'
                    , str(time)+'G_avg_bonus', str(time)+'G_avg_goals_scored', str(time)+'G_avg_assists'
                    , str(time)+'G_avg_minutes', str(time)+'G_avg_goals_scored_qual', str(time)+'G_avg_assists_qual'
                    ]
                   , axis=1)
y = mids_fwds['points']

In [113]:
# mids_fwds = model_data[(model_data['position_Forward'] == 1) | (model_data['position_Midfielder'] == 1)]
mids_fwds = model_data
X = mids_fwds.drop(['points'
                    ,'position_Midfielder','position_Defender', 'game_diff', 'team_qual'
                    , str(time)+'G_avg_bonus', str(time)+'G_avg_goals_scored', str(time)+'G_avg_assists'
                    , str(time)+'G_avg_minutes', str(time)+'G_avg_goals_scored_qual', str(time)+'G_avg_assists_qual'
                    ]
                   , axis=1)
y = mids_fwds['points']

In [114]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
test_player_ids = X_test['player_id']
test_fix_ids = X_test['fix_id']
X_train = X_train.drop(['player_id', 'fix_id'], axis=1)
X_test = X_test.drop(['player_id', 'fix_id'], axis=1)

In [115]:
regr = linear_model.LinearRegression()

regr.fit(X_train, y_train)

y_pred = regr.predict(X_test)

error = np.sqrt(np.mean((y_pred - y_test) ** 2))
print(error)

2.254580601378159


In [116]:
coefficients = pd.concat([pd.DataFrame(X_train.columns),pd.DataFrame(np.transpose(regr.coef_))], axis = 1)
coefficients

Unnamed: 0,0,0.1
0,qual_diff,0.017374
1,below_half,0.238775
2,influence,0.005169
3,creativity,0.001119
4,threat,0.000642
5,position_Forward,0.20268
6,is_home_game,0.266337
7,6G_avg_points,0.180589
8,6G_sum_brace_or_more,-0.857915


## Upcoming Games for Prediction

In [138]:
games = [[i['team_h'], i['team_a'],i['event']
          ,i['team_h_difficulty'],i['team_a_difficulty']] for i in fixtures]
games = pd.DataFrame(games,columns=['Home Team','Away Team','Gameweek'
                                    ,'Home Team Difficulty','Away Team Difficulty'])

games_1 = pd.merge(games, team_names, left_on='Home Team', right_on='id', how='left')
games_2 = pd.merge(games_1, team_names, left_on='Away Team', right_on='id', how='left')
games_2 = games_2.rename({'name_x': 'home_team', 'name_y': 'away_team'}, axis=1)

season_fixtures = pd.DataFrame(columns=['home_team','is_home_game','Gameweek','playing_join'])

team_names_vec = team_names['name']

for i in team_names_vec:
    team = i
    team_fixtures = games_2[(games_2['home_team'] == team) | (games_2['away_team'] == team)]
    team_fixtures['is_home_game'] = team_fixtures.apply(lambda row: is_home(row, team), axis=1)
    team_fixtures['playing_join'] = team_fixtures.apply(lambda row: label_team_join(row, team), axis=1)
    team_fixtures['home_team'] = team
    loop_dat = team_fixtures[['home_team', 'is_home_game', 'Gameweek','playing_join']]
    season_fixtures = pd.concat([season_fixtures, loop_dat])

In [139]:
next_game_weeks = gameweek_gone + future_gameweeks

upcoming_fixtures = season_fixtures[(season_fixtures['Gameweek'] > gameweek_gone) 
                                    & (season_fixtures['Gameweek'] <= next_game_weeks)]

upcoming_fixtures = upcoming_fixtures.merge(team_diff_curr_season, left_on = 'playing_join', right_on = 'team_name').rename({'Current Season Diff':'game_diff'}, axis=1)
drop_cols = ['team_name_y','Team_id_y','team_name_x']
upcoming_fixtures = upcoming_fixtures.merge(team_diff_curr_season
                                            , left_on = 'home_team'
                                            , right_on = 'team_name').rename({'Current Season Diff':'team_qual'}, axis=1).drop(columns=drop_cols)
upcoming_fixtures['qual_diff'] = upcoming_fixtures['game_diff'] - upcoming_fixtures['team_qual']
upcoming_fixtures['below_half'] = upcoming_fixtures.apply(lambda row: below_half(row), axis=1)
upcoming_fixtures.sort_values(by=['home_team','Gameweek']).head()

Unnamed: 0,home_team,is_home_game,Gameweek,playing_join,Team_id_x,game_diff,team_qual,qual_diff,below_half
0,Arsenal,1,25.0,Leicester,10,14,1,13,1
1,Arsenal,0,25.0,Everton,8,16,1,15,1
2,Arsenal,0,26.0,Bournemouth,3,17,1,16,1
3,Arsenal,1,27.0,Fulham,9,6,1,5,0
4,Arsenal,0,28.0,Crystal Palace,7,12,1,11,1


In [140]:
match_vars = upcoming_fixtures.groupby('home_team').agg({'is_home_game':'mean', 
                                            'qual_diff':'mean', 
                                            'below_half':'mean',
                                            'team_qual':'size'}).reset_index().rename({'team_qual':'number_of_games'}, axis=1)
match_vars.head()

Unnamed: 0,home_team,is_home_game,qual_diff,below_half,number_of_games
0,Arsenal,0.428571,12.142857,0.714286,7
1,Aston Villa,0.5,3.333333,0.833333,6
2,Bournemouth,0.5,-10.0,0.333333,6
3,Brentford,0.5,2.333333,0.5,6
4,Brighton,0.333333,3.833333,0.5,6


In [141]:
upcom_df = play_build_df.sort_values(by = ['player_id', 'start_time'], ascending = [True, True])

upcom_df['below_half'] = upcom_df.apply(lambda row: below_half(row), axis=1)
upcom_df['brace_or_more'] = upcom_df.apply(lambda row: brace_or_more(row), axis=1)
 
upcom_df['player_position'] = upcom_df['position']
upcom_df = pd.get_dummies(upcom_df, columns=['position'])
upcom_df = upcom_df.drop('position_Goalkeeper', axis=1)

avg_columns = ['points', 'minutes', 'bonus', 'goals_scored', 'assists', 'saves', 'game_diff']
for i in avg_columns:   
    lag_avg(i, time)
    
sum_columns = ['below_half','brace_or_more']
for i in sum_columns:
    lag_sum(i, time)
    
qual_cols = [str(time)+'G_avg_goals_scored', str(time)+'G_avg_assists']

for i in qual_cols:
    upcom_df[i + '_qual'] = upcom_df[i] / (upcom_df[str(time)+'G_sum_below_half'] + 1)

In [142]:
predict_data_full = upcom_df.groupby("player_id").last().dropna()
predict_data_full = predict_data_full.merge(match_vars, left_on='team_name', right_on='home_team')
model_vars = ['qual_diff','below_half_y','influence','creativity'
             ,'threat','position_Forward','is_home_game',str(time)+'G_avg_points'
             ,str(time)+'G_sum_brace_or_more']
predict_data = predict_data_full[model_vars].rename({'below_half_y':'below_half'}, axis=1)
# predict_data = predict_data.dropna()
predict_data.head()

Unnamed: 0,qual_diff,below_half,influence,creativity,threat,position_Forward,is_home_game,6G_avg_points,6G_sum_brace_or_more
0,5.333333,0.5,2.8,1.7,0.0,0,0.5,0.166667,0.0
1,5.333333,0.5,730.4,10.0,0.0,0,0.5,5.333333,0.0
2,5.333333,0.5,0.2,0.0,0.0,0,0.5,0.0,0.0
3,5.333333,0.5,84.0,164.4,140.0,0,0.5,0.666667,0.0
4,5.333333,0.5,463.6,32.6,72.0,0,0.5,4.666667,0.0


## Prediction and Results

In [143]:
y_pred = regr.predict(predict_data)

In [144]:
results = predict_data_full[['first_name', 'second_name', 'team_name','player_position']]
results['prediction_per_game'] = y_pred

results = results.merge(match_vars[['home_team','number_of_games']]
                        , left_on='team_name'
                        , right_on='home_team').drop('home_team',axis=1)

results['prediction_6GWs'] = results['prediction_per_game'] * results['number_of_games']

In [150]:
results = results.sort_values(by='prediction_6GWs', ascending=False)
results[results['player_position'] == 'Goalkeeper'].head(30)
# results.head(20)

Unnamed: 0,first_name,second_name,team_name,player_position,prediction_per_game,number_of_games,prediction_6GWs
1,Bernd,Leno,Fulham,Goalkeeper,4.944566,6,29.667396
347,Alisson,Ramses Becker,Liverpool,Goalkeeper,4.220012,7,29.540081
198,David,Raya Martin,Brentford,Goalkeeper,4.917423,6,29.504538
52,Vicente,Guaita,Crystal Palace,Goalkeeper,3.904409,7,27.330864
248,Jordan,Pickford,Everton,Goalkeeper,3.702028,7,25.914193
470,José,Malheiro de Sá,Wolves,Goalkeeper,3.407862,7,23.855031
39,Aaron,Ramsdale,Arsenal,Goalkeeper,3.109973,7,21.769808
90,Gavin,Bazunu,Southampton,Goalkeeper,2.978861,7,20.852026
442,Hugo,Lloris,Spurs,Goalkeeper,3.291783,6,19.750696
147,Kepa,Arrizabalaga,Chelsea,Goalkeeper,3.28133,6,19.687977


In [125]:
predict_data_full[(predict_data_full['second_name'] == 'Rashford') | (predict_data_full['second_name'] == 'Borges Fernandes')]

Unnamed: 0,fix_id,minutes,points,home_team_x,away_team,first_name,second_name,team_name,team,influence,creativity,threat,playing_team,home_or_away,game_diff,playing_team_name,team_qual,assists,bonus,goals_scored,red_cards,saves,yellow_cards,below_half_x,brace_or_more,player_position,position_Defender,position_Forward,position_Midfielder,6G_avg_points,6G_avg_minutes,6G_avg_bonus,6G_avg_goals_scored,6G_avg_assists,6G_avg_saves,6G_avg_game_diff,6G_sum_below_half,6G_sum_brace_or_more,6G_avg_goals_scored_qual,6G_avg_assists_qual,home_team_y,is_home_game,qual_diff,below_half_y,number_of_games
423,236,90,12,14,10,Bruno,Borges Fernandes,Man Utd,14,644.8,935.3,589.0,10,home,14,Leicester,3,2.0,3.0,0.0,0.0,0.0,0.0,1,0,Midfielder,0,0,1,7.0,90.0,1.5,0.333333,0.5,0.0,12.833333,5.0,0.0,0.055556,0.083333,Man Utd,0.6,8.2,0.4,5
424,236,68,15,14,10,Marcus,Rashford,Man Utd,14,660.0,292.4,973.0,10,home,14,Leicester,3,0.0,2.0,2.0,0.0,0.0,0.0,1,1,Midfielder,0,0,1,8.666667,86.333333,0.833333,1.0,0.166667,0.0,12.833333,5.0,1.0,0.166667,0.027778,Man Utd,0.6,8.2,0.4,5


# -------------------- END OF THE CURRENT DOCUMENT ---------------------------

##  EXTRA MODELLING 

In [954]:
model_xgb_mf = XGBRegressor(
                        n_estimators = 1000,
                        max_depth = 8,
                        subsample = 0.6,
                        min_child_weight = 60, 
                        learning_rate = 0.05)

model_xgb_mf.fit(X_train, y_train,
            verbose = False)

y_pred = model_xgb_mf.predict(X_test)

error = np.sqrt(np.mean((y_pred - y_test) ** 2))
print(error)

2.47377482159355


In [1278]:
# mids_fwds = model_data[(model_data['position_Forward'] == 1) | (model_data['position_Midfielder'] == 1)]
mids_fwds = model_data
X = mids_fwds.drop(['points'
                    ,'position_Midfielder','position_Defender'
                    ]
                   , axis=1)
y = mids_fwds['points']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
test_player_ids = X_test['player_id']
test_fix_ids = X_test['fix_id']
X_train = X_train.drop(['player_id', 'fix_id'], axis=1)
X_test = X_test.drop(['player_id', 'fix_id'], axis=1)

In [1227]:
alphas = np.linspace(0.1,10,20)
log_store_test = []
log_store_train = []
for i in alphas:
    regr = linear_model.Lasso(alpha=i)

    regr.fit(X_train, y_train)
    
    y_train_pred = regr.predict(X_train)
    y_pred = regr.predict(X_test)
    
    error_train = np.sqrt(np.mean((y_train_pred - y_train) ** 2))
    error_test = np.sqrt(np.mean((y_pred - y_test) ** 2))
    log_store_test.append(error_test)
    log_store_train.append(error_train)
print(log_store_test)

[2.222156472924126, 2.2240118659510535, 2.2270259195671485, 2.226938442080421, 2.2270992298430277, 2.227508282059376, 2.228164696793755, 2.2290717339798394, 2.2291456833987224, 2.2291219237697795, 2.229103108870923, 2.2290892388273624, 2.229080313731404, 2.2290763336424453, 2.229077298586975, 2.22908320855857, 2.2290940635178997, 2.229109863392722, 2.2291306080778903, 2.2291562974353543]


In [1228]:
X_test['pred'] = y_pred
X_test['actual'] = y_test
X_test['error'] = abs(y_pred - y_test)
X_test['player_id'] = test_player_ids
X_test['fix_id'] = test_fix_ids

players_pred = X_test.merge(play_df[['player_id', 'fix_id', 'first_name', 'second_name']], on=['player_id', 'fix_id'])
players_pred.sort_values(by='pred', ascending=False).head(20)

Unnamed: 0,qual_diff,game_diff,team_qual,below_half,influence,creativity,threat,position_Forward,is_home_game,6G_avg_points,6G_avg_minutes,6G_avg_bonus,6G_avg_goals_scored,6G_avg_assists,6G_avg_goals_scored_qual,6G_avg_assists_qual,6G_sum_brace_or_more,pred,actual,error,player_id,fix_id,first_name,second_name
1325,13,15,2,1,978.4,228.5,1300.0,1,1,5.0,89.833333,0.666667,0.666667,0.0,0.222222,0.0,1.0,7.187762,17,9.812238,318,208,Erling,Haaland
488,14,16,2,1,978.4,228.5,1300.0,1,1,5.666667,62.5,1.166667,0.833333,0.0,0.277778,0.0,2.0,7.187762,6,1.187762,318,175,Erling,Haaland
1055,-1,1,2,0,978.4,228.5,1300.0,1,0,5.5,77.333333,0.5,0.666667,0.166667,0.222222,0.055556,1.0,7.187762,6,1.187762,318,111,Erling,Haaland
899,12,14,2,1,978.4,228.5,1300.0,1,0,9.833333,88.166667,1.5,1.333333,0.333333,0.333333,0.083333,2.0,7.187762,0,7.187762,318,137,Erling,Haaland
1345,-4,1,5,0,779.2,489.0,1260.0,1,0,8.0,87.833333,1.666667,1.0,0.333333,0.2,0.066667,1.0,6.584573,6,0.584573,427,81,Harry,Kane
1308,7,12,5,1,779.2,489.0,1260.0,1,0,6.166667,90.0,1.166667,0.666667,0.166667,0.166667,0.041667,0.0,6.584573,16,9.415427,427,185,Harry,Kane
1066,11,16,5,1,779.2,489.0,1260.0,1,1,7.833333,87.666667,1.5,1.0,0.333333,0.25,0.083333,1.0,6.584573,9,2.415427,427,109,Harry,Kane
698,3,8,5,0,779.2,489.0,1260.0,1,1,5.5,89.833333,1.166667,0.666667,0.0,0.222222,0.0,0.0,6.584573,8,1.415427,427,148,Harry,Kane
823,9,14,5,1,779.2,489.0,1260.0,1,1,6.666667,87.833333,1.5,0.833333,0.166667,0.166667,0.033333,1.0,6.584573,10,3.415427,427,79,Harry,Kane
1281,5,7,2,0,634.4,1141.3,500.0,0,1,5.666667,69.166667,0.5,0.0,1.0,0.0,0.2,0.0,5.813108,9,3.186892,301,125,Kevin,De Bruyne


In [907]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [913]:
numeric_features = ['qual_diff', '3G_avg_points', '3G_sum_brace_or_more']

ct = ColumnTransformer([
        ('somename', StandardScaler(), numeric_features)
    ], remainder='passthrough')

X_train = ct.fit_transform(X_train)

In [909]:
# X_train = X_train_fit.transform(X_train)
# X_train.head()

Need to scale the variables - can refer to one of the Kaggle files for guidance

In [914]:
model = Sequential()
 
model.add(Dense(units=5, input_dim=6, kernel_initializer='normal', activation='relu'))
 
model.add(Dense(units=5, kernel_initializer='normal', activation='tanh'))
 
model.add(Dense(1, kernel_initializer='normal'))
 
model.compile(loss='mean_squared_error', optimizer='adam')
 
model.fit(X_train, y_train ,batch_size = 20, epochs = 50, verbose=1)

Epoch 1/50


2023-02-18 17:32:05.114263: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x29ada1e70>

In [915]:
y_pred = model.predict(X_test)



2023-02-18 17:32:48.758490: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [916]:
y_pred = [num for numb in y_pred for num in numb]

error = np.sqrt(np.mean((y_pred - y_test) ** 2))
print(error)

2.8614483802936532


Corrections:
- Separate out midfielders and forwards, goalkeepers, defenders
- Add extra fields for specific player types - saves and clean sheets
- Add in the creativity, influence fields for each player
- Get the scaling stuff from one of the Kaggle files - good preprocessing
- Tune the neural network for epochs and batch size

In [676]:
error_frame = pd.DataFrame(zip(y_pred, y_test, abs(y_pred - y_test)), columns = ['prediction', 'actual', 'error'])
error_frame.sort_values(by='error', ascending=False)

Unnamed: 0,prediction,actual,error
2251,0.015770,19,18.984230
927,5.127660,23,17.872340
1868,3.236477,18,14.763523
478,0.987629,15,14.012371
79,3.443389,17,13.556611
...,...,...,...
1361,-0.001101,0,0.001101
1168,0.001088,0,0.001088
113,0.000940,0,0.000940
635,0.000630,0,0.000630


Need to build out the next fixture and next 6 fixtures

In [1055]:
season_fixtures.head()

Unnamed: 0,home_team,is_home_game,Gameweek,playing_join
7,Arsenal,1,1.0,Crystal Palace
18,Arsenal,0,2.0,Leicester
32,Arsenal,1,3.0,Bournemouth
43,Arsenal,0,4.0,Fulham
51,Arsenal,0,5.0,Aston Villa


In [1056]:
pivot_fix = season_fixtures.pivot_table(index='home_team',
                                        columns = 'Gameweek',
                                        values='playing_join',
                                        aggfunc=lambda x: ' '.join(x)).reset_index()

pivot_fix = pivot_fix.set_index("home_team")

In [971]:
pivot_fix_diffi = season_fixtures.pivot_table(index='home_team',
                                        columns = 'Gameweek',
                                        values='Challenge').reset_index()

##### 

## Fixture Difficulty Construction

In [977]:
last_season_order = ['Man City','Liverpool','Chelsea', 'Spurs','Arsenal','Man Utd','West Ham','Leicester',
                    'Brighton','Wolves', 'Newcastle', 'Crystal Palace', 'Brentford', 'Aston Villa','Southampton',
                    'Everton','Leeds','Fulham','Bournemouth',"Nott'm Forest"]
team_diff_last_season = pd.DataFrame({'Team':last_season_order,'Last Season Diff':range(1,21,1)})

team_diff_curr_season = pd.DataFrame({'Team_id':league_table.id,'Current Season Diff': range(1,21,1)
                                      , 'team_name':league_table.Team})

# team_diff_table = team_diff_last_season.merge(team_diff_curr_season, on='Team')

In [67]:
team_diff_curr_season

Unnamed: 0,Team_id,Current Season Diff,team_name
0,1,1,Arsenal
12,13,2,Man City
13,14,3,Man Utd
14,15,4,Newcastle
17,18,5,Spurs
8,9,6,Fulham
4,5,7,Brighton
11,12,8,Liverpool
3,4,9,Brentford
5,6,10,Chelsea


In [979]:
gameweek_gone = 13
next_6 = gameweek_gone + 6
select = [0] + list(range(gameweek_gone, next_6 + 1,1))

season_diff = season_fixtures.merge(team_diff_table, left_on='playing_join', right_on='Team')

next_6_fixtures = pivot_fix_diffi.iloc[:,select]
next_6_fixtures['Total'] = next_6_fixtures.sum(axis=1,numeric_only=True)
next_6_fixtures['Average'] = next_6_fixtures['Total'] / (next_6 - gameweek_gone)

pivot_fix_diffi_tables_last = season_diff.pivot_table(index='Team_x',
                                        columns = 'Game Week',
                                        values=['Last Season Diff']).reset_index()
pivot_fix_diffi_tables_curr = season_diff.pivot_table(index='Team_x',
                                        columns = 'Game Week',
                                        values=['Current Season Diff']).reset_index()

next_6_fix_last = pivot_fix_diffi_tables_last.iloc[:,select]
next_6_fix_last['Total'] = next_6_fix_last.sum(axis=1,numeric_only=True)
next_6_fix_last['Average'] = next_6_fix_last['Total'] / (next_6 - gameweek_gone)
next_6_fix_last = next_6_fix_last.rename({'Team_x':'Team'},axis=1)
next_6_fix_last_merge = next_6_fix_last[['Team','Average']].droplevel(1, axis=1)

next_6_fix_curr = pivot_fix_diffi_tables_curr.iloc[:,select]
next_6_fix_curr['Total'] = next_6_fix_curr.sum(axis=1,numeric_only=True)
next_6_fix_curr['Average'] = next_6_fix_curr['Total'] / (next_6 - gameweek_gone)
next_6_fix_curr = next_6_fix_curr.rename({'Team_x':'Team'},axis=1)
next_6_fix_curr_merge = next_6_fix_curr[['Team','Average']].droplevel(1, axis=1)

upcoming_diff = next_6_fixtures.merge(next_6_fix_curr_merge, on = 'Team',suffixes=[' Diff Score',' Curr Table'])
upcoming_diff = upcoming_diff.merge(next_6_fix_last_merge, on = 'Team')
upcoming_diff = upcoming_diff[['Team','Average Diff Score', 'Average Curr Table', 'Average']].rename({'Average':'Average Last Table'}, axis=1)

In [980]:
weight_last = 0
weight_curr = 1

upcoming_diff['Average Table'] = (weight_last * upcoming_diff['Average Last Table']) + (weight_curr * upcoming_diff['Average Curr Table'])
upcoming_diff.sort_values(by='Average Table', ascending=False)
                                  

Unnamed: 0,Team,Average Diff Score,Average Curr Table,Average Last Table,Average Table
13,Man Utd,1.5,14.75,15.75,14.75
6,Crystal Palace,-0.583333,14.583333,14.583333,14.583333
4,Brighton,0.333333,14.166667,10.833333,14.166667
7,Everton,-0.833333,13.5,13.333333,13.5
14,Newcastle,1.166667,13.333333,13.333333,13.333333
11,Liverpool,1.5,13.333333,13.333333,13.333333
3,Brentford,0.0,13.166667,10.5,13.166667
17,Spurs,0.833333,12.916667,13.333333,12.916667
12,Man City,2.583333,12.833333,13.333333,12.833333
2,Bournemouth,-1.0,12.166667,11.833333,12.166667


In [981]:
league_table[['Goals For', 'Goals Against', 'Goal Diff', 'Points']]

Unnamed: 0,Goals For,Goals Against,Goal Diff,Points
0,51,23,28,54
12,60,24,36,52
13,38,28,10,46
14,35,15,20,41
17,42,35,7,39
8,35,30,5,38
4,39,29,10,35
11,38,28,10,35
3,37,30,7,35
5,23,23,0,31


In [982]:
next_6_fixtures.sort_values(by=['Average'], ascending=False)

Game Week,Team,14.0,15.0,16.0,17.0,18.0,19.0,20.0,Total,Average
12,Man City,2.0,3.0,2.0,3.0,3.0,1.0,1.5,15.5,2.583333
11,Liverpool,2.0,0.0,2.0,2.0,1.0,1.0,1.0,9.0,1.5
13,Man Utd,2.0,1.0,1.0,2.0,1.0,2.0,0.0,9.0,1.5
0,Arsenal,2.0,0.0,2.0,2.0,1.0,1.0,0.0,8.0,1.333333
14,Newcastle,2.0,1.0,1.0,0.0,2.0,-1.0,2.0,7.0,1.166667
17,Spurs,1.0,0.0,2.0,0.0,2.0,1.0,-1.0,5.0,0.833333
5,Chelsea,0.0,0.0,-1.0,2.0,1.0,0.0,2.0,4.0,0.666667
4,Brighton,0.0,1.0,1.0,1.0,-1.0,1.0,-1.0,2.0,0.333333
10,Leicester,-2.0,1.0,0.0,0.0,-1.0,1.0,1.0,0.0,0.0
3,Brentford,1.0,1.0,-2.0,0.0,0.0,-1.0,1.0,0.0,0.0


In [983]:
next_6_fix_last.sort_values(by='Average', ascending=False)

Unnamed: 0_level_0,Team,Last Season Diff,Last Season Diff,Last Season Diff,Last Season Diff,Last Season Diff,Last Season Diff,Last Season Diff,Total,Average
Game Week,Unnamed: 1_level_1,14.0,15.0,16.0,17.0,18.0,19.0,20.0,Unnamed: 9_level_1,Unnamed: 10_level_1
13,Man Utd,7.0,14.0,18.0,20.0,10.0,19.0,6.5,94.5,15.75
16,Southampton,12.0,11.0,2.0,9.0,18.0,20.0,16.0,88.0,14.666667
6,Crystal Palace,15.0,7.0,20.0,18.0,19.0,4.0,4.5,87.5,14.583333
5,Chelsea,9.0,5.0,11.0,19.0,20.0,9.5,12.0,85.5,14.25
12,Man City,8.0,18.0,13.0,17.0,16.0,3.0,5.0,80.0,13.333333
17,Spurs,19.0,2.0,17.0,13.0,14.0,12.0,3.0,80.0,13.333333
7,Everton,18.0,8.0,19.0,10.0,1.0,9.0,15.0,80.0,13.333333
14,Newcastle,14.0,15.0,3.0,8.0,17.0,5.0,18.0,80.0,13.333333
11,Liverpool,17.0,4.0,15.0,14.0,8.0,13.0,9.0,80.0,13.333333
10,Leicester,1.0,16.0,7.0,11.0,2.0,18.0,20.0,75.0,12.5


In [984]:
next_6_fix_curr.sort_values(by='Average', ascending=False)

Unnamed: 0_level_0,Team,Current Season Diff,Current Season Diff,Current Season Diff,Current Season Diff,Current Season Diff,Current Season Diff,Current Season Diff,Total,Average
Game Week,Unnamed: 1_level_1,14.0,15.0,16.0,17.0,18.0,19.0,20.0,Unnamed: 9_level_1,Unnamed: 10_level_1
13,Man Utd,16.0,11.0,7.0,14.0,15.0,19.0,6.5,88.5,14.75
6,Crystal Palace,20.0,16.0,14.0,7.0,19.0,5.0,6.5,87.5,14.583333
4,Brighton,10.0,15.0,11.0,20.0,2.0,18.0,9.0,85.0,14.166667
7,Everton,7.0,13.0,19.0,15.0,1.0,6.0,20.0,81.0,13.5
14,Newcastle,11.0,20.0,10.0,13.0,17.0,2.0,7.0,80.0,13.333333
11,Liverpool,17.0,5.0,20.0,11.0,13.0,8.0,6.0,80.0,13.333333
3,Brentford,15.0,14.0,1.0,5.0,16.0,9.0,19.0,79.0,13.166667
17,Spurs,19.0,9.0,17.0,8.0,11.0,12.0,1.5,77.5,12.916667
12,Man City,13.0,7.0,8.0,17.0,18.0,10.0,4.0,77.0,12.833333
2,Bournemouth,5.0,17.0,18.0,10.0,12.0,3.0,8.0,73.0,12.166667


In [1237]:
easy_order = upcoming_diff.sort_values(by=['Average Table'], ascending=False).iloc[:,0]
pivot_fix.loc[easy_order,25:38]

Gameweek,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0
home_team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Man Utd,,Liverpool,Southampton,Brighton,Newcastle,Everton,Nott'm Forest,Chelsea,Spurs,Aston Villa,West Ham,Wolves,Bournemouth,Fulham
Crystal Palace,Liverpool,Aston Villa,Man City Brighton,Arsenal,Leicester,Leeds,Southampton,Everton,Wolves,West Ham,Spurs,Bournemouth,Fulham,Nott'm Forest
Brighton,,West Ham,Leeds Crystal Palace,Man Utd,Brentford,Spurs,Chelsea,Man City,Nott'm Forest,Wolves,Everton,Arsenal,Southampton,Aston Villa
Everton,Aston Villa Arsenal,Nott'm Forest,Brentford,Chelsea,Spurs,Man Utd,Fulham,Crystal Palace,Newcastle,Leicester,Brighton,Man City,Wolves,Bournemouth
Newcastle,,Man City,Wolves,Nott'm Forest,Man Utd,Brentford,Aston Villa,Spurs,Everton,Southampton,Arsenal,Leeds,Leicester,Chelsea
Liverpool,Crystal Palace Wolves,Man Utd,Bournemouth,Fulham,Man City,Arsenal,Leeds,Nott'm Forest,West Ham,Spurs,Brentford,Leicester,Aston Villa,Southampton
Brentford,,Fulham,Everton Southampton,Leicester,Brighton,Newcastle,Wolves,Aston Villa,Chelsea,Nott'm Forest,Liverpool,West Ham,Spurs,Man City
Spurs,Chelsea,Wolves,Nott'm Forest,Southampton,Everton,Brighton,Bournemouth,Newcastle,Man Utd,Liverpool,Crystal Palace,Aston Villa,Brentford,Leeds
Man City,Bournemouth,Newcastle,Crystal Palace,West Ham,Liverpool,Southampton,Leicester,Brighton,Arsenal,Fulham,Leeds,Everton,Chelsea,Brentford
Bournemouth,Man City,Arsenal,Liverpool,Aston Villa,Fulham,Leicester,Spurs,West Ham,Southampton,Leeds,Chelsea,Crystal Palace,Man Utd,Everton


## ML Modelling

In [53]:
elements_df.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [135]:
player_vars = elements_df[['id','first_name','second_name','team','dreamteam_count','total_points','form','minutes'
                           ,'goals_scored','assists','clean_sheets','goals_conceded','own_goals','penalties_saved'
                           ,'penalties_missed', 'yellow_cards','red_cards','saves','bonus', 'bps', 'influence', 'creativity'
                           ,'threat','element_type']]
player_vars['position'] = player_vars.element_type.map(elements_types_df.set_index('id').singular_name)
player_vars['team_name'] = player_vars.team.map(teams_df.set_index('id').name)
player_vars = player_vars.reset_index()
player_vars['id_join'] = player_vars['index'] + 1
player_vars.head()

Unnamed: 0,index,id,first_name,second_name,team,dreamteam_count,total_points,form,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,element_type,position,team_name,id_join
0,0,1,Cédric,Alves Soares,1,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,2,Defender,Arsenal,1
1,1,3,Granit,Xhaka,1,1,26,4.3,537,1,2,2,7,0,0,0,1,0,0,2,113,118.0,124.5,56.0,3,Midfielder,Arsenal,2
2,2,4,Mohamed,Elneny,1,0,2,0.3,90,0,0,0,1,0,0,0,0,0,0,0,15,4.2,3.8,0.0,3,Midfielder,Arsenal,3
3,3,5,Rob,Holding,1,0,2,0.3,3,0,0,0,0,0,0,0,0,0,0,0,8,2.2,0.0,0.0,2,Defender,Arsenal,4
4,4,6,Thomas,Partey,1,0,8,1.3,270,0,0,2,2,0,0,0,0,0,0,0,39,41.2,21.5,49.0,3,Midfielder,Arsenal,5


In [127]:
fixtures_so_far = season_fixtures[season_fixtures['Game Week'] <= gameweek_gone]

player_fix_so_far = fixtures_so_far.merge(player_vars, left_on = "Team", right_on = "team_name")
player_fix_so_far.head()

Unnamed: 0,Team,Playing,Game Week,Challenge,playing_join,id,first_name,second_name,team,dreamteam_count,total_points,form,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,element_type,position,team_name


In [150]:
url_gameweek = 'https://fantasy.premierleague.com/api/event/'+str(k)+'/live/'
r = requests.get(url_gameweek)
data = r.json()
stats = [data['elements'][i]['stats'] for i in range(len(data['elements']))]
stats = pd.DataFrame(stats).reset_index()
stats['id'] = stats['index']

stats.head()

Unnamed: 0,index,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,total_points,in_dreamteam,id
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False,0
1,1,90,0,0,0,2,0,0,0,0,0,8,0,25,57.6,0.0,0.0,5.8,3,False,1
2,2,90,0,0,0,3,0,0,0,0,0,0,0,18,15.2,14.9,46.0,7.6,2,False,2
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False,3
4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False,4


In [99]:
last_fin_gameweek = 6
gameweeks = [x + 1 for x in list(range(last_fin_gameweek))]

df = {}
for j,k in enumerate(gameweeks):
        url_gameweek = 'https://fantasy.premierleague.com/api/event/'+str(k)+'/live/'
        r = requests.get(url_gameweek)
        data = r.json()
        stats = [data['elements'][i]['stats'] for i in range(len(data['elements']))]
        stats = pd.DataFrame(stats).reset_index()
        stats['id'] = stats['index']
        stats = stats[['id','total_points','goals_scored','assists'
                      ,'clean_sheets', 'goals_conceded','own_goals','penalties_saved'
                      ,'penalties_missed', 'yellow_cards','red_cards','saves'
                      ,'bonus']]
        stats = stats.set_index('id')
        stats['gameweek'] = k
        df[j] = stats

In [100]:
stats

Unnamed: 0_level_0,total_points,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,gameweek
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,0,0,0,0,0,0,0,0,0,0,0,6
1,3,0,0,0,2,0,0,0,0,0,8,0,6
2,2,0,0,0,3,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,0,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,0,0,0,0,0,0,0,0,0,0,0,0,6
620,0,0,0,0,0,0,0,0,0,0,0,0,6
621,0,0,0,0,0,0,0,0,0,0,0,0,6
622,0,0,0,0,0,0,0,0,0,0,0,0,6


In [101]:
points_data_upg = pd.concat(df.values(), ignore_index=False)
points_data_upg

Unnamed: 0_level_0,total_points,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,gameweek
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,0,0,1
2,2,0,0,1,0,0,0,0,1,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,0,0,0,0,0,0,0,0,0,0,0,0,6
620,0,0,0,0,0,0,0,0,0,0,0,0,6
621,0,0,0,0,0,0,0,0,0,0,0,0,6
622,0,0,0,0,0,0,0,0,0,0,0,0,6


In [108]:
points_data = pd.concat(df.values(), ignore_index=False)
points_data = points_data.pivot_table(index='id', columns = 'gameweek', values=['total_points'])
points_data['surname'] = df_final.id.map(elements_df.set_index('id').second_name)
points_data['forname'] = df_final.id.map(elements_df.set_index('id').first_name)
points_data['position'] = df_final.id.map(slim_elements_df.set_index('id').position)
points_data['team'] = df_final.id.map(slim_elements_df.set_index('id').team_name)
points_data.columns = list(points_data.columns.get_level_values(0))
points_data['three_week_average'] = points_data.iloc[:,-7:-4].mean(axis=1)
points_data['six_week_average'] = points_data.iloc[:,-11:-5].mean(axis=1)
points_data['three_weeks_abv_6'] = (points_data.iloc[:,-9:-6]>6).sum(axis=1)
points_data['six_weeks_abv_6'] = (points_data.iloc[:,-13:-7]>6).sum(axis=1)
points_data['three_weeks_std'] = points_data.iloc[:,-11:-8].std(axis=1, skipna=True)
points_data['six_weeks_std'] = points_data.iloc[:,-15:-9].std(axis=1, skipna=True)
points_data = points_data.reset_index()

In [109]:
points_data.sort_values(by='id')

Unnamed: 0,id,total_points,total_points.1,total_points.2,total_points.3,total_points.4,total_points.5,surname,forname,position,team,three_week_average,six_week_average,three_weeks_abv_6,six_weeks_abv_6,three_weeks_std,six_weeks_std
0,0,0.0,0.0,0.0,0.0,0.0,0.0,Alves Soares,Cédric,,,0.000000,0.000000,0,0,0.000000,0.000000
1,1,0.0,0.0,2.0,3.0,2.0,3.0,Leno,Bernd,Goalkeeper,Fulham,2.666667,1.666667,0,0,0.577350,1.366260
2,2,2.0,12.0,6.0,2.0,2.0,2.0,Xhaka,Granit,Midfielder,Arsenal,2.000000,4.333333,0,1,0.000000,4.082483
3,3,0.0,0.0,0.0,2.0,0.0,0.0,Elneny,Mohamed,Midfielder,Arsenal,0.666667,0.333333,0,0,1.154701,0.816497
4,4,0.0,0.0,0.0,1.0,1.0,0.0,Holding,Rob,Defender,Arsenal,0.666667,0.333333,0,0,0.577350,0.516398
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,619,,,,,,0.0,Ramos de Oliveira Melo,Arthur Henrique,,,0.000000,0.000000,0,0,,
620,620,,,,,,0.0,Badé,Loïc,,,0.000000,0.000000,0,0,,
621,621,,,,,,0.0,Larios López,Juan,Defender,Southampton,0.000000,0.000000,0,0,,
622,622,,,,,,0.0,Caleta-Car,Duje,Defender,Southampton,0.000000,0.000000,0,0,,


In [110]:
gameweek_gone = 0
next_6 = gameweek_gone + 6
select = [0] + list(range(gameweek_gone + 1, next_6 + 1,1))

season_diff = season_fixtures.merge(team_diff_table, left_on='playing_join', right_on='Team')

next_6_fixtures = pivot_fix_diffi.iloc[:,select]
next_6_fixtures['Total'] = next_6_fixtures.sum(axis=1,numeric_only=True)
next_6_fixtures['Average'] = next_6_fixtures['Total'] / (next_6 - gameweek_gone)

pivot_fix_diffi_tables_last = season_diff.pivot_table(index='Team_x',
                                        columns = 'Game Week',
                                        values=['Last Season Diff']).reset_index()
pivot_fix_diffi_tables_curr = season_diff.pivot_table(index='Team_x',
                                        columns = 'Game Week',
                                        values=['Current Season Diff']).reset_index()

next_6_fix_last = pivot_fix_diffi_tables_last.iloc[:,select]
next_6_fix_last['Total'] = next_6_fix_last.sum(axis=1,numeric_only=True)
next_6_fix_last['Average'] = next_6_fix_last['Total'] / (next_6 - gameweek_gone)
next_6_fix_last = next_6_fix_last.rename({'Team_x':'Team'},axis=1)
next_6_fix_last_merge = next_6_fix_last[['Team','Average']].droplevel(1, axis=1)

next_6_fix_curr = pivot_fix_diffi_tables_curr.iloc[:,select]
next_6_fix_curr['Total'] = next_6_fix_curr.sum(axis=1,numeric_only=True)
next_6_fix_curr['Average'] = next_6_fix_curr['Total'] / (next_6 - gameweek_gone)
next_6_fix_curr = next_6_fix_curr.rename({'Team_x':'Team'},axis=1)
next_6_fix_curr_merge = next_6_fix_curr[['Team','Average']].droplevel(1, axis=1)

upcoming_diff = next_6_fixtures.merge(next_6_fix_curr_merge, on = 'Team',suffixes=[' Diff Score',' Curr Table'])
upcoming_diff = upcoming_diff.merge(next_6_fix_last_merge, on = 'Team')
upcoming_diff = upcoming_diff[['Team','Average Diff Score', 'Average Curr Table', 'Average']].rename({'Average':'Average Last Table'}, axis=1)

In [111]:
weight_last = 0.1
weight_curr = 0.9

upcoming_diff['Average Table'] = (weight_last * upcoming_diff['Average Last Table']) + (weight_curr * upcoming_diff['Average Curr Table'])
upcoming_diff.sort_values(by='Average Table', ascending=False)

Unnamed: 0,Team,Average Diff Score,Average Curr Table,Average Last Table,Average Table
12,Man City,2.666667,14.333333,13.833333,14.283333
17,Spurs,1.0,13.666667,12.166667,13.516667
0,Arsenal,1.0,12.833333,12.833333,12.833333
5,Chelsea,1.333333,12.666667,11.166667,12.516667
3,Brentford,0.166667,12.166667,12.833333,12.233333
7,Everton,-0.666667,11.666667,11.5,11.65
14,Newcastle,-0.166667,11.666667,9.0,11.4
4,Brighton,0.333333,11.166667,11.166667,11.166667
9,Leeds,-0.5,10.666667,11.0,10.7
11,Liverpool,2.0,10.333333,13.666667,10.666667


In [112]:
diff_avg = upcoming_diff[['Team','Average Table']] 

points_diff = points_data.merge(diff_avg, left_on='team', right_on='Team')
points_diff['six_week_avg_diff'] = points_diff['six_week_average'] / points_diff['Average Table']
points_diff.sort_values(by='id').head(20)

Unnamed: 0,id,total_points,total_points.1,total_points.2,total_points.3,total_points.4,total_points.5,surname,forname,position,team,three_week_average,six_week_average,three_weeks_abv_6,six_weeks_abv_6,three_weeks_std,six_weeks_std,Team,Average Table,six_week_avg_diff
0,1,0.0,0.0,2.0,3.0,2.0,3.0,Leno,Bernd,Goalkeeper,Fulham,2.666667,1.666667,0,0,0.57735,1.36626,Fulham,7.466667,0.223214
22,2,2.0,12.0,6.0,2.0,2.0,2.0,Xhaka,Granit,Midfielder,Arsenal,2.0,4.333333,0,1,0.0,4.082483,Arsenal,12.833333,0.337662
23,3,0.0,0.0,0.0,2.0,0.0,0.0,Elneny,Mohamed,Midfielder,Arsenal,0.666667,0.333333,0,0,1.154701,0.816497,Arsenal,12.833333,0.025974
24,4,0.0,0.0,0.0,1.0,1.0,0.0,Holding,Rob,Defender,Arsenal,0.666667,0.333333,0,0,0.57735,0.516398,Arsenal,12.833333,0.025974
25,5,3.0,2.0,3.0,0.0,0.0,0.0,Partey,Thomas,Midfielder,Arsenal,0.0,1.333333,0,0,0.0,1.505545,Arsenal,12.833333,0.103896
26,6,3.0,2.0,16.0,10.0,1.0,2.0,Ødegaard,Martin,Midfielder,Arsenal,4.333333,5.666667,1,2,4.932883,6.022181,Arsenal,12.833333,0.441558
27,7,1.0,1.0,1.0,2.0,2.0,0.0,Tierney,Kieran,Defender,Arsenal,1.333333,1.166667,0,0,1.154701,0.752773,Arsenal,12.833333,0.090909
28,9,5.0,1.0,7.0,1.0,8.0,1.0,White,Benjamin,Defender,Arsenal,3.333333,3.833333,1,2,4.041452,3.250641,Arsenal,12.833333,0.298701
29,10,1.0,1.0,1.0,1.0,1.0,1.0,Nketiah,Eddie,Forward,Arsenal,1.0,1.0,0,0,0.0,0.0,Arsenal,12.833333,0.077922
30,11,0.0,1.0,1.0,0.0,1.0,1.0,Smith Rowe,Emile,Midfielder,Arsenal,0.666667,0.666667,0,0,0.57735,0.516398,Arsenal,12.833333,0.051948


In [113]:
training_data = points_diff.merge(player_vars, left_on=["forname","surname"], right_on=["first_name","second_name"]).sort_values(by='id_x')
training_data

NameError: name 'player_vars' is not defined

In [114]:
variables_of_interest = training_data[['position_x'
                                      ,'three_week_average','six_week_average'
                                      ,'three_weeks_abv_6','six_weeks_abv_6'
                                      ,'three_weeks_std','six_weeks_std'
                                      ,'Average Table', 'dreamteam_count'
                                      ,'bps','influence','creativity','threat']]
variables_of_interest.head()

NameError: name 'training_data' is not defined

### Model Specifications

Does everything need to be compared to quality of the opposition faced? Most likely.

----

Goalkeepers - in last 6 weeks
Opposition in a given week
* Average goals scored by opposition in the last 6 weeks compared to quality of opposition faced
* Times above 2 goals

Quality of the player
* Saves
* Goals conceded
* Clean Sheets
* Bonus points
* Average points scored over previous weeks / form
* Dummies for top teams or something like that
* Cost of player
* 


-----------------------------------
----
Defenders - in last 6 weeks
Opposition in a given week
* Average goals scored by opposition in the last 6 weeks compared to quality of opposition faced
* Times above 2 goals

Quality of the player
* Goals
* Assists
* Clean Sheets
* Creativity etc
* bonus points

--------
--------

Midfielders - in last 6 weeks
Opposition in a given week
* Average goals conceded by opposition in the last 6 weeks compared to quality of opposition faced
* Times conceded more than 2 goals versus quality of opposition

Quality of the player
* Goals
* Assists
* Clean Sheets
* Creativity etc
* bonus points

-----
----

Forwards - in last 6 weeks
Opposition in a given week
* Average goals conceded by opposition in the last 6 weeks compared to quality of opposition faced
* Times conceded more than 2 goals versus quality of opposition


Quality of the player
* Goals
* Assists
* Creativity etc
* bonus points

