In [2]:
import pandas as pd

In [1]:
import soccerdata as sd


fbref = sd.FBref(leagues="ENG-Premier League", seasons=2021, no_cache=False)



In [4]:
schedule = fbref.read_schedule()
schedule = schedule[schedule.game_id.notna()]
schedule.loc[:, 'full_dt'] = pd.to_datetime(schedule.date.astype(str) + ' ' + schedule.time )
schedule.sort_values(by='full_dt', inplace=True)
schedule.loc[:, ['home_points', 'away_points']] = schedule.score.str.split('–', expand=True).values
schedule['home_result'] = 'draw'
schedule['away_result'] = 'draw'

schedule.loc[schedule['home_points'] > schedule['away_points'], 'home_result'] = 'win'
schedule.loc[schedule['home_points'] > schedule['away_points'], 'away_result'] = 'lose'
schedule.loc[schedule['home_points'] < schedule['away_points'], 'home_result'] = 'lose'
schedule.loc[schedule['home_points'] < schedule['away_points'], 'away_result'] = 'win'

schedule = schedule.reset_index().loc[:, ['game_id', 'home_team','away_team','home_points', 'away_points',  'home_result', 'away_result', 'full_dt']]

# DATAFRAME CONTENDO OS JOGOS ORDENADOS PELA DATA CONTENDO OS RESULTADOS PARA OS TIMES
schedule.head(5)

Unnamed: 0,game_id,home_team,away_team,home_points,away_points,home_result,away_result,full_dt
0,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00
1,db261cb0,Crystal Palace,Southampton,1,0,win,lose,2020-09-12 15:00:00
2,21b58926,Liverpool,Leeds United,4,3,win,lose,2020-09-12 17:30:00
3,78495ced,West Ham,Newcastle Utd,0,2,lose,win,2020-09-12 20:00:00
4,7dd01ca9,West Brom,Leicester City,0,3,lose,win,2020-09-13 14:00:00


In [5]:
def match_data(match_id):
    """ get match data based on id """
    players_match_stats = fbref.read_player_match_stats(stat_type="summary", match_id=match_id)
    players_match_stats = players_match_stats.reset_index().set_index('game_id').sort_index(axis=1,sort_remaining=True)
    players_match_stats = players_match_stats.drop(columns=['season', 'league', 'nation', 'jersey_number'])
    return players_match_stats

def preprocess_match_data(match_data):
    pmd = pd.DataFrame()
    pmd.loc[:, 'player'] = match_data.player
    pmd.loc[:, 'team'] = match_data.team
    pmd.loc[:, 'pos'] = match_data.pos

    pmd.loc[:, 'player_age'] = match_data.age.str.split('-').str.get(0)
    pmd.loc[:, 'time_played'] = match_data['min']
    pmd.loc[:, 'shots_on_target'] = match_data[('Performance', 'SoT')]
    pmd.loc[:, 'goals'] = match_data[('Performance', 'Gls')]
    pmd.loc[:, 'touches'] = match_data[('Performance', 'Touches')]
    pmd.loc[:, 'yellow_cards'] = match_data[('Performance', 'CrdY')]
    pmd.loc[:, 'completed_passes_perc'] = match_data[('Passes', 'Cmp%')]
                                        
    return pmd

md = match_data('bf52349b')
preprocess_match_data(md)


matches_data = preprocess_match_data(match_data(schedule.game_id.to_list()))
# matches_data = pd.concat(matches_data)
matches_data


Unnamed: 0_level_0,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
db261cb0,Andros Townsend,Crystal Palace,RM,29,90,0,0,38,0,43.3
db261cb0,Cheikhou Kouyaté,Crystal Palace,CB,30,90,1,0,25,0,57.9
db261cb0,Eberechi Eze,Crystal Palace,LM,22,10,0,0,13,0,80.0
db261cb0,James McArthur,Crystal Palace,CM,32,90,0,0,45,0,46.9
db261cb0,James McCarthy,Crystal Palace,CM,29,73,0,0,29,1,60.9
...,...,...,...,...,...,...,...,...,...,...
2c081c94,Romain Saïss,Wolverhampton Wanderers,CB,31,90,0,0,90,0,88.8
2c081c94,Rui Patrício,Wolverhampton Wanderers,GK,33,90,0,0,14,0,91.7
2c081c94,Rúben Neves,Wolverhampton Wanderers,CM,24,90,0,0,98,0,86.0
2c081c94,Willian José,Wolverhampton Wanderers,FW,29,64,1,0,22,0,72.2


In [6]:
positions = set(','.join(matches_data.pos).split(','))

In [19]:
import numpy as np 
x = schedule.head(1)

df = pd.merge(left=schedule, right=matches_data, left_on='game_id', right_index=True)

X_train = df.copy()
X_train.loc[:, 'result'] = np.where(X_train.team == X_train.home_team, X_train.home_result, X_train.away_result)
X_train.drop(columns=['game_id', 'home_team', 'away_team', 'home_points', 'away_points', 'home_result', 'away_result'], inplace=True)
X_train

0    bf52349b
Name: game_id, dtype: object


Unnamed: 0,full_dt,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,result
0,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23,90,0,0,49,0,74.4,win
0,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29,86,1,1,27,0,82.4,win
0,2020-09-12 12:30:00,Bernd Leno,Arsenal,GK,28,90,0,0,34,0,100.0,win
0,2020-09-12 12:30:00,Dani Ceballos,Arsenal,CM,24,12,0,0,20,0,78.9,win
0,2020-09-12 12:30:00,Eddie Nketiah,Arsenal,FW,21,4,0,0,1,0,100.0,win
...,...,...,...,...,...,...,...,...,...,...,...,...
379,2021-05-23 16:00:00,Romain Saïss,Wolverhampton Wanderers,CB,31,90,0,0,90,0,88.8,win
379,2021-05-23 16:00:00,Rui Patrício,Wolverhampton Wanderers,GK,33,90,0,0,14,0,91.7,win
379,2021-05-23 16:00:00,Rúben Neves,Wolverhampton Wanderers,CM,24,90,0,0,98,0,86.0,win
379,2021-05-23 16:00:00,Willian José,Wolverhampton Wanderers,FW,29,64,1,0,22,0,72.2,win


In [2]:
ws = sd.WhoScored('ENG-Premier League', seasons=2021, no_cache=False)


In [18]:
epl_schedule = pd.read_pickle('bkp_premier_league_2021_whoscored.pkl')
epl_schedule

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,game_id,url,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENG-Premier League,2021,2020-09-12 Crystal Palace-Southampton,2020-09-12 15:00:00,Crystal Palace,Southampton,1485186,https://1xbet.whoscored.com/Matches/1485186/Li...,
ENG-Premier League,2021,2020-09-12 Fulham-Arsenal,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,
ENG-Premier League,2021,2020-09-12 Liverpool-Leeds,2020-09-12 17:30:00,Liverpool,Leeds,1485188,https://1xbet.whoscored.com/Matches/1485188/Li...,
ENG-Premier League,2021,2020-09-12 West Ham-Newcastle,2020-09-12 20:00:00,West Ham,Newcastle,1485191,https://1xbet.whoscored.com/Matches/1485191/Li...,
ENG-Premier League,2021,2020-09-13 Tottenham-Everton,2020-09-13 16:30:00,Tottenham,Everton,1485189,https://1xbet.whoscored.com/Matches/1485189/Li...,
ENG-Premier League,2021,...,...,...,...,...,...,...
ENG-Premier League,2021,2021-05-23 Liverpool-Crystal Palace,2021-05-23 16:00:00,Liverpool,Crystal Palace,1485559,https://1xbet.whoscored.com/Matches/1485559/Li...,
ENG-Premier League,2021,2021-05-23 Manchester City-Everton,2021-05-23 16:00:00,Manchester City,Everton,1485560,https://1xbet.whoscored.com/Matches/1485560/Li...,
ENG-Premier League,2021,2021-05-23 Sheffield United-Burnley,2021-05-23 16:00:00,Sheffield United,Burnley,1485561,https://1xbet.whoscored.com/Matches/1485561/Li...,
ENG-Premier League,2021,2021-05-23 West Ham-Southampton,2021-05-23 16:00:00,West Ham,Southampton,1485562,https://1xbet.whoscored.com/Matches/1485562/Li...,


In [20]:
schedule[schedule.game_id=='bf52349b']

Unnamed: 0,game_id,home_team,away_team,home_points,away_points,home_result,away_result,full_dt
0,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00


In [21]:
epl_schedule.query("home_team == 'Fulham' and away_team == 'Arsenal' ")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,game_id,url,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENG-Premier League,2021,2020-09-12 Fulham-Arsenal,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,


In [3]:
actions = ws.read_events(match_id=1485187, output_fmt='spadl')
actions.head()

Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,end_x,start_y,end_y,type_id,result_id,bodypart_id,action_id,player,team
0,1485187,2210244073.0,1,0.0,170,243076.0,52.395,40.53,34.068,42.636,0,1,0,0,Aboubakar Kamara,Fulham
1,1485187,2210244081.0,1,1.0,170,79583.0,39.165,37.065,44.064,63.444,0,1,0,1,Tom Cairney,Fulham
2,1485187,2210244095.0,1,3.0,170,134331.0,37.065,23.31,63.648,51.952,0,1,0,2,Joe Bryan,Fulham
3,1485187,2210244119.0,1,6.0,170,85006.0,22.68,19.425,51.748,30.124,0,1,0,3,Tim Ream,Fulham
4,1485187,,1,8.0,170,91840.0,19.425,20.895,30.124,27.404,21,1,0,4,Michael Hector,Fulham


In [8]:
from socceraction import vaep

model = vaep.VAEP()
model.rate(game=None,game_actions=actions)

NotFittedError: 