In [19]:
import pandas as pd
import numpy as np

import socceraction.spadl as spd
from socceraction.vaep import features as ft
from tqdm import tqdm
import socceraction.vaep.labels as lab

# VAEP

## Formatando dados

In [10]:
data = pd.read_pickle("data/augusto/who_scored_premier_2021_events.pkl")
data

Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,end_x,start_y,end_y,type_id,result_id,bodypart_id,action_id,player,team
0,1485186,2210296239,1,0.0,18,105172.0,52.605,71.400,33.592,36.244,0,1,0,0,James Ward-Prowse,Southampton
1,1485186,2210296365,1,2.0,18,92550.0,71.610,26.880,35.904,8.772,0,0,0,1,Jack Stephens,Southampton
2,1485186,2210296409,1,5.0,162,66741.0,29.190,43.470,13.532,14.416,0,0,1,2,Cheikhou Kouyaté,Crystal Palace
3,1485186,2210296601,1,7.0,18,90780.0,37.065,7.455,11.016,18.632,0,0,0,3,Oriol Romeu,Southampton
4,1485186,2210296737,1,11.0,162,43105.0,6.825,12.600,20.876,26.792,0,0,0,4,Joel Ward,Crystal Palace
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1772,1485563,2302283833,2,3444.0,32,384143.0,97.860,91.140,59.840,68.000,18,1,0,1772,Brandon Williams,Man Utd
1773,1485563,2302283923,2,3464.0,161,121718.0,91.140,99.330,68.000,46.512,2,0,0,1773,Marçal,Wolves
1774,1485563,2302283933,2,3466.0,32,384143.0,95.865,79.590,49.640,55.488,18,1,1,1774,Brandon Williams,Man Utd
1775,1485563,2302283949,2,3468.0,161,16161.0,79.590,89.880,55.488,56.100,0,0,1,1775,João Moutinho,Wolves


In [14]:
data_names = spd.add_names(data)
data_names

Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,end_x,start_y,end_y,type_id,result_id,bodypart_id,action_id,player,team,type_name,result_name,bodypart_name
0,1485186,2210296239,1,0.0,18,105172.0,52.605,71.400,33.592,36.244,0,1,0,0,James Ward-Prowse,Southampton,pass,success,foot
1,1485186,2210296365,1,2.0,18,92550.0,71.610,26.880,35.904,8.772,0,0,0,1,Jack Stephens,Southampton,pass,fail,foot
2,1485186,2210296409,1,5.0,162,66741.0,29.190,43.470,13.532,14.416,0,0,1,2,Cheikhou Kouyaté,Crystal Palace,pass,fail,head
3,1485186,2210296601,1,7.0,18,90780.0,37.065,7.455,11.016,18.632,0,0,0,3,Oriol Romeu,Southampton,pass,fail,foot
4,1485186,2210296737,1,11.0,162,43105.0,6.825,12.600,20.876,26.792,0,0,0,4,Joel Ward,Crystal Palace,pass,fail,foot
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1772,1485563,2302283833,2,3444.0,32,384143.0,97.860,91.140,59.840,68.000,18,1,0,1772,Brandon Williams,Man Utd,clearance,success,foot
1773,1485563,2302283923,2,3464.0,161,121718.0,91.140,99.330,68.000,46.512,2,0,0,1773,Marçal,Wolves,throw_in,fail,foot
1774,1485563,2302283933,2,3466.0,32,384143.0,95.865,79.590,49.640,55.488,18,1,1,1774,Brandon Williams,Man Utd,clearance,success,head
1775,1485563,2302283949,2,3468.0,161,16161.0,79.590,89.880,55.488,56.100,0,0,1,1775,João Moutinho,Wolves,pass,fail,head


In [15]:
def features_transform(spadl):
    spadl.loc[spadl.result_id.isin([2, 3]), ['result_id']] = 0
    spadl.loc[spadl.result_name.isin(['offside', 'owngoal']), ['result_name']] = 'fail'

    xfns = [
        ft.actiontype_onehot,
        ft.bodypart_onehot,
        ft.result_onehot,
        ft.goalscore,
        ft.startlocation,
        ft.endlocation,
        ft.team,
        ft.time,
        ft.time_delta
    ]

    features = []
    for game in tqdm(np.unique(spadl.game_id).tolist()):
        match_actions = spadl.loc[spadl.game_id == game].reset_index(drop=True)
        match_states = ft.gamestates(actions=match_actions)
        match_feats = pd.concat([fn(match_states) for fn in xfns], axis=1)
        features.append(match_feats)
    features = pd.concat(features).reset_index(drop=True)

    return features

In [20]:
features = features_transform(data_names)
features

100%|██████████| 380/380 [00:08<00:00, 42.40it/s]


Unnamed: 0,type_pass_a0,type_cross_a0,type_throw_in_a0,type_freekick_crossed_a0,type_freekick_short_a0,type_corner_crossed_a0,type_corner_short_a0,type_take_on_a0,type_foul_a0,type_tackle_a0,...,time_seconds_a0,time_seconds_overall_a0,period_id_a1,time_seconds_a1,time_seconds_overall_a1,period_id_a2,time_seconds_a2,time_seconds_overall_a2,time_delta_1,time_delta_2
0,True,False,False,False,False,False,False,False,False,False,...,0.0,0.0,1,0.0,0.0,1,0.0,0.0,0.0,0.0
1,True,False,False,False,False,False,False,False,False,False,...,2.0,2.0,1,0.0,0.0,1,0.0,0.0,2.0,2.0
2,False,False,False,False,False,False,False,False,False,False,...,4.5,4.5,1,2.0,2.0,1,0.0,0.0,2.5,4.5
3,True,False,False,False,False,False,False,False,False,False,...,7.0,7.0,1,4.5,4.5,1,2.0,2.0,2.5,5.0
4,False,False,False,False,False,False,False,False,False,False,...,11.0,11.0,1,7.0,7.0,1,4.5,4.5,4.0,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622510,False,False,False,False,False,False,False,False,False,False,...,3444.0,6144.0,2,3440.0,6140.0,2,3439.0,6139.0,4.0,5.0
622511,False,False,True,False,False,False,False,False,False,False,...,3464.0,6164.0,2,3444.0,6144.0,2,3440.0,6140.0,20.0,24.0
622512,False,False,False,False,False,False,False,False,False,False,...,3466.0,6166.0,2,3464.0,6164.0,2,3444.0,6144.0,2.0,22.0
622513,True,False,False,False,False,False,False,False,False,False,...,3468.0,6168.0,2,3466.0,6166.0,2,3464.0,6164.0,2.0,4.0


In [22]:
def labels_transform(spadl):
    yfns = [lab.scores, lab.concedes]

    labels = []
    for game in tqdm(np.unique(spadl.game_id).tolist()):
        match_actions = spadl.loc[spadl.game_id == game].reset_index(drop=True)
        labels.append(pd.concat([fn(actions=match_actions) for fn in yfns], axis=1))

    labels = pd.concat(labels).reset_index(drop=True)

    return labels

In [23]:
labels = labels_transform(data_names)
labels

100%|██████████| 380/380 [00:12<00:00, 31.62it/s]


Unnamed: 0,scores,concedes
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
622510,False,False
622511,False,False
622512,False,False
622513,False,False


In [25]:
labels["concedes"].sum()

2027

## Usando modelo

In [32]:
import xgboost as xgb
import sklearn.metrics as mt
import socceraction.vaep.formula as fm

In [29]:
models = {}
models["scores"] = xgb.XGBClassifier(random_state=0, n_estimators=50, max_depth=3)
models["scores"].load_model("./data/models/vaep/scores.json")
models["concedes"] = xgb.XGBClassifier(random_state=0, n_estimators=50, max_depth=3)
models["concedes"].load_model("./data/models/vaep/concedes.json")
models

{'scores': XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric=None,
               feature_types=['i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i',
                              'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i',
                              'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', 'i', ...],
               gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=3, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               n_estimators=50, n_jobs=None, num_parallel_tree=None,
               predictor=None, random_state=0, ...),
 'conce

In [30]:
def generate_predictions(features, models):
    preds = {}
    for m in ['scores', 'concedes']:
        preds[m] = models[m].predict_proba(features)[:, 1]
    preds = pd.DataFrame(preds)

    return preds

In [31]:
preds = generate_predictions(features=features, models=models)
preds

Unnamed: 0,scores,concedes
0,0.003258,0.000412
1,0.002625,0.000388
2,0.002851,0.000346
3,0.018931,0.000291
4,0.018182,0.001048
...,...,...
622510,0.035918,0.002448
622511,0.022927,0.008200
622512,0.029812,0.001892
622513,0.007555,0.005057


In [54]:
def calculate_action_values(spadl, predictions):
    action_values = fm.value(actions=spadl, Pscores=predictions['scores'], Pconcedes=predictions['concedes'])
    action_values = pd.concat([
        spadl[['original_event_id', 'player', 'action_id', 'game_id', 'player_id', 'start_x', 'start_y', 'end_x', 'end_y', 'type_name', 'result_name']],
        predictions.rename(columns={'scores': 'Pscores', 'concedes': 'Pconcedes'}),
        action_values
    ], axis=1)

    return action_values

In [55]:
data_names.reset_index(drop=True, inplace=True)
action_values = calculate_action_values(spadl=data_names, predictions=preds)
action_values

Unnamed: 0,original_event_id,player,action_id,game_id,player_id,start_x,start_y,end_x,end_y,type_name,result_name,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,2210296239,James Ward-Prowse,0,1485186,105172.0,52.605,33.592,71.400,36.244,pass,success,0.003258,0.000412,0.000000,-0.000000,0.000000
1,2210296365,Jack Stephens,1,1485186,92550.0,71.610,35.904,26.880,8.772,pass,fail,0.002625,0.000388,-0.000633,0.000025,-0.000609
2,2210296409,Cheikhou Kouyaté,2,1485186,66741.0,29.190,13.532,43.470,14.416,pass,fail,0.002851,0.000346,0.002464,0.002279,0.004743
3,2210296601,Oriol Romeu,3,1485186,90780.0,37.065,11.016,7.455,18.632,pass,fail,0.018931,0.000291,0.018585,0.002560,0.021146
4,2210296737,Joel Ward,4,1485186,43105.0,6.825,20.876,12.600,26.792,pass,fail,0.018182,0.001048,0.017892,0.017883,0.035775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622510,2302283833,Brandon Williams,1772,1485563,384143.0,97.860,59.840,91.140,68.000,clearance,success,0.035918,0.002448,0.032325,0.012877,0.045203
622511,2302283923,Marçal,1773,1485563,121718.0,91.140,68.000,99.330,46.512,throw_in,fail,0.022927,0.008200,0.022927,-0.008200,0.014726
622512,2302283933,Brandon Williams,1774,1485563,384143.0,95.865,49.640,79.590,55.488,clearance,success,0.029812,0.001892,0.021611,0.021034,0.042646
622513,2302283949,João Moutinho,1775,1485563,16161.0,79.590,55.488,89.880,56.100,pass,fail,0.007555,0.005057,0.005663,0.024755,0.030418


## Mergeando dados

### Schedule

In [59]:
epl_schedule = pd.read_pickle('./data/augusto/bkp_premier_league_2021_whoscored.pkl')
# para ser usado no epl schedule
epl_rename_map = {
    'Leeds' : 'Leeds United',
    'Leicester': 'Leicester City',
    'Manchester United': 'Manchester Utd',
    'Newcastle': 'Newcastle Utd' ,
    'Sheffield United': 'Sheffield Utd',
    'West Bromwich': 'West Brom',
    'Wolverhampton': 'Wolves'
}
epl_schedule.home_team = epl_schedule.home_team.replace(epl_rename_map)
epl_schedule.away_team = epl_schedule.away_team.replace(epl_rename_map)
schedule = pd.read_pickle('./data/schedule2021.pkl')
joined_schedule = epl_schedule.merge(schedule, on=['home_team', 'away_team'])
print(joined_schedule.shape)
joined_schedule.head(5)

(380, 12)


Unnamed: 0,date,home_team,away_team,game_id_x,url,stage,game_id_y,home_points,away_points,home_result,away_result,full_dt
0,2020-09-12 15:00:00,Crystal Palace,Southampton,1485186,https://1xbet.whoscored.com/Matches/1485186/Li...,,db261cb0,1,0,win,lose,2020-09-12 15:00:00
1,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,,bf52349b,0,3,lose,win,2020-09-12 12:30:00
2,2020-09-12 17:30:00,Liverpool,Leeds United,1485188,https://1xbet.whoscored.com/Matches/1485188/Li...,,21b58926,4,3,win,lose,2020-09-12 17:30:00
3,2020-09-12 20:00:00,West Ham,Newcastle Utd,1485191,https://1xbet.whoscored.com/Matches/1485191/Li...,,78495ced,0,2,lose,win,2020-09-12 20:00:00
4,2020-09-13 16:30:00,Tottenham,Everton,1485189,https://1xbet.whoscored.com/Matches/1485189/Li...,,fc7f9aa1,0,1,lose,win,2020-09-13 16:30:00


### Merge

In [56]:
from unidecode import unidecode

In [57]:
def remove_special_characters(series):
    return series.apply(lambda x: unidecode(str(x)))

In [60]:
vaep = action_values[['game_id', 'player', 'vaep_value']].groupby(['game_id', 'player']).agg({'vaep_value': 'sum'}).reset_index()
vaep.player = remove_special_characters(vaep.player)
vaep.to_pickle('./data/bkp_vaep_premier_league_2021.pkl')
vaep = vaep.merge(joined_schedule[['game_id_x', 'game_id_y']], left_on='game_id', right_on='game_id_x')
vaep

Unnamed: 0,game_id,player,vaep_value,game_id_x,game_id_y
0,1485184,Aaron Wan-Bissaka,0.286879,1485184,2c8aa556
1,1485184,Anthony Martial,0.200445,1485184,2c8aa556
2,1485184,Ashley Barnes,-0.054159,1485184,2c8aa556
3,1485184,Ashley Westwood,0.408179,1485184,2c8aa556
4,1485184,Ben Mee,0.270509,1485184,2c8aa556
...,...,...,...,...,...
10326,1485563,Rui Patricio,-0.148446,1485563,2c081c94
10327,1485563,Ruben Neves,0.085313,1485563,2c081c94
10328,1485563,Shola Shoretire,-0.012415,1485563,2c081c94
10329,1485563,Willian Jose,-0.180282,1485563,2c081c94


In [63]:
dataset = pd.read_csv('./data/augusto/input_data.csv',index_col=0)
dataset.player = remove_special_characters(dataset.player)

dataset = dataset.merge(vaep[['game_id_y', 'player', 'vaep_value']], left_on=['game_id', 'player'], right_on=['game_id_y', 'player'])
dataset

Unnamed: 0,game_id,home_team,away_team,home_points,away_points,home_result,away_result,full_dt,player,team,...,last_completed_passes_perc,last_result,last_xg,last_xag,last_full_dt,days_from_last_game,played_at_home,Season_End_Year,game_id_y,vaep_value
0,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,...,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,0.442736
1,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,...,0.0,0,0.0,0.0,,14.0,False,2022,bf52349b,0.442736
2,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,...,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,0.498623
3,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,...,0.0,0,0.0,0.0,,14.0,False,2022,bf52349b,0.498623
4,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Bernd Leno,Arsenal,...,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,-1.273726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19967,2c081c94,Wolves,Manchester Utd,1,2,lose,win,2021-05-23 16:00:00,Ruben Neves,Wolverhampton Wanderers,...,86.3,lose,0.0,0.0,2021-05-19 18:00:00,3.0,False,2022,2c081c94,0.085313
19968,2c081c94,Wolves,Manchester Utd,1,2,lose,win,2021-05-23 16:00:00,Willian Jose,Wolverhampton Wanderers,...,44.4,lose,0.0,0.0,2021-05-19 18:00:00,3.0,False,2021,2c081c94,-0.180282
19969,2c081c94,Wolves,Manchester Utd,1,2,lose,win,2021-05-23 16:00:00,Willian Jose,Wolverhampton Wanderers,...,44.4,lose,0.0,0.0,2021-05-19 18:00:00,3.0,False,2022,2c081c94,-0.180282
19970,2c081c94,Wolves,Manchester Utd,1,2,lose,win,2021-05-23 16:00:00,Willy Boly,Wolverhampton Wanderers,...,81.5,lose,0.0,0.0,2021-05-19 18:00:00,3.0,False,2021,2c081c94,-0.104621


In [64]:
dataset.to_csv("./data/input_datas/input_data_vaep.csv")

# Padronizando Datasets

In [70]:
pd.set_option('display.max_columns', 500)

In [74]:
matches_with_weather = pd.read_pickle('./data/augusto/bkp_matches_w_weather.pkl')
matches_with_weather.head()

Unnamed: 0,Season_End_Year,Wk,Date,Team,HomeGoals,AwayGoals,Away,FTR,FDCOUK,City,Stadium,Capacity,Latitude,Longitude,Country,stadium,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
0,1993,1,1992-08-15,Coventry City,2,1,Middlesbrough,H,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,Ricoh Arena,1992-08-15,16.1,11.3,20.9,,,212.0,15.9,,1014.6,
1,1993,4,1992-08-26,Coventry City,0,1,QPR,A,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,Ricoh Arena,1992-08-26,14.2,11.7,17.5,,,219.0,16.2,,,
2,1993,5,1992-08-29,Coventry City,0,2,Blackburn,A,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,Ricoh Arena,1992-08-29,12.5,6.2,16.3,,,234.0,11.6,,1004.0,
3,1993,8,1992-09-14,Coventry City,1,0,Tottenham,H,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,Ricoh Arena,1992-09-14,11.3,5.1,17.7,,,230.0,12.8,,1013.2,
4,1993,11,1992-10-03,Coventry City,2,2,Crystal Palace,D,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,Ricoh Arena,1992-10-03,11.1,10.1,12.6,,,,12.2,,1007.0,


In [82]:
dataset_xt = pd.read_csv("./data/input_datas/input_data_xt.csv")
dataset_vaep = pd.read_csv("./data/input_datas/input_data_vaep.csv")

In [83]:
dataset_vaep.head()

Unnamed: 0.1,Unnamed: 0,game_id,home_team,away_team,home_points,away_points,home_result,away_result,full_dt,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,xg,xag,result,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,last_full_dt,days_from_last_game,played_at_home,Season_End_Year,game_id_y,vaep_value
0,0,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23,90.0,0,0,49,0,74.4,0.0,0.0,win,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,0.442736
1,1,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23,90.0,0,0,49,0,74.4,0.0,0.0,win,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,14.0,False,2022,bf52349b,0.442736
2,2,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29,86.0,1,1,27,0,82.4,0.9,0.1,win,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,0.498623
3,3,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29,86.0,1,1,27,0,82.4,0.9,0.1,win,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,14.0,False,2022,bf52349b,0.498623
4,4,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00,Bernd Leno,Arsenal,GK,28,90.0,0,0,34,0,100.0,0.0,0.0,win,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,14.0,False,2021,bf52349b,-1.273726


In [84]:
dataset_vaep.columns

Index(['Unnamed: 0', 'game_id', 'home_team', 'away_team', 'home_points',
       'away_points', 'home_result', 'away_result', 'full_dt', 'player',
       'team', 'pos', 'player_age', 'time_played', 'shots_on_target', 'goals',
       'touches', 'yellow_cards', 'completed_passes_perc', 'xg', 'xag',
       'result', 'last_time_played', 'last_shots_on_target', 'last_goals',
       'last_touches', 'last_yellow_cards', 'last_completed_passes_perc',
       'last_result', 'last_xg', 'last_xag', 'last_full_dt',
       'days_from_last_game', 'played_at_home', 'Season_End_Year', 'game_id_y',
       'vaep_value'],
      dtype='object')

In [88]:
mid_columns_to_use = ["home_team", "away_team", "player", "team", "pos", "player_age", "last_time_played", "last_shots_on_target",
                      "last_goals", "last_touches", "last_yellow_cards", "last_completed_passes_perc", "last_result", "last_xg",
                      "last_xag", "days_from_last_game", "played_at_home", "Season_End_Year", "target"]

In [86]:
positions_to_use = ["AM", "CM", "FW", "LM", "LW", "MF", "RM", "RW"]

## xG dataset

In [89]:
dataset_xg = dataset_vaep.copy()
dataset_xg["target"] = dataset_xg["xg"]

dataset_xg = dataset_xg[mid_columns_to_use]
dataset_xg = dataset_xg[dataset_xg["pos"].isin(positions_to_use)]
dataset_xg

Unnamed: 0,home_team,away_team,player,team,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,target
0,Fulham,Arsenal,Ainsley Maitland-Niles,Arsenal,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.0
1,Fulham,Arsenal,Ainsley Maitland-Niles,Arsenal,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.0
2,Fulham,Arsenal,Alexandre Lacazette,Arsenal,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.9
3,Fulham,Arsenal,Alexandre Lacazette,Arsenal,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.9
6,Fulham,Arsenal,Dani Ceballos,Arsenal,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19959,Wolves,Manchester Utd,Morgan Gibbs-White,Wolverhampton Wanderers,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,0.1
19966,Wolves,Manchester Utd,Ruben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2021,0.1
19967,Wolves,Manchester Utd,Ruben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,0.1
19968,Wolves,Manchester Utd,Willian Jose,Wolverhampton Wanderers,FW,29,23.0,0.0,0.0,14.0,0.0,44.4,lose,0.0,0.0,3.0,False,2021,0.2


## xT dataset

In [104]:
dataset_xt["target"] = dataset_xt["action_value"]

dataset_xt = dataset_xt[mid_columns_to_use]
dataset_xt = dataset_xt[dataset_xt["pos"].isin(positions_to_use)]
dataset_xt

Unnamed: 0,home_team,away_team,player,team,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,target
0,Fulham,Arsenal,Ainsley Maitland-Niles,Arsenal,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.017502
1,Fulham,Arsenal,Alexandre Lacazette,Arsenal,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,-0.015915
3,Fulham,Arsenal,Dani Ceballos,Arsenal,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.023188
4,Fulham,Arsenal,Eddie Nketiah,Arsenal,FW,21,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.000589
5,Fulham,Arsenal,Granit Xhaka,Arsenal,CM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.112550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9894,Wolves,Manchester Utd,Joao Moutinho,Wolverhampton Wanderers,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,0.059596
9895,Wolves,Manchester Utd,Leander Dendoncker,Wolverhampton Wanderers,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,0.050709
9896,Wolves,Manchester Utd,Morgan Gibbs-White,Wolverhampton Wanderers,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,0.087964
9900,Wolves,Manchester Utd,Ruben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,0.514115


## VAEP dataset

In [105]:
dataset_vaep["target"] = dataset_vaep["vaep_value"]

dataset_vaep = dataset_vaep[mid_columns_to_use]
dataset_vaep = dataset_vaep[dataset_vaep["pos"].isin(positions_to_use)]
dataset_vaep

Unnamed: 0,home_team,away_team,player,team,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,target
0,Fulham,Arsenal,Ainsley Maitland-Niles,Arsenal,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.442736
1,Fulham,Arsenal,Ainsley Maitland-Niles,Arsenal,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.442736
2,Fulham,Arsenal,Alexandre Lacazette,Arsenal,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.498623
3,Fulham,Arsenal,Alexandre Lacazette,Arsenal,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2022,0.498623
6,Fulham,Arsenal,Dani Ceballos,Arsenal,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,0.006840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19959,Wolves,Manchester Utd,Morgan Gibbs-White,Wolverhampton Wanderers,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,0.136599
19966,Wolves,Manchester Utd,Ruben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2021,0.085313
19967,Wolves,Manchester Utd,Ruben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,0.085313
19968,Wolves,Manchester Utd,Willian Jose,Wolverhampton Wanderers,FW,29,23.0,0.0,0.0,14.0,0.0,44.4,lose,0.0,0.0,3.0,False,2021,-0.180282


# Dados de Clima

In [122]:
def merge_with_weather(dataset):
    dataset = dataset.merge(matches_with_weather,left_on=['home_team', 'away_team', 'Season_End_Year'], right_on=['Team','Away', 'Season_End_Year'], how='inner')
    dataset.drop(columns=['home_team', 'away_team', 'team', 'FDCOUK', 'FTR', 'stadium', 'Date'], inplace=True)
    dataset.drop(columns=['tsun', 'Country', 'Stadium','City', 'HomeGoals', 'AwayGoals', 'date', 'Latitude', 'Longitude'], inplace=True)

    dataset[['snow', 'wpgt', 'prcp']] = dataset[['snow', 'wpgt', 'prcp']] .fillna(0)

    target = dataset["target"].copy()
    dataset.drop(columns=['wdir', 'pres', 'target'], inplace=True)
    dataset["target"] = target

    return dataset

In [123]:
dataset_xg_weather = merge_with_weather(dataset_xg)
dataset_xg_weather

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,target
0,Ainsley Maitland-Niles,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.0
1,Alexandre Lacazette,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.9
2,Dani Ceballos,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.0
3,Eddie Nketiah,FW,21,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.0
4,Granit Xhaka,CM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6884,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.0
6885,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.0
6886,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.1
6887,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.1


In [124]:
dataset_xt_weather = merge_with_weather(dataset_xt)
dataset_xt_weather

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,target
0,Andros Townsend,RM,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.094984
1,Eberechi Eze,LM,22,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.006211
2,James McArthur,CM,32,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.026621
3,James McCarthy,CM,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,-0.000674
4,Jeffrey Schlupp,LM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.031046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2928,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.059596
2929,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.050709
2930,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.087964
2931,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.514115


In [125]:
dataset_vaep_weather = merge_with_weather(dataset_vaep)
dataset_vaep_weather

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,target
0,Ainsley Maitland-Niles,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.442736
1,Alexandre Lacazette,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.498623
2,Dani Ceballos,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.006840
3,Eddie Nketiah,FW,21,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.007416
4,Granit Xhaka,CM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,0.418577
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6884,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.098399
6885,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.194685
6886,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.136599
6887,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,0.085313


# Dados de Distancia

In [126]:
travel_distances = pd.read_pickle("./data/bkp_travel_distances.pkl")
travel_distances.head()

Unnamed: 0,Season_End_Year,Wk,Date,Team,HomeGoals,AwayGoals,Away,FTR,FDCOUK,City,Stadium,Capacity,Latitude,Longitude,Country,home_travel_distance,away_travel_distance
0,1993,1,1992-08-15,Coventry City,2,1,Middlesbrough,H,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,0.0,0.0
1,1993,4,1992-08-26,Coventry City,0,1,QPR,A,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,135.610239,124.380354
2,1993,5,1992-08-29,Coventry City,0,2,Blackburn,A,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,0.0,139.75998
3,1993,8,1992-09-14,Coventry City,1,0,Tottenham,H,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,0.0,135.610239
4,1993,10,1992-09-26,Coventry City,1,1,Norwich City,D,Coventry,Coventry,Ricoh Arena,32609,52.448056,-1.495556,England,0.0,190.641456


In [134]:
def merge_with_distance(dataset):
    dataset = dataset.merge(travel_distances[["Team", "Away", "Season_End_Year", "home_travel_distance", "away_travel_distance"]], on=["Team", "Away", "Season_End_Year"])
    dataset["travel_distance"] = np.where(dataset["played_at_home"], dataset["home_travel_distance"], dataset["away_travel_distance"])
    target = dataset["target"]
    dataset = dataset.drop(["target", "home_travel_distance", "away_travel_distance"], axis=1)
    dataset["target"] = target

    return dataset

## xG dataset

In [135]:
dataset_xg_distance = merge_with_distance(dataset_xg_weather)
dataset_xg_distance

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,travel_distance,target
0,Ainsley Maitland-Niles,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.0
1,Alexandre Lacazette,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.9
2,Dani Ceballos,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.0
3,Eddie Nketiah,FW,21,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.0
4,Granit Xhaka,CM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6884,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.0
6885,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.0
6886,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.1
6887,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.1


## xT dataset

In [136]:
dataset_xt_distance = merge_with_distance(dataset_xt_weather)
dataset_xt_distance

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,travel_distance,target
0,Andros Townsend,RM,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.000000,0.094984
1,Eberechi Eze,LM,22,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.000000,0.006211
2,James McArthur,CM,32,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.000000,0.026621
3,James McCarthy,CM,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.000000,-0.000674
4,Jeffrey Schlupp,LM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,True,2022,17,Crystal Palace,Southampton,26309,11.2,10.4,12.0,0.0,0.0,11.7,25.9,0.000000,0.031046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2928,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.059596
2929,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.050709
2930,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.087964
2931,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.514115


## VAEP dataset

In [137]:
dataset_vaep_distance = merge_with_distance(dataset_vaep_weather)
dataset_vaep_distance

Unnamed: 0,player,pos,player_age,last_time_played,last_shots_on_target,last_goals,last_touches,last_yellow_cards,last_completed_passes_perc,last_result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year,Wk,Team,Away,Capacity,tavg,tmin,tmax,prcp,snow,wspd,wpgt,travel_distance,target
0,Ainsley Maitland-Niles,LM,23,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.442736
1,Alexandre Lacazette,FW,29,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.498623
2,Dani Ceballos,CM,24,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.006840
3,Eddie Nketiah,FW,21,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.007416
4,Granit Xhaka,CM,27,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,14.0,False,2021,1,Fulham,Arsenal,25700,16.2,11.7,21.3,0.3,0.0,14.3,31.5,5748.416939,0.418577
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6884,Joao Moutinho,CM,34,79.0,0.0,0.0,83.0,0.0,76.0,lose,0.0,0.1,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.098399
6885,Leander Dendoncker,CM,26,11.0,0.0,0.0,10.0,0.0,100.0,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.194685
6886,Morgan Gibbs-White,CM,21,67.0,1.0,0.0,38.0,0.0,85.2,lose,0.1,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.136599
6887,Ruben Neves,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,lose,0.0,0.0,3.0,False,2022,3,Wolves,Manchester Utd,27828,14.4,10.7,19.2,0.0,0.0,10.8,24.1,194.088306,0.085313


# Salvando Dataset

In [138]:
dataset_xg_distance.to_parquet("./data/final_datasets/dataset_xg.parquet")
dataset_xt_distance.to_parquet("./data/final_datasets/dataset_xt.parquet")
dataset_vaep_distance.to_parquet("./data/final_datasets/dataset_vaep.parquet")