In [1]:
import pandas as pd
import soccerdata as sd

In [22]:
fbref = sd.FBref(leagues="ENG-Premier League", seasons=2020, no_cache=False)
schedule = fbref.read_schedule()
schedule = schedule[schedule.game_id.notna()]
schedule.loc[:, 'full_dt'] = pd.to_datetime(schedule.date.astype(str) + ' ' + schedule.time )
schedule.sort_values(by='full_dt', inplace=True)
schedule.loc[:, ['home_points', 'away_points']] = schedule.score.str.split('–', expand=True).values
schedule['home_result'] = 'draw'
schedule['away_result'] = 'draw'

schedule.loc[schedule['home_points'] > schedule['away_points'], 'home_result'] = 'win'
schedule.loc[schedule['home_points'] > schedule['away_points'], 'away_result'] = 'lose'
schedule.loc[schedule['home_points'] < schedule['away_points'], 'home_result'] = 'lose'
schedule.loc[schedule['home_points'] < schedule['away_points'], 'away_result'] = 'win'

schedule = schedule.reset_index().loc[:, ['game_id', 'home_team','away_team','home_points', 'away_points',  'home_result', 'away_result', 'full_dt']]

# DATAFRAME CONTENDO OS JOGOS ORDENADOS PELA DATA CONTENDO OS RESULTADOS PARA OS TIMES
schedule.head(5)

Unnamed: 0,game_id,home_team,away_team,home_points,away_points,home_result,away_result,full_dt
0,bf52349b,Fulham,Arsenal,0,3,lose,win,2020-09-12 12:30:00
1,db261cb0,Crystal Palace,Southampton,1,0,win,lose,2020-09-12 15:00:00
2,21b58926,Liverpool,Leeds United,4,3,win,lose,2020-09-12 17:30:00
3,78495ced,West Ham,Newcastle Utd,0,2,lose,win,2020-09-12 20:00:00
4,7dd01ca9,West Brom,Leicester City,0,3,lose,win,2020-09-13 14:00:00


In [4]:
def aux(year):
    fbref = sd.FBref(leagues="ENG-Premier League", seasons=year, no_cache=False)
    schedule = fbref.read_schedule()
    schedule = schedule[schedule.game_id.notna()]
    schedule.loc[:, 'full_dt'] = pd.to_datetime(schedule.date.astype(str) + ' ' + schedule.time )
    schedule.sort_values(by='full_dt', inplace=True)
    schedule.loc[:, ['home_points', 'away_points']] = schedule.score.str.split('–', expand=True).values
    schedule['home_result'] = 'draw'
    schedule['away_result'] = 'draw'

    schedule.loc[schedule['home_points'] > schedule['away_points'], 'home_result'] = 'win'
    schedule.loc[schedule['home_points'] > schedule['away_points'], 'away_result'] = 'lose'
    schedule.loc[schedule['home_points'] < schedule['away_points'], 'home_result'] = 'lose'
    schedule.loc[schedule['home_points'] < schedule['away_points'], 'away_result'] = 'win'

    schedule = schedule.reset_index().loc[:, ['game_id', 'home_team','away_team','home_points', 'away_points',  'home_result', 'away_result', 'full_dt']]

    # DATAFRAME CONTENDO OS JOGOS ORDENADOS PELA DATA CONTENDO OS RESULTADOS PARA OS TIMES
    return schedule
    
schedules = {year : aux(year) for year in range(2021, 2022)}



In [6]:
def match_data(match_id):
    """ get match data based on id """
    players_match_stats = fbref.read_player_match_stats(stat_type="summary", match_id=match_id)
    players_match_stats = players_match_stats.reset_index().set_index('game_id').sort_index(axis=1,sort_remaining=True)
    players_match_stats = players_match_stats.drop(columns=['season', 'league', 'nation', 'jersey_number'])
    return players_match_stats

In [27]:
def preprocess_match_data(match_data):
    pmd = pd.DataFrame()
    pmd.loc[:, 'player'] = match_data.player
    pmd.loc[:, 'team'] = match_data.team
    pmd.loc[:, 'pos'] = match_data.pos

    pmd.loc[:, 'player_age'] = match_data.age.str.split('-').str.get(0)
    pmd.loc[:, 'time_played'] = match_data['min']
    pmd.loc[:, 'shots_on_target'] = match_data[('Performance', 'SoT')]
    pmd.loc[:, 'goals'] = match_data[('Performance', 'Gls')]
    pmd.loc[:, 'touches'] = match_data[('Performance', 'Touches')]
    pmd.loc[:, 'yellow_cards'] = match_data[('Performance', 'CrdY')]
    pmd.loc[:, 'completed_passes_perc'] = match_data[('Passes', 'Cmp%')]
    
    pmd.loc[:, 'xg'] = match_data[('Expected', 'xG')]
    pmd.loc[:, 'xag'] = match_data[('Expected', 'xAG')]
                                        
    return pmd

In [35]:
# raw_matches_data = match_data(schedule.game_id.to_list())
# raw_matches_data.to_pickle('bkp_raw_matches.pkl')
raw_matches_data = pd.read_pickle('bkp_raw_matches.pkl')
matches_data = preprocess_match_data(raw_matches_data)
matches_data

Unnamed: 0_level_0,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,xg,xag
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
db261cb0,Andros Townsend,Crystal Palace,RM,29,90,0,0,38,0,43.3,0.0,0.6
db261cb0,Cheikhou Kouyaté,Crystal Palace,CB,30,90,1,0,25,0,57.9,0.2,0.0
db261cb0,Eberechi Eze,Crystal Palace,LM,22,10,0,0,13,0,80.0,0.0,0.0
db261cb0,James McArthur,Crystal Palace,CM,32,90,0,0,45,0,46.9,0.1,0.4
db261cb0,James McCarthy,Crystal Palace,CM,29,73,0,0,29,1,60.9,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2c081c94,Romain Saïss,Wolverhampton Wanderers,CB,31,90,0,0,90,0,88.8,0.1,0.0
2c081c94,Rui Patrício,Wolverhampton Wanderers,GK,33,90,0,0,14,0,91.7,0.0,0.0
2c081c94,Rúben Neves,Wolverhampton Wanderers,CM,24,90,0,0,98,0,86.0,0.1,0.1
2c081c94,Willian José,Wolverhampton Wanderers,FW,29,64,1,0,22,0,72.2,0.2,0.0


In [7]:
for year, schedule in schedules.items():
    print(year)
    fbref = sd.FBref(leagues="ENG-Premier League", seasons=year, no_cache=False)
    raw_matches_data = match_data(schedule.game_id.to_list())
    raw_matches_data.to_pickle(f'matches_tmp/bkp_raw_matches_{year}.pkl')

2020


In [9]:

positions = set(','.join(matches_data.pos).split(','))
positions = {
    'AM': 'Attacking Midfielder',
    'CB': 'Center Back',
    'CM': 'Center Midfielder',
    'DF': 'Defense',
    'DM': 'Defense Midfielder',
    'FW': 'Forward',
    'GK': 'Goalkeeper',
    'LB': 'Left Back',
    'LM': 'Left Midfielder',
    'LW': 'Left Wing',
    'MF': 'Midfielder',
    'RB': 'Right Back',
    'RM': 'Right Midfielder',
    'RW': 'Right Wing',
    'WB': 'Wing Back'
}
['CM', 'LM', 'RM', 'MF', 'FW', 'AM']

In [55]:

import numpy as np 

full_train_data = []
for year, schedule in schedules.items():
    if year < 2017:
        continue
    raw_matches_data = pd.read_pickle(f'matches_tmp/bkp_raw_matches_{year}.pkl')
    matches_data = preprocess_match_data(raw_matches_data)
    df = pd.merge(left=schedule, right=matches_data, left_on='game_id', right_index=True)
    print(year, df.full_dt.max())

    train_data = df.copy()
    train_data.loc[:, 'result'] = np.where(train_data.team == train_data.home_team, train_data.home_result, train_data.away_result)
    train_data.drop(columns=['game_id', 'home_points', 'away_points', 'home_result', 'away_result'], inplace=True)

    for c in ['time_played', 'shots_on_target', 'goals', 'touches', 'yellow_cards', 'completed_passes_perc', 'result']:
        train_data[c] = train_data.groupby('player')[c].shift().fillna(0)
        
    for c in ['xg', 'xag',  'full_dt']:
        train_data['last_' + c] = train_data.groupby('player')[c].shift()

    train_data.loc[:, 'days_from_last_game'] = (train_data['full_dt'] - train_data.last_full_dt).dt.days.fillna(14.0)
    train_data.last_xg.fillna(0, inplace=True)
    train_data.last_xag.fillna(0, inplace=True)

    train_data.loc[:, 'played_at_home'] = train_data.team == train_data.home_team
    train_data.loc[:, 'Season_End_Year'] = year+1
    
    train_data.drop(columns=['full_dt', 'last_full_dt'], inplace=True)
    
    full_train_data.append(train_data)
    
full_train_df = pd.concat(full_train_data)
full_train_df

2017 2018-05-13 15:00:00
2018 2019-05-12 15:00:00
2019 2020-07-26 16:00:00
2020 2021-05-23 16:00:00


Unnamed: 0,home_team,away_team,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,xg,xag,result,last_xg,last_xag,days_from_last_game,played_at_home,Season_End_Year
0,Arsenal,Leicester City,Aaron Ramsey,Arsenal,"DM,CM",26,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0,0.0,0.0,14.0,True,2018
0,Arsenal,Leicester City,Alex Oxlade-Chamberlain,Arsenal,"WB,RB",23,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0,0.0,0.0,14.0,True,2018
0,Arsenal,Leicester City,Alexandre Lacazette,Arsenal,"FW,LW,LM",26,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.4,0,0.0,0.0,14.0,True,2018
0,Arsenal,Leicester City,Danny Welbeck,Arsenal,"AM,FW",26,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.4,0,0.0,0.0,14.0,True,2018
0,Arsenal,Leicester City,Granit Xhaka,Arsenal,"CM,DM",24,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2,0,0.0,0.0,14.0,True,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,Wolves,Manchester Utd,Romain Saïss,Wolverhampton Wanderers,CB,31,90.0,1.0,0.0,61.0,0.0,75.5,0.1,0.0,lose,0.2,0.0,7.0,False,2021
379,Wolves,Manchester Utd,Rui Patrício,Wolverhampton Wanderers,GK,33,90.0,0.0,0.0,26.0,0.0,77.3,0.0,0.0,lose,0.0,0.0,7.0,False,2021
379,Wolves,Manchester Utd,Rúben Neves,Wolverhampton Wanderers,CM,24,90.0,0.0,0.0,91.0,1.0,86.3,0.1,0.1,lose,0.0,0.0,3.0,False,2021
379,Wolves,Manchester Utd,Willian José,Wolverhampton Wanderers,FW,29,23.0,0.0,0.0,14.0,0.0,44.4,0.2,0.0,lose,0.0,0.0,3.0,False,2021


In [49]:
full_train_df.to_csv('input_data.csv')

In [50]:
ws = sd.WhoScored('ENG-Premier League', seasons=2021, no_cache=False)



In [2]:
epl_schedule = pd.read_pickle('bkp_premier_league_2021_whoscored.pkl')
# para ser usado no epl schedule
epl_rename_map = {
    'Leeds' : 'Leeds United',
    'Leicester': 'Leicester City',
    'Manchester United': 'Manchester Utd',
    'Newcastle': 'Newcastle Utd' ,
    'Sheffield United': 'Sheffield Utd',
    'West Bromwich': 'West Brom',
    'Wolverhampton': 'Wolves'
}
epl_schedule.home_team = epl_schedule.home_team.replace(epl_rename_map)
epl_schedule.away_team = epl_schedule.away_team.replace(epl_rename_map)
epl_schedule

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,game_id,url,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENG-Premier League,2021,2020-09-12 Crystal Palace-Southampton,2020-09-12 15:00:00,Crystal Palace,Southampton,1485186,https://1xbet.whoscored.com/Matches/1485186/Li...,
ENG-Premier League,2021,2020-09-12 Fulham-Arsenal,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,
ENG-Premier League,2021,2020-09-12 Liverpool-Leeds,2020-09-12 17:30:00,Liverpool,Leeds United,1485188,https://1xbet.whoscored.com/Matches/1485188/Li...,
ENG-Premier League,2021,2020-09-12 West Ham-Newcastle,2020-09-12 20:00:00,West Ham,Newcastle Utd,1485191,https://1xbet.whoscored.com/Matches/1485191/Li...,
ENG-Premier League,2021,2020-09-13 Tottenham-Everton,2020-09-13 16:30:00,Tottenham,Everton,1485189,https://1xbet.whoscored.com/Matches/1485189/Li...,
ENG-Premier League,2021,...,...,...,...,...,...,...
ENG-Premier League,2021,2021-05-23 Liverpool-Crystal Palace,2021-05-23 16:00:00,Liverpool,Crystal Palace,1485559,https://1xbet.whoscored.com/Matches/1485559/Li...,
ENG-Premier League,2021,2021-05-23 Manchester City-Everton,2021-05-23 16:00:00,Manchester City,Everton,1485560,https://1xbet.whoscored.com/Matches/1485560/Li...,
ENG-Premier League,2021,2021-05-23 Sheffield United-Burnley,2021-05-23 16:00:00,Sheffield Utd,Burnley,1485561,https://1xbet.whoscored.com/Matches/1485561/Li...,
ENG-Premier League,2021,2021-05-23 West Ham-Southampton,2021-05-23 16:00:00,West Ham,Southampton,1485562,https://1xbet.whoscored.com/Matches/1485562/Li...,


In [5]:
joined_schedule = epl_schedule.merge(schedules[2021], on=['home_team', 'away_team'])
print(joined_schedule.shape)
joined_schedule.head(5)

(380, 12)


Unnamed: 0,date,home_team,away_team,game_id_x,url,stage,game_id_y,home_points,away_points,home_result,away_result,full_dt
0,2020-09-12 15:00:00,Crystal Palace,Southampton,1485186,https://1xbet.whoscored.com/Matches/1485186/Li...,,db261cb0,1,0,win,lose,2020-09-12 15:00:00
1,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,,bf52349b,0,3,lose,win,2020-09-12 12:30:00
2,2020-09-12 17:30:00,Liverpool,Leeds United,1485188,https://1xbet.whoscored.com/Matches/1485188/Li...,,21b58926,4,3,win,lose,2020-09-12 17:30:00
3,2020-09-12 20:00:00,West Ham,Newcastle Utd,1485191,https://1xbet.whoscored.com/Matches/1485191/Li...,,78495ced,0,2,lose,win,2020-09-12 20:00:00
4,2020-09-13 16:30:00,Tottenham,Everton,1485189,https://1xbet.whoscored.com/Matches/1485189/Li...,,fc7f9aa1,0,1,lose,win,2020-09-13 16:30:00


In [112]:
ws = sd.WhoScored('ENG-Premier League', seasons=2021, no_cache=False)
all_matches = ws.read_events(match_id=joined_schedule.game_id_x.tolist(), output_fmt='spadl')




In [8]:
# all_matches.to_pickle('who_scored_premier_2021_events.pkl')
all_matches = pd.read_pickle('who_scored_premier_2021_events.pkl')
actions = all_matches.copy()

In [116]:
epl_schedule

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,game_id,url,stage
league,season,game,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENG-Premier League,2021,2020-09-12 Crystal Palace-Southampton,2020-09-12 15:00:00,Crystal Palace,Southampton,1485186,https://1xbet.whoscored.com/Matches/1485186/Li...,
ENG-Premier League,2021,2020-09-12 Fulham-Arsenal,2020-09-12 12:30:00,Fulham,Arsenal,1485187,https://1xbet.whoscored.com/Matches/1485187/Li...,
ENG-Premier League,2021,2020-09-12 Liverpool-Leeds,2020-09-12 17:30:00,Liverpool,Leeds United,1485188,https://1xbet.whoscored.com/Matches/1485188/Li...,
ENG-Premier League,2021,2020-09-12 West Ham-Newcastle,2020-09-12 20:00:00,West Ham,Newcastle Utd,1485191,https://1xbet.whoscored.com/Matches/1485191/Li...,
ENG-Premier League,2021,2020-09-13 Tottenham-Everton,2020-09-13 16:30:00,Tottenham,Everton,1485189,https://1xbet.whoscored.com/Matches/1485189/Li...,
ENG-Premier League,2021,...,...,...,...,...,...,...
ENG-Premier League,2021,2021-05-23 Liverpool-Crystal Palace,2021-05-23 16:00:00,Liverpool,Crystal Palace,1485559,https://1xbet.whoscored.com/Matches/1485559/Li...,
ENG-Premier League,2021,2021-05-23 Manchester City-Everton,2021-05-23 16:00:00,Manchester City,Everton,1485560,https://1xbet.whoscored.com/Matches/1485560/Li...,
ENG-Premier League,2021,2021-05-23 Sheffield United-Burnley,2021-05-23 16:00:00,Sheffield Utd,Burnley,1485561,https://1xbet.whoscored.com/Matches/1485561/Li...,
ENG-Premier League,2021,2021-05-23 West Ham-Southampton,2021-05-23 16:00:00,West Ham,Southampton,1485562,https://1xbet.whoscored.com/Matches/1485562/Li...,


In [133]:
epl_schedule.home_team.unique()

array(['Crystal Palace', 'Fulham', 'Liverpool', 'West Ham', 'Tottenham',
       'West Brom', 'Brighton', 'Sheffield Utd', 'Arsenal', 'Everton',
       'Leeds United', 'Manchester Utd', 'Chelsea', 'Leicester City',
       'Newcastle Utd', 'Southampton', 'Aston Villa', 'Wolves', 'Burnley',
       'Manchester City'], dtype=object)

In [9]:
team_df = actions[['team_id', 'team']].drop_duplicates()
team_map = {
    'Leeds' : 'Leeds United',
    'Leicester': 'Leicester City',
    'Man Utd': 'Manchester Utd',
    'Newcastle': 'Newcastle Utd' ,
    'Sheff Utd': 'Sheffield Utd',
    'Man City' :'Manchester City'
}
team_df.loc[:, 'team_epl'] = team_df.team.replace(team_map)

home_team_id = epl_schedule[['game_id', 'home_team']].merge(team_df, left_on='home_team', right_on='team_epl')

In [142]:
home_team_id

Unnamed: 0,game_id,home_team,team_id,team,team_epl
0,1485186,Crystal Palace,162,Crystal Palace,Crystal Palace
1,1485206,Crystal Palace,162,Crystal Palace,Crystal Palace
2,1485225,Crystal Palace,162,Crystal Palace,Crystal Palace
3,1485277,Crystal Palace,162,Crystal Palace,Crystal Palace
4,1485242,Crystal Palace,162,Crystal Palace,Crystal Palace
...,...,...,...,...,...
375,1485382,Manchester City,167,Man City,Manchester City
376,1485550,Manchester City,167,Man City,Manchester City
377,1485508,Manchester City,167,Man City,Manchester City
378,1485503,Manchester City,167,Man City,Manchester City


In [10]:

actions.sort_values(by=['game_id', 'original_event_id'], inplace=True)

# match_home_id = team_matches.loc[(team_matches.matchId == g) & (team_matches.side == 'home'), 'teamId'].values[0]

spadl = []
import socceraction.spadl as spd
from tqdm import tqdm
for game_id, actions_subdf in tqdm(actions.groupby('game_id')):
    match_home_id = home_team_id[home_team_id.game_id == game_id].team_id.values[0]
    match_actions = spd.play_left_to_right(actions=actions, home_team_id=match_home_id)
    match_actions = spd.add_names(match_actions)
    spadl.append(match_actions)
spadl = pd.concat(spadl).reset_index(drop=True)

 76%|███████▌  | 289/380 [04:30<01:20,  1.13it/s]

In [115]:
import socceraction.xthreat as xthreat
url_grid = "https://karun.in/blog/data/open_xt_12x8_v1.json"
xTModel = xthreat.load_model(url_grid)

In [13]:
actions.loc[:, 'xT'] = xTModel.rate(actions)

In [36]:
X_train

Unnamed: 0,full_dt,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,result
0,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23,90,0,0,49,0,74.4,win
0,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29,86,1,1,27,0,82.4,win
0,2020-09-12 12:30:00,Bernd Leno,Arsenal,GK,28,90,0,0,34,0,100.0,win
0,2020-09-12 12:30:00,Dani Ceballos,Arsenal,CM,24,12,0,0,20,0,78.9,win
0,2020-09-12 12:30:00,Eddie Nketiah,Arsenal,FW,21,4,0,0,1,0,100.0,win
...,...,...,...,...,...,...,...,...,...,...,...,...
379,2021-05-23 16:00:00,Romain Saïss,Wolverhampton Wanderers,CB,31,90,0,0,90,0,88.8,win
379,2021-05-23 16:00:00,Rui Patrício,Wolverhampton Wanderers,GK,33,90,0,0,14,0,91.7,win
379,2021-05-23 16:00:00,Rúben Neves,Wolverhampton Wanderers,CM,24,90,0,0,98,0,86.0,win
379,2021-05-23 16:00:00,Willian José,Wolverhampton Wanderers,FW,29,64,1,0,22,0,72.2,win


In [38]:
X_train

Unnamed: 0,full_dt,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,result
0,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23,90,0,0,49,0,74.4,win
0,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29,86,1,1,27,0,82.4,win
0,2020-09-12 12:30:00,Bernd Leno,Arsenal,GK,28,90,0,0,34,0,100.0,win
0,2020-09-12 12:30:00,Dani Ceballos,Arsenal,CM,24,12,0,0,20,0,78.9,win
0,2020-09-12 12:30:00,Eddie Nketiah,Arsenal,FW,21,4,0,0,1,0,100.0,win
...,...,...,...,...,...,...,...,...,...,...,...,...
379,2021-05-23 16:00:00,Romain Saïss,Wolverhampton Wanderers,CB,31,90,0,0,90,0,88.8,win
379,2021-05-23 16:00:00,Rui Patrício,Wolverhampton Wanderers,GK,33,90,0,0,14,0,91.7,win
379,2021-05-23 16:00:00,Rúben Neves,Wolverhampton Wanderers,CM,24,90,0,0,98,0,86.0,win
379,2021-05-23 16:00:00,Willian José,Wolverhampton Wanderers,FW,29,64,1,0,22,0,72.2,win


In [44]:
X_train.merge(actions.groupby('player').xT.sum().reset_index(), how='outer')

Unnamed: 0,full_dt,player,team,pos,player_age,time_played,shots_on_target,goals,touches,yellow_cards,completed_passes_perc,result,xT
0,2020-09-12 12:30:00,Ainsley Maitland-Niles,Arsenal,LM,23.0,90.0,0.0,0.0,49.0,0.0,74.4,win,0.072553
1,2020-09-12 12:30:00,Alexandre Lacazette,Arsenal,FW,29.0,86.0,1.0,1.0,27.0,0.0,82.4,win,0.022638
2,2020-09-12 12:30:00,Bernd Leno,Arsenal,GK,28.0,90.0,0.0,0.0,34.0,0.0,100.0,win,-1.961479
3,2020-09-12 12:30:00,Dani Ceballos,Arsenal,CM,24.0,12.0,0.0,0.0,20.0,0.0,78.9,win,-0.023268
4,2020-09-12 12:30:00,Eddie Nketiah,Arsenal,FW,21.0,4.0,0.0,0.0,1.0,0.0,100.0,win,-0.000981
5,2020-09-12 12:30:00,Gabriel Dos Santos,Arsenal,CB,22.0,90.0,1.0,1.0,120.0,0.0,94.7,win,
6,2020-09-12 12:30:00,Granit Xhaka,Arsenal,CM,27.0,78.0,0.0,0.0,68.0,0.0,91.7,win,-0.092769
7,2020-09-12 12:30:00,Héctor Bellerín,Arsenal,RM,25.0,90.0,0.0,0.0,71.0,1.0,86.8,win,0.025177
8,2020-09-12 12:30:00,Kieran Tierney,Arsenal,CB,23.0,90.0,0.0,0.0,99.0,0.0,84.6,win,0.045531
9,2020-09-12 12:30:00,Mohamed Elneny,Arsenal,CM,28.0,90.0,0.0,0.0,68.0,0.0,95.2,win,-0.224297


In [28]:
players = pd.read_json('players.json')
y = players.loc[:, ['firstName', 'middleName', 'lastName', 'wyId']]

x = actions.loc[:, ['player_id']]
x.player_id = x.player_id.astype(int)

x.merge(y, left_on='player_id', right_on='wyId')

Unnamed: 0,player_id,firstName,middleName,lastName,wyId
