In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import json
from collections import defaultdict
import utils
from tqdm.notebook import tqdm, tqdm_notebook
# Register tqdm's pandas integration so progress-aware pandas methods
# (e.g. `progress_apply`) are available in this notebook.
tqdm_notebook.pandas()

# Lift pandas' display truncation: show every column and every row of a frame.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [10]:
# Build the "traditional" per-match statistics from the HOME team's point of
# view (team1 = home side) and write one CSV per tournament.
#
# The two helpers below are defined in this cell so that it runs on its own
# after a kernel restart.

def _count_per_team(selection):
    """Count the events in `selection` for every (matchId, teamId) pair.

    Returns a frame with the columns `matchId`, `teamId` and `count`, where
    `count` is the number of non-null `eventId` values in the group.
    """
    return (
        selection.groupby(['matchId', 'teamId'])['eventId']
        .count()
        .reset_index(name='count')
    )


def _attach_feature(frame, counts, team_col, column):
    """Left-join per-team counts onto the match frame as a new column.

    frame    -- match-level frame holding `matchID` and the team id columns.
    counts   -- output of `_count_per_team`.
    team_col -- 'team1_ID' or 'team2_ID': whose numbers are attached.
    column   -- name of the resulting feature column.

    Matches with no row in `counts` get NaN in `column`. The count is renamed
    before merging so that attaching the same counts for both teams never
    relies on pandas' `_x` / `_y` suffixes.
    """
    return (
        frame.merge(
            counts.rename(columns={'count': column}),
            how='left',
            left_on=['matchID', team_col],
            right_on=['matchId', 'teamId'],
        )
        .drop(['matchId', 'teamId'], axis=1)
    )


print(" -- HOME --")
print("")

# Not used by the feature computation below; read once (instead of once per
# tournament) and kept available as `teams`.
teams = pd.read_json('teams.json')

for tournament in tqdm(utils.TOURNAMENTS, total=len(utils.TOURNAMENTS)):
    print("TOURNAMENT: %s" %tournament)

    # Forward slashes work on every OS; the previous 'events\events_...'
    # form depended on the invalid escapes '\e' / '\m' and on Windows paths.
    events = pd.read_json('events/events_%s.json' % tournament)
    matches = pd.read_json('matches/matches_%s.json' % tournament)

    # Drop events whose matchPeriod is 'P' (penalties, per the original note).
    # The explicit copy lets the column assignment below act on an
    # independent frame instead of a filtered view.
    events = events[events['matchPeriod'] != 'P'].copy()

    # Convert the 'tags' column so membership tests such as `1801 in tags`
    # work (1801 / 1802 mark whether an event was successful or not).
    events['tags'] = events['tags'].map(utils.converting_tags)

    # Row masks reused by several features (computed once per tournament).
    tags = events['tags']
    is_accurate = tags.apply(lambda t: 1801 in t)
    is_shot = events['subEventName'].isin(['Shot', 'Free kick shot', 'Penalty'])
    is_pass = events['eventName'] == 'Pass'
    is_save_attempt = events['eventName'] == 'Save attempt'

    # -- HOME --

    # One row per match; the feature columns are attached below.
    dfh = matches[['wyId']].copy()
    dfh = dfh.rename(columns={'wyId':'matchID'})

    # team1 = home side, team2 = away side.
    home_l = []
    away_l = []

    for match in matches['teamsData']:
        sides = list(match.values())
        t0 = sides[0]
        t1 = sides[1]

        if t0['side'] == 'home':
            home_l.append(t0['teamId'])
            away_l.append(t1['teamId'])
        else:
            away_l.append(t0['teamId'])
            home_l.append(t1['teamId'])

    dfh['team1_ID'] = home_l
    dfh['team2_ID'] = away_l

    dfh['date'] = matches['date']

    dfh['tournament'] = tournament


    # GK_SAVES: 'Save attempt' events tagged 1801 (successful).
    gk_saves = _count_per_team(events[is_save_attempt & is_accurate])
    dfh = _attach_feature(dfh, gk_saves, 'team1_ID', 'gk_saves_T1')


    # RED_CARDS: events tagged 1701 (red card) or 1703 (second yellow card).
    red_card = _count_per_team(events[tags.apply(lambda t: (1701 in t) or (1703 in t))])
    dfh = _attach_feature(dfh, red_card, 'team1_ID', 'red_card_T1')


    # YELLOW_CARDS: events tagged 1702 (yellow card) or 1703 (second yellow card).
    yellow_card = _count_per_team(events[tags.apply(lambda t: (1702 in t) or (1703 in t))])
    dfh = _attach_feature(dfh, yellow_card, 'team1_ID', 'yellow_card_T1')


    # ASSISTS: events tagged 301.
    assists = _count_per_team(events[tags.apply(lambda t: 301 in t)])
    dfh = _attach_feature(dfh, assists, 'team1_ID', 'assists_T1')


    # SHOTS: 'Shot', 'Free kick shot' or 'Penalty' sub-events, for the home
    # team and (as shots against) for the away team.
    shots = _count_per_team(events[is_shot])
    dfh = _attach_feature(dfh, shots, 'team1_ID', 'shots_T1')
    dfh = _attach_feature(dfh, shots, 'team2_ID', 'shots_against_T1')


    # SHOTS ON TARGET: the same sub-events, tagged 1801 (accurate).
    shots_on_target = _count_per_team(events[is_shot & is_accurate])
    dfh = _attach_feature(dfh, shots_on_target, 'team1_ID', 'shots_on_target_T1')


    # PASSES: 'Pass' events tagged 1801 (accurate), for both teams; the away
    # team's count is only needed for the possession ratio.
    passes = _count_per_team(events[is_pass & is_accurate])
    dfh = _attach_feature(dfh, passes, 'team1_ID', 'passes_T1')
    dfh = _attach_feature(dfh, passes, 'team2_ID', 'passes_T2')


    # GOALS: non-'Save attempt' events tagged 101 (goal) / 102 (own goal).
    goals = _count_per_team(events[~is_save_attempt & tags.apply(lambda t: 101 in t)])
    own_goals = _count_per_team(events[~is_save_attempt & tags.apply(lambda t: 102 in t)])

    dfh = _attach_feature(dfh, goals, 'team1_ID', 'goals_T1')
    dfh = _attach_feature(dfh, goals, 'team2_ID', 'goals_T2')
    dfh = _attach_feature(dfh, own_goals, 'team1_ID', 'own_goals_T1')
    dfh = _attach_feature(dfh, own_goals, 'team2_ID', 'own_goals_T2')

    # A team with no qualifying events has no row in the counts -> NaN -> 0.
    dfh = dfh.fillna(0)

    # An own goal counts for the opposing team.
    dfh['goals_T1'] = dfh['goals_T1'] + dfh['own_goals_T2']
    dfh['goals_against_T1'] = dfh['goals_T2'] + dfh['own_goals_T1']

    dfh = dfh.drop(['own_goals_T1', 'own_goals_T2', 'goals_T2'], axis=1)


    # BALL POSSESSION: home accurate passes / accurate passes of both teams.
    dfh['ball_possession_T1'] = dfh['passes_T1'] / (dfh['passes_T1'] + dfh['passes_T2'])
    dfh = dfh.drop(['passes_T2'], axis=1)


    # ACCURACY

    # pass accuracy: accurate passes / all 'Pass' events.
    total_passes = _count_per_team(events[is_pass])
    dfh = _attach_feature(dfh, total_passes, 'team1_ID', 'total_passes_T1')

    dfh['pass_acc_T1'] = dfh['passes_T1'] / dfh['total_passes_T1']

    # goalkeeper accuracy: successful saves / all 'Save attempt' events.
    gk_att = _count_per_team(events[is_save_attempt])
    dfh = _attach_feature(dfh, gk_att, 'team1_ID', 'gk_att_T1')

    dfh['gk_acc_T1'] = dfh['gk_saves_T1'] / dfh['gk_att_T1']

    # shot accuracy: shots on target / shots.
    dfh['shot_acc_T1'] = dfh['shots_on_target_T1'] / dfh['shots_T1']

    dfh = dfh.drop(['total_passes_T1', 'gk_att_T1'], axis=1)
    # Ratios with a missing or zero denominator that came out as NaN become 0.
    dfh = dfh.fillna(0)

    # WIN COLUMN
    # The merges above rebuilt dfh's index while keeping one row per match in
    # `matches` order, so the winner is attached by position rather than by
    # index label.
    dfh['winner'] = matches['winner'].to_numpy()
    # Draws (winner == 0) are removed; copy so the assignment below does not
    # write into a filtered view.
    dfh = dfh.loc[dfh['winner'] != 0].copy()
    # 1 when the home team won, 0 otherwise.
    dfh['winner'] = np.where((dfh['winner'] == dfh['team1_ID']), 1, 0)

    print("gerando csv")
    dfh.to_csv('analitics/traditional_%s_home.csv' %tournament, encoding='utf-8', index=False)


 -- HOME --

TOURNAMENT: World_Cup
gerando csv


In [13]:
# (rows, columns) of the home frame left by the last tournament processed in the loop above.
dfh.shape

(55, 20)

In [20]:
# Build the "traditional" per-match statistics from the AWAY team's point of
# view (team2 = away side) and write one CSV per tournament.
#
# The two helpers below are defined in this cell so that it runs on its own
# after a kernel restart.

def _count_per_team(selection):
    """Count the events in `selection` for every (matchId, teamId) pair.

    Returns a frame with the columns `matchId`, `teamId` and `count`, where
    `count` is the number of non-null `eventId` values in the group.
    """
    return (
        selection.groupby(['matchId', 'teamId'])['eventId']
        .count()
        .reset_index(name='count')
    )


def _attach_feature(frame, counts, team_col, column):
    """Left-join per-team counts onto the match frame as a new column.

    frame    -- match-level frame holding `matchID` and the team id columns.
    counts   -- output of `_count_per_team`.
    team_col -- 'team1_ID' or 'team2_ID': whose numbers are attached.
    column   -- name of the resulting feature column.

    Matches with no row in `counts` get NaN in `column`. The count is renamed
    before merging so that attaching the same counts for both teams never
    relies on pandas' `_x` / `_y` suffixes.
    """
    return (
        frame.merge(
            counts.rename(columns={'count': column}),
            how='left',
            left_on=['matchID', team_col],
            right_on=['matchId', 'teamId'],
        )
        .drop(['matchId', 'teamId'], axis=1)
    )


print("-- AWAY --")
print("")

# Not used by the feature computation below; read once (instead of once per
# tournament) and kept available as `teams`.
teams = pd.read_json('teams.json')

for tournament in tqdm(utils.TOURNAMENTS, total=len(utils.TOURNAMENTS)):
    print("TOURNAMENT: %s" %tournament)

    # Forward slashes work on every OS; the previous 'events\events_...'
    # form depended on the invalid escapes '\e' / '\m' and on Windows paths.
    events = pd.read_json('events/events_%s.json' % tournament)
    matches = pd.read_json('matches/matches_%s.json' % tournament)

    # Drop events whose matchPeriod is 'P' (penalties, per the original note).
    # The explicit copy lets the column assignment below act on an
    # independent frame instead of a filtered view.
    events = events[events['matchPeriod'] != 'P'].copy()

    # Convert the 'tags' column so membership tests such as `1801 in tags`
    # work (1801 / 1802 mark whether an event was successful or not).
    events['tags'] = events['tags'].map(utils.converting_tags)

    # Row masks reused by several features (computed once per tournament).
    tags = events['tags']
    is_accurate = tags.apply(lambda t: 1801 in t)
    is_shot = events['subEventName'].isin(['Shot', 'Free kick shot', 'Penalty'])
    is_pass = events['eventName'] == 'Pass'
    is_save_attempt = events['eventName'] == 'Save attempt'

    # -- AWAY --

    # One row per match; the feature columns are attached below.
    dfa = matches[['wyId']].copy()
    dfa = dfa.rename(columns={'wyId':'matchID'})

    # team1 = home side, team2 = away side.
    home_l = []
    away_l = []

    for match in matches['teamsData']:
        sides = list(match.values())
        t0 = sides[0]
        t1 = sides[1]

        if t0['side'] == 'home':
            home_l.append(t0['teamId'])
            away_l.append(t1['teamId'])
        else:
            away_l.append(t0['teamId'])
            home_l.append(t1['teamId'])

    dfa['team1_ID'] = home_l
    dfa['team2_ID'] = away_l

    dfa['date'] = matches['date']

    dfa['tournament'] = tournament


    # GK_SAVES: 'Save attempt' events tagged 1801 (successful).
    gk_saves = _count_per_team(events[is_save_attempt & is_accurate])
    dfa = _attach_feature(dfa, gk_saves, 'team2_ID', 'gk_saves_T2')


    # RED_CARDS: events tagged 1701 (red card) or 1703 (second yellow card).
    red_card = _count_per_team(events[tags.apply(lambda t: (1701 in t) or (1703 in t))])
    dfa = _attach_feature(dfa, red_card, 'team2_ID', 'red_card_T2')


    # YELLOW_CARDS: events tagged 1702 (yellow card) or 1703 (second yellow card).
    yellow_card = _count_per_team(events[tags.apply(lambda t: (1702 in t) or (1703 in t))])
    dfa = _attach_feature(dfa, yellow_card, 'team2_ID', 'yellow_card_T2')


    # ASSISTS: events tagged 301.
    assists = _count_per_team(events[tags.apply(lambda t: 301 in t)])
    dfa = _attach_feature(dfa, assists, 'team2_ID', 'assists_T2')


    # SHOTS: 'Shot', 'Free kick shot' or 'Penalty' sub-events, for the away
    # team and (as shots against) for the home team.
    shots = _count_per_team(events[is_shot])
    dfa = _attach_feature(dfa, shots, 'team2_ID', 'shots_T2')
    dfa = _attach_feature(dfa, shots, 'team1_ID', 'shots_against_T2')


    # SHOTS ON TARGET: the same sub-events, tagged 1801 (accurate).
    shots_on_target = _count_per_team(events[is_shot & is_accurate])
    dfa = _attach_feature(dfa, shots_on_target, 'team2_ID', 'shots_on_target_T2')


    # PASSES: 'Pass' events tagged 1801 (accurate), for both teams; the home
    # team's count is only needed for the possession ratio.
    passes = _count_per_team(events[is_pass & is_accurate])
    dfa = _attach_feature(dfa, passes, 'team2_ID', 'passes_T2')
    dfa = _attach_feature(dfa, passes, 'team1_ID', 'passes_T1')


    # GOALS: non-'Save attempt' events tagged 101 (goal) / 102 (own goal).
    goals = _count_per_team(events[~is_save_attempt & tags.apply(lambda t: 101 in t)])
    own_goals = _count_per_team(events[~is_save_attempt & tags.apply(lambda t: 102 in t)])

    dfa = _attach_feature(dfa, goals, 'team1_ID', 'goals_T1')
    dfa = _attach_feature(dfa, goals, 'team2_ID', 'goals_T2')
    dfa = _attach_feature(dfa, own_goals, 'team1_ID', 'own_goals_T1')
    dfa = _attach_feature(dfa, own_goals, 'team2_ID', 'own_goals_T2')

    # A team with no qualifying events has no row in the counts -> NaN -> 0.
    dfa = dfa.fillna(0)

    # An own goal counts for the opposing team.
    dfa['goals_T2'] = dfa['goals_T2'] + dfa['own_goals_T1']
    dfa['goals_against_T2'] = dfa['goals_T1'] + dfa['own_goals_T2']

    dfa = dfa.drop(['own_goals_T1', 'own_goals_T2', 'goals_T1'], axis=1)


    # BALL POSSESSION: away accurate passes / accurate passes of both teams.
    dfa['ball_possession_T2'] = dfa['passes_T2'] / (dfa['passes_T2'] + dfa['passes_T1'])
    dfa = dfa.drop(['passes_T1'], axis=1)


    # ACCURACY

    # pass accuracy: accurate passes / all 'Pass' events.
    total_passes = _count_per_team(events[is_pass])
    dfa = _attach_feature(dfa, total_passes, 'team2_ID', 'total_passes_T2')

    dfa['pass_acc_T2'] = dfa['passes_T2'] / dfa['total_passes_T2']

    # goalkeeper accuracy: successful saves / all 'Save attempt' events.
    gk_att = _count_per_team(events[is_save_attempt])
    dfa = _attach_feature(dfa, gk_att, 'team2_ID', 'gk_att_T2')

    dfa['gk_acc_T2'] = dfa['gk_saves_T2'] / dfa['gk_att_T2']

    # shot accuracy: shots on target / shots.
    dfa['shot_acc_T2'] = dfa['shots_on_target_T2'] / dfa['shots_T2']

    dfa = dfa.drop(['total_passes_T2', 'gk_att_T2'], axis=1)
    # Ratios with a missing or zero denominator that came out as NaN become 0.
    dfa = dfa.fillna(0)

    # Remove draws (winner == 0). The merges above rebuilt dfa's index while
    # keeping one row per match in `matches` order, so the mask is applied by
    # position rather than by index label.
    dfa = dfa.loc[(matches['winner'] != 0).to_numpy()]

    print("gerando csv")
    dfa.to_csv('analitics/traditional_%s_away.csv' %tournament, encoding='utf-8', index=False)


-- AWAY --



  0%|          | 0/7 [00:00<?, ?it/s]

TOURNAMENT: Italy
gerando csv
TOURNAMENT: England
gerando csv
TOURNAMENT: Germany
gerando csv
TOURNAMENT: France
gerando csv
TOURNAMENT: Spain
gerando csv
TOURNAMENT: European_Championship
gerando csv
TOURNAMENT: World_Cup
gerando csv


In [6]:
# Preview only the first rows: with display.max_rows set to None a bare `dfa`
# would render the whole frame into the notebook.
dfa.head(10)

Unnamed: 0,matchID,team1_ID,team2_ID,date,tournament,gk_saves_T2,red_card_T2,yellow_card_T2,assists_T2,shots_T2,shots_against_T2,shots_on_target_T2,passes_T2,goals_T2,goals_against_T2,ball_possession_T2,pass_acc_T2,gk_acc_T2,shot_acc_T2
0,2058017,4418,9598,2018-07-15 17:00:00-02:00,World_Cup,3.0,0.0,1.0,1.0,14,8,3.0,429,2.0,4.0,0.697561,0.828185,0.428571,0.214286
1,2058016,5629,2413,2018-07-14 16:00:00-02:00,World_Cup,2.0,0.0,2.0,0.0,14,11,5.0,593,0.0,2.0,0.581373,0.906728,0.5,0.357143
2,2058015,9598,2413,2018-07-11 20:00:00-02:00,World_Cup,5.0,0.0,1.0,0.0,11,22,2.0,370,1.0,2.0,0.440476,0.825893,0.714286,0.181818
3,2058014,4418,5629,2018-07-10 20:00:00-02:00,World_Cup,4.0,0.0,3.0,0.0,9,18,2.0,535,0.0,1.0,0.654835,0.905245,0.8,0.222222
4,2058012,14358,9598,2018-07-07 20:00:00-02:00,World_Cup,5.0,0.0,4.0,2.0,17,11,3.0,594,2.0,2.0,0.684332,0.830769,0.714286,0.176471
5,2058013,7047,2413,2018-07-07 16:00:00-02:00,World_Cup,3.0,0.0,1.0,2.0,11,7,2.0,402,2.0,0.0,0.589443,0.828866,1.0,0.181818
6,2058011,6380,5629,2018-07-06 20:00:00-02:00,World_Cup,8.0,0.0,2.0,1.0,8,25,3.0,304,2.0,1.0,0.410256,0.873563,0.888889,0.375
7,2058010,15670,4418,2018-07-06 16:00:00-02:00,World_Cup,3.0,0.0,2.0,2.0,9,9,2.0,417,2.0,0.0,0.647516,0.817647,1.0,0.222222
8,2058009,12430,2413,2018-07-03 20:00:00-02:00,World_Cup,3.0,0.0,2.0,0.0,15,13,2.0,457,1.0,1.0,0.529548,0.863894,0.75,0.133333
9,2058008,7047,6697,2018-07-03 16:00:00-02:00,World_Cup,2.0,1.0,2.0,0.0,17,12,4.0,476,0.0,1.0,0.712575,0.853047,0.666667,0.235294


In [19]:
# (rows, columns) of the home frame currently in the kernel (draws already removed).
dfh.shape

(55, 20)

In [5]:
# (rows, columns) of the raw matches frame last loaded by a loop above, draws included.
matches.shape


(64, 15)