### Código para la obtención y limpieza de los datos de StatsBoom 

In [1]:
from statsbombpy import sb
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

In [216]:
def cleanData(team,events,comp,season):
    """
    Algoritmo que recibe un Data Frame de eventos y lo convierte en un Data Frame de las métricas que nos interesan para el análisis.
    """
    # Lista donde se agregan los valores de las métricas
    metricas = [team,comp,season]
    # Nombre de las columnas asociadas a las métricas
    columns=['Equipo','Competición','Temporada','Partidos','%Posesion de balon','%Posesion en campo rival','Tiros','Tiros al arco','Goles','Goles esperados','Pases totales',
             'Centros','Pases bajo presion', '%Pases exitosos totales', '%Pases exitosos bajo presion',
             '%Pases exitosos sin presion','Offsides','Recuperaciones de balon',
             'tiempo_presion','tiempo_presion_ofensiva','tiempo_presion_defensiva',
             'bloqueos','despejes','duelos_ganados','faltas_cometidas','#se_llevan_a_un_defensa','intercepciones']

    # Partidos
    partidos=events['match_id'].unique()
    num_partidos=len(partidos)
    metricas.append(num_partidos)

    # Posesión promedio del equipo
    eventos_con_tiempos = events[events['type'].isin(['Pass', 'Carry', 'Duel', 'Shot', '50/50'])]
    tiempo_total = eventos_con_tiempos['duration'].sum()
    posesion = eventos_con_tiempos[eventos_con_tiempos['possession_team']==team]
    porcentaje_posesion = posesion['duration'].sum()*100/tiempo_total
    metricas.append(porcentaje_posesion)

    # Posesión promedio del equipo en campo rival
    posesion_campo_rival = posesion[posesion['location'].notna()]
    posesion_campo_rival['location_x_greater_than_60'] = posesion_campo_rival['location'].apply(lambda loc: loc[0] > 60 if isinstance(loc, list) else False)
    posesion_campo_rival = posesion_campo_rival[posesion_campo_rival['location_x_greater_than_60']==True]
    porcentaje_posesion_cam_rival = (posesion_campo_rival['duration'].sum()*100/tiempo_total)
    metricas.append(porcentaje_posesion_cam_rival)

    # Tiros
    tiros = events[(events['type']=='Shot') & (events['team']==team) & ~(events['shot_outcome']=='Wayward')]
    metricas.append(tiros.shape[0]/num_partidos)

    # Tiros al arco
    tiros_arco = tiros[tiros['shot_outcome'].isin(['Goal','Saved','Saved to Post','Post'])]
    if 'shot_deflected' in tiros_arco.columns:
        tiros_arco = tiros_arco[tiros_arco['shot_deflected'].isna()]

    metricas.append(tiros_arco.shape[0]/num_partidos)

    # Goles
    goles = tiros_arco[tiros_arco['shot_outcome'] == 'Goal']
    metricas.append(goles.shape[0]/num_partidos)

    # Goles esperados
    xg_statsbomb = tiros['shot_statsbomb_xg'].sum()
    metricas.append(xg_statsbomb/num_partidos)

    # Mapeo de valores para su fácil filtración
    events['successful_pass'] = events['pass_outcome'].isna()
    events['under_pressure'] = events['under_pressure'].replace({np.nan: False})

    # Pases totales
    pases_tot = events[(events['type'] == 'Pass') & (events['team']==team) & ~(events['pass_outcome'].isin(['Unknown','Injury Clearance','Pass Offside']))]
    metricas.append((pases_tot.shape[0])/num_partidos)

    # Centros 
    centros = pases_tot[~pases_tot['pass_cross'].isna()].shape[0]
    metricas.append(centros/num_partidos)

    # Pases bajo presión
    total_bajo_presion = pases_tot[pases_tot['under_pressure'] == True]
    metricas.append(total_bajo_presion.shape[0]/num_partidos)

    # Otros tipos de pases para calcular los porcentajes
    pases_exitosos_totales = pases_tot[pases_tot['successful_pass'] == True]
    exitos_bajo_presion = total_bajo_presion[total_bajo_presion['successful_pass'] == True]
    total_sin_presion = pases_tot[pases_tot['under_pressure'] == False]
    exitos_sin_presion = total_sin_presion[total_sin_presion['successful_pass'] == True]

    # Porcentajes relevantes de los pases.
    ppe_tot = round(100*pases_exitosos_totales.shape[0] / pases_tot.shape[0], 2)
    metricas.append(ppe_tot)
    ppe_bajo_presion = round(100*exitos_bajo_presion.shape[0] / total_bajo_presion.shape[0], 2)
    metricas.append(ppe_bajo_presion)
    ppe_sin_presion = round(100*exitos_sin_presion.shape[0] / total_sin_presion.shape[0], 2)
    metricas.append(ppe_sin_presion)

    # Offsides
    pass_offsides = pases_tot[pases_tot['pass_outcome']=='Pass Offside']
    other_offsides = events[events['type']=='Offside']
    metricas.append((pass_offsides.shape[0]+other_offsides.shape[0]) / num_partidos)

    # Recuperaciones de balón
    recuperaciones = events[(events['type']=='Ball Recovery') & (events['ball_recovery_recovery_failure'].isna())
                            & (events['team']==team)].shape[0]
    metricas.append(recuperaciones/num_partidos)

    # Presión por partido
    # Puede ser mayor a 90 ya que a la vez puede haber varios jugadores presionando
    presion_total =events[(events['type']=='Pressure') & (events['team'] == team)]
    presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
    presion_ofensiva=presion_total[(presion_total['x']>=60)]
    presion_defensiva=presion_total[(presion_total['x']<60)]
    tiempo_presion=presion_total['duration'].sum()
    tiempo_presion_ofensiva=presion_ofensiva['duration'].sum()
    tiempo_presion_defensiva=presion_defensiva['duration'].sum()
    metricas.append(tiempo_presion/num_partidos)
    metricas.append(tiempo_presion_ofensiva/num_partidos)
    metricas.append(tiempo_presion_defensiva/num_partidos)

    # Bloqueos
    bloqueos = events[(events['type']=='Block') & (events['team']==team)]
    metricas.append(bloqueos.shape[0]/num_partidos)

    #Despejes
    despejes=events[(events['type']=='Clearance') & (events['team']==team)]
    metricas.append(despejes.shape[0]/num_partidos)

    #Duelos ganados
    duelos=events[(events['type']=='Duel') & (events['team']==team)]
    duelos=duelos[duelos['duel_outcome'].isin(['Won','Lost Out','Success','Succes In Play','Succes Out'])]
    metricas.append(duelos.shape[0]/num_partidos)

    # Faltas
    faltas = events[(events['type']=='Foul Committed') & (events['team']==team)]
    metricas.append(faltas.shape[0]/num_partidos)

    # Veces que un jugador contrario se lleva a un defensor
    llevadas=events[(events['type']=='Dribbled Past') & (events['team']==team)]
    metricas.append(llevadas.shape[0]/num_partidos)

    # Intercepciones
    inter=events[(events['type']=='Interception') & (events['team']==team)]
    inter= inter[(inter['interception_outcome'].isin(['Won','Lost Out','Success','Succes In Play','Succes Out']))]
    metricas.append(inter.shape[0]/num_partidos)

    return pd.DataFrame([metricas], columns=columns)


In [227]:
def obtener_of_evento(competition_id, season_id):
    competitions = sb.competitions()
    comp = competitions[competitions['competition_id']==competition_id]['competition_name'].unique()[0]
    season = competitions[competitions['season_id']==season_id]['season_name'].unique()[0]
    all_matches= sb.matches(competition_id=competition_id, season_id=season_id)
    teams=(all_matches['home_team'].unique())
    team_events = {}
    for team in teams:
        team_events[team] = []
    df = None
    for index, match in all_matches.iterrows():
        team1 = match['home_team']
        team2 = match['away_team']
        id = match.match_id
        match_events = sb.events(match_id=id)
        team_events[team1].append(match_events)
        team_events[team2].append(match_events)
    for team, events in team_events.items():
        events = pd.concat(events, axis=0, ignore_index=True)
        aux = cleanData(team, events,comp,season)
        df = pd.concat([df,aux])
    return df

In [213]:
competitions = [(comp['competition_id'],comp['season_id']) for index,comp in sb.competitions().iterrows()]

In [None]:
warnings.filterwarnings("ignore", category=UserWarning, module="statsbombpy.api_client")
datos=obtener_of_evento(competition_id=55,season_id=282)

In [229]:
datos

Unnamed: 0,Equipo,Competición,Temporada,Partidos,%Posesion de balon,%Posesion en campo rival,Tiros,Tiros al arco,Goles,Goles esperados,Pases totales,Centros,Pases bajo presion,%Pases exitosos totales,%Pases exitosos bajo presion,%Pases exitosos sin presion,Offsides,Recuperaciones de balon,tiempo_presion,tiempo_presion_ofensiva,tiempo_presion_defensiva,bloqueos,despejes,duelos_ganados,faltas_cometidas,#se_llevan_a_un_defensa,intercepciones
0,Netherlands,UEFA Euro,2024,6,52.053381,23.459117,12.666667,3.833333,1.333333,1.096076,515.333333,11.0,55.666667,86.64,72.75,88.32,0.5,32.166667,95.307904,39.449232,55.858673,15.166667,15.666667,6.0,12.333333,3.833333,5.166667
0,Spain,UEFA Euro,2024,7,59.632114,30.032781,16.428571,5.714286,1.714286,1.360641,614.857143,12.428571,87.714286,88.48,79.64,89.95,0.428571,41.857143,132.221201,80.294861,51.92634,19.0,16.285714,5.142857,14.714286,7.142857,4.0
0,Portugal,UEFA Euro,2024,5,67.485387,34.662469,18.2,6.6,1.8,2.847763,769.6,20.0,80.6,88.38,76.67,89.75,0.2,51.6,106.289988,56.413268,49.87672,20.6,17.2,7.4,8.8,8.2,4.0
0,Denmark,UEFA Euro,2024,4,51.233354,22.240022,12.5,4.0,0.5,0.934192,584.75,12.0,76.25,85.21,73.77,86.92,0.5,37.5,109.127401,59.892019,49.235383,13.75,14.75,5.0,13.75,5.5,3.75
0,England,UEFA Euro,2024,7,56.790116,25.780906,11.285714,4.142857,1.714286,1.493675,642.285714,12.428571,81.857143,87.61,77.66,89.06,0.142857,39.285714,91.436024,44.485765,46.950258,18.571429,14.0,5.142857,10.142857,3.571429,3.142857
0,Ukraine,UEFA Euro,2024,3,51.049375,19.765298,13.0,4.0,0.666667,0.888282,492.333333,9.0,46.666667,85.38,70.71,86.91,0.333333,33.0,116.594733,50.292766,66.301966,17.333333,14.0,7.333333,12.0,11.333333,3.666667
0,Czech Republic,UEFA Euro,2024,3,39.664411,18.421262,12.666667,5.666667,1.0,1.410359,366.0,17.333333,63.0,75.32,60.85,78.33,0.0,41.333333,118.966821,50.449702,68.51712,20.666667,20.0,6.666667,15.666667,10.0,5.333333
0,Austria,UEFA Euro,2024,4,54.33399,25.74355,11.75,5.0,1.25,1.483113,513.25,14.5,89.25,82.12,71.15,84.43,0.0,45.25,144.570004,73.420713,71.149292,25.5,13.25,7.0,17.0,9.75,5.25
0,Romania,UEFA Euro,2024,4,40.094516,15.204282,9.25,4.0,1.0,0.933056,333.0,7.5,40.0,74.55,59.38,76.62,0.5,35.5,115.035119,41.412458,73.622662,17.75,23.75,5.0,12.25,7.25,6.0
0,France,UEFA Euro,2024,6,49.383287,25.546411,15.5,4.833333,1.166667,1.811011,572.666667,16.333333,79.0,88.24,76.16,90.18,0.5,33.0,117.716547,57.841161,59.875386,18.833333,17.0,7.5,12.5,4.666667,4.166667
