### Código para la obtención y limpieza de los datos de StatsBoom 

In [190]:
from statsbombpy import sb
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [191]:
def eventsTeam(country,division,season,gender,team):
    """
    Algoritmo que obtiene todos los eventos de todos los partidos del equipo del argumento 'team' y en la 
    competencia específicada por los demás parámetros.
    """
    competitionInCountry = sb.competitions()[sb.competitions()['country_name'] == country]
    competitionGender = competitionInCountry[competitionInCountry['competition_gender'] == gender]
    competitionDivision = competitionGender[competitionGender['competition_name'] == division]
    competition = competitionDivision[competitionDivision['season_name'] == season]

    competition_id = competition['competition_id'].values[0]
    season_id = competition['season_id'].values[0]

    all_matches = sb.matches(competition_id= competition_id, season_id=season_id)
    team_matches = all_matches[(all_matches['home_team'] == team) | (all_matches['away_team'] == team)]

    id_matches = list(team_matches.match_id.unique())
    events = []
    for match in id_matches:
        events.append(sb.events(match_id=match))
    events = pd.concat(events)

    return events

In [276]:
def cleanData(team,events):
    """
    Algoritmo que recibe un Data Frame de eventos y lo convierte en un Data Frame de las métricas que nos interesan para el análisis.
    """
    # Lista donde se agregan los valores de las métricas
    metricas = [team]
    # Nombre de las columnas asociadas a las métricas
    columns=['team','%posesion de balón','%posesion en campo rival','tiros','Tiros al arco','Goles','Goles esperados','pases_totales',
             'Centros','pases_bajo_presion', '%p_exitosos_totales', '%p_exitosos_bajo_presion',
             '%p_exitosos_sin_presion','Recupeaciones de Balón','recuperaciones']
    
    # Posesión promedio del equipo
    eventos_con_tiempos = events[events['type'].isin(['Pass', 'Carry', 'Pressure', 'Duel', 'Shot', '50/50'])]
    tiempo_total = eventos_con_tiempos['duration'].sum()
    posesion = eventos_con_tiempos[eventos_con_tiempos['possession_team']==team]
    porcentaje_posesion = posesion['duration'].sum()*100/tiempo_total
    metricas.append(porcentaje_posesion)

    # Posesión promedio del equipo en campo rival
    posesion_campo_rival = posesion[posesion['location'].notna()]
    posesion_campo_rival['location_x_greater_than_60'] = posesion_campo_rival['location'].apply(lambda loc: loc[0] > 60 if isinstance(loc, list) else False)
    posesion_campo_rival = posesion_campo_rival[posesion_campo_rival['location_x_greater_than_60']==True]
    porcentaje_posesion_cam_rival = (posesion_campo_rival['duration'].sum()*100/tiempo_total)
    metricas.append(porcentaje_posesion_cam_rival)

    # Tiros
    tiros = events[(events['type']=='Shot') & (events['team']==team)]
    metricas.append(tiros.shape[0])

    # Tiros al arco
    tiros_arco = tiros[tiros['shot_outcome'].isin(['Goal','Saved','Saved to Post','Post'])]
    metricas.append(tiros_arco.shape[0])

    # Goles
    goles = tiros_arco[tiros_arco['shot_outcome'] == 'Goal']
    metricas.append(goles.shape[0])

    # Goles esperados
    xg_statsbomb = tiros['shot_statsbomb_xg'].sum()
    metricas.append(xg_statsbomb)

    # Mapeo de valores para su fácil filtración
    events['successful_pass'] = events['pass_outcome'].isna()
    events['under_pressure'] = events['under_pressure'].replace({np.nan: False})

    # Pases totales
    pases_tot = events[(events['type'] == 'Pass') & (events['team']==team)]
    metricas.append(pases_tot.shape[0])

    # Centros 
    centros = pases_tot[~pases_tot['pass_cross'].isna()].shape[0]
    metricas.append(centros)

    # Pases bajo presión
    total_bajo_presion = pases_tot[pases_tot['under_pressure'] == True]
    metricas.append(total_bajo_presion.shape[0])

    # Otros tipos de pases para calcular los porcentajes
    pases_exitosos_totales = pases_tot[pases_tot['successful_pass'] == True]
    exitos_bajo_presion = total_bajo_presion[total_bajo_presion['successful_pass'] == True]
    total_sin_presion = pases_tot[pases_tot['under_pressure'] == False]
    exitos_sin_presion = total_sin_presion[total_sin_presion['successful_pass'] == True]

    # Porcentajes relevantes de los pases.
    ppe_tot = round(100*pases_exitosos_totales.shape[0] / pases_tot.shape[0], 2)
    metricas.append(ppe_tot)
    ppe_bajo_presion = round(100*exitos_bajo_presion.shape[0] / total_bajo_presion.shape[0], 2)
    metricas.append(ppe_bajo_presion)
    ppe_sin_presion = round(100*exitos_sin_presion.shape[0] / total_sin_presion.shape[0], 2)
    metricas.append(ppe_sin_presion)

    # Offsides
    pass_offsides = pases_tot[pases_tot['pass_outcome']=='Pass Offside']
    other_offsides = events[events['type']=='Offside']
    metricas.append(pass_offsides.shape[0]+other_offsides.shape[0])

    # Recuperaciones de balón
    recuperaciones = events[(events['type']=='Ball Recovery') & (events['ball_recovery_recovery_failure'].isna())
                            & (events['team']==team)].shape[0]
    metricas.append(recuperaciones)

    return pd.DataFrame([metricas], columns=columns)


In [None]:
team = 'Manchester City'
events = eventsTeam('England','Premier League','2015/2016','male',team)

In [277]:
data_man_city = cleanData(team,events)

In [278]:
data_man_city

Unnamed: 0,team,%posesion de balón,%posesion en campo rival,tiros,Tiros al arco,Goles,Goles esperados,pases_totales,Centros,pases_bajo_presion,%p_exitosos_totales,%p_exitosos_bajo_presion,%p_exitosos_sin_presion,Recupeaciones de Balón,recuperaciones
0,Manchester City,56.76136,29.702952,614,218,71,64.555055,22340,540,3251,81.32,70.59,83.15,69,1974
