### Código para la obtención y limpieza de los datos de StatsBoom 

In [1]:
from statsbombpy import sb
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def eventsTeam(country,division,season,gender,team):
    """
    Algoritmo que obtiene todos los eventos de todos los partidos del equipo del argumento 'team' y en la 
    competencia específicada por los demás parámetros.
    """
    competitionInCountry = sb.competitions()[sb.competitions()['country_name'] == country]
    competitionGender = competitionInCountry[competitionInCountry['competition_gender'] == gender]
    competitionDivision = competitionGender[competitionGender['competition_name'] == division]
    competition = competitionDivision[competitionDivision['season_name'] == season]

    competition_id = competition['competition_id'].values[0]
    season_id = competition['season_id'].values[0]

    all_matches = sb.matches(competition_id= competition_id, season_id=season_id)
    team_matches = all_matches[(all_matches['home_team'] == team) | (all_matches['away_team'] == team)]

    id_matches = list(team_matches.match_id.unique())
    events = []
    for match in id_matches:
        events.append(sb.events(match_id=match))
    events = pd.concat(events)

    return events

In [3]:
def cleanData(team,events):
    """
    Algoritmo que recibe un Data Frame de eventos y lo convierte en un Data Frame de las métricas que nos interesan para el análisis.
    """
    # Lista donde se agregan los valores de las métricas
    metricas = [team]
    # Nombre de las columnas asociadas a las métricas
    columns=['team','%posesion de balón','%posesion en campo rival','tiros','Tiros al arco','Goles','Goles esperados','pases_totales',
             'Centros','pases_bajo_presion', '%p_exitosos_totales', '%p_exitosos_bajo_presion',
             '%p_exitosos_sin_presion','Recupeaciones de Balón','recuperaciones']
    
    # Posesión promedio del equipo
    eventos_con_tiempos = events[events['type'].isin(['Pass', 'Carry', 'Pressure', 'Duel', 'Shot', '50/50'])]
    tiempo_total = eventos_con_tiempos['duration'].sum()
    posesion = eventos_con_tiempos[eventos_con_tiempos['possession_team']==team]
    porcentaje_posesion = posesion['duration'].sum()*100/tiempo_total
    metricas.append(porcentaje_posesion)

    # Posesión promedio del equipo en campo rival
    posesion_campo_rival = posesion[posesion['location'].notna()]
    posesion_campo_rival['location_x_greater_than_60'] = posesion_campo_rival['location'].apply(lambda loc: loc[0] > 60 if isinstance(loc, list) else False)
    posesion_campo_rival = posesion_campo_rival[posesion_campo_rival['location_x_greater_than_60']==True]
    porcentaje_posesion_cam_rival = (posesion_campo_rival['duration'].sum()*100/tiempo_total)
    metricas.append(porcentaje_posesion_cam_rival)

    # Tiros
    tiros = events[(events['type']=='Shot') & (events['team']==team)]
    metricas.append(tiros.shape[0])

    # Tiros al arco
    tiros_arco = tiros[tiros['shot_outcome'].isin(['Goal','Saved','Saved to Post','Post'])]
    metricas.append(tiros_arco.shape[0])

    # Goles
    goles = tiros_arco[tiros_arco['shot_outcome'] == 'Goal']
    metricas.append(goles.shape[0])

    # Goles esperados
    xg_statsbomb = tiros['shot_statsbomb_xg'].sum()
    metricas.append(xg_statsbomb)

    # Mapeo de valores para su fácil filtración
    events['successful_pass'] = events['pass_outcome'].isna()
    events['under_pressure'] = events['under_pressure'].replace({np.nan: False})

    # Pases totales
    pases_tot = events[(events['type'] == 'Pass') & (events['team']==team)]
    metricas.append(pases_tot.shape[0])

    # Centros 
    centros = pases_tot[~pases_tot['pass_cross'].isna()].shape[0]
    metricas.append(centros)

    # Pases bajo presión
    total_bajo_presion = pases_tot[pases_tot['under_pressure'] == True]
    metricas.append(total_bajo_presion.shape[0])

    # Otros tipos de pases para calcular los porcentajes
    pases_exitosos_totales = pases_tot[pases_tot['successful_pass'] == True]
    exitos_bajo_presion = total_bajo_presion[total_bajo_presion['successful_pass'] == True]
    total_sin_presion = pases_tot[pases_tot['under_pressure'] == False]
    exitos_sin_presion = total_sin_presion[total_sin_presion['successful_pass'] == True]

    # Porcentajes relevantes de los pases.
    ppe_tot = round(100*pases_exitosos_totales.shape[0] / pases_tot.shape[0], 2)
    metricas.append(ppe_tot)
    ppe_bajo_presion = round(100*exitos_bajo_presion.shape[0] / total_bajo_presion.shape[0], 2)
    metricas.append(ppe_bajo_presion)
    ppe_sin_presion = round(100*exitos_sin_presion.shape[0] / total_sin_presion.shape[0], 2)
    metricas.append(ppe_sin_presion)

    # Offsides
    pass_offsides = pases_tot[pases_tot['pass_outcome']=='Pass Offside']
    other_offsides = events[events['type']=='Offside']
    metricas.append(pass_offsides.shape[0]+other_offsides.shape[0])

    # Recuperaciones de balón
    recuperaciones = events[(events['type']=='Ball Recovery') & (events['ball_recovery_recovery_failure'].isna())
                            & (events['team']==team)].shape[0]
    metricas.append(recuperaciones)

    return pd.DataFrame([metricas], columns=columns)


In [4]:
team = 'Manchester City'
events = eventsTeam('England','Premier League','2015/2016','male',team)



In [5]:
data_man_city = cleanData(team,events)

  events['successful_pass'] = events['pass_outcome'].isna()
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})


In [6]:
data_man_city

Unnamed: 0,team,%posesion de balón,%posesion en campo rival,tiros,Tiros al arco,Goles,Goles esperados,pases_totales,Centros,pases_bajo_presion,%p_exitosos_totales,%p_exitosos_bajo_presion,%p_exitosos_sin_presion,Recupeaciones de Balón,recuperaciones
0,Manchester City,56.76136,29.702952,614,218,71,64.555055,22340,540,3251,81.32,70.59,83.15,69,1974


LO DE EMIL

In [16]:
def defensiveData(team,events,df=None):
  """
    Algoritmo que obtiene todos los datos defensivos sobre un equipo
    Todos los datos son promediados entre todos los partidos
  """
  dataFrame = pd.DataFrame()
  metricas = [team]

  columns=['team','partidos','tiempo_presion','tiempo_presion_ofensiva','tiempo_presion_defensiva',
           'bloqueos','despejes','duelos_ganados','faltas_cometidas','#se_llevan_a_un_defensa','intersepciones']

  # Partidos
  partidos=events['match_id'].unique()
  num_partidos=len(partidos)
  metricas.append(num_partidos)

  # Presión por partido
  # Puede ser mayor a 90 ya que a la vez puede haber varios jugadores presionando
  presion_total =events[(events['type']=='Pressure') & (events['possession_team'] == team)]
  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
  presion_ofensiva=presion_total[(presion_total['x']>=60)]
  presion_defensiva=presion_total[(presion_total['x']<60)]
  tiempo_presion=presion_total['duration'].sum()
  tiempo_presion_ofensiva=presion_ofensiva['duration'].sum()
  tiempo_presion_defensiva=presion_defensiva['duration'].sum()
  metricas.append(tiempo_presion/num_partidos)
  metricas.append(tiempo_presion_ofensiva/num_partidos)
  metricas.append(tiempo_presion_defensiva/num_partidos)

  # Bloqueos
  bloqueos = events[(events['type']=='Block') & (events['team']==team)]
  metricas.append(bloqueos.shape[0]/num_partidos)

  #Despejes
  despejes=events[(events['type']=='Clearance') & (events['team']==team)]
  metricas.append(bloqueos.shape[0]/num_partidos)

  #Duelos ganados
  duelos=events[(events['type']=='Duel') & (events['team']==team)]
  duelos=duelos[duelos['duel_outcome'].isin(['Won','Lost Out','Success','Succes In Play','Succes Out'])]
  metricas.append(duelos.shape[0]/num_partidos)

  # Faltas
  faltas = events[(events['type']=='Foul Committed') & (events['team']==team)]
  metricas.append(faltas.shape[0]/num_partidos)

  # Veces que un jugador contrario se lleva a un defensor
  llevadas=events[(events['type']=='Dribbled Past') & (events['team']==team)]
  metricas.append(llevadas.shape[0]/num_partidos)

  # Intercepciones
  inter=events[(events['type']=='Interception') & (events['team']==team)]
  inter= inter[(inter['interception_outcome'].isin(['Won','Lost Out','Success','Succes In Play','Succes Out']))]
  metricas.append(inter.shape[0]/num_partidos)

  if df is None:
    return pd.DataFrame([metricas], columns=columns)

  return pd.concat([df,pd.DataFrame([metricas], columns=columns)])


In [17]:
def obtener_def_evento(competition_id=55, season_id=282):

  #Sacamos los equipos de la competición
  matches=sb.matches(competition_id=competition_id, season_id=season_id)
  teams=(matches['home_team'].unique())

  #Creamos nuestro DataFrame
  columns=['team','partidos','tiempo_presion','tiempo_presion_ofensiva','tiempo_presion_defensiva',
           'bloqueos','despejes','duelos_ganados','faltas_cometidas','#se_llevan_a_un_defensa','intersepciones']
  df=pd.DataFrame(columns=columns)

  #Mandamos a llamar la función que calcula las estadísticas defensivas y las vamos uniendo
  for team in teams:
    all_matches = sb.matches(competition_id= competition_id, season_id=season_id)
    team_matches = all_matches[(all_matches['home_team'] == team) | (all_matches['away_team'] == team)]

    id_matches = list(team_matches.match_id.unique())
    events = []
    for match in id_matches:
        events.append(sb.events(match_id=match))
    events = pd.concat(events)
    df=defensiveData(team,events,df)
  return df

In [18]:
defensivas=obtener_def_evento()
defensivas

  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
  return pd.concat([df,pd.DataFrame([metricas], columns=columns)])
  presion_total[['x','y']]=presion_total['location'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a D

Unnamed: 0,team,partidos,tiempo_presion,tiempo_presion_ofensiva,tiempo_presion_defensiva,bloqueos,despejes,duelos_ganados,faltas_cometidas,#se_llevan_a_un_defensa,intersepciones
0,Netherlands,6,108.205366,40.218287,67.987079,15.166667,15.166667,6.0,12.333333,3.833333,5.166667
0,Spain,7,151.656203,63.984414,87.671789,19.0,19.0,5.142857,14.714286,7.142857,4.0
0,Portugal,5,122.101417,51.386232,70.715185,20.6,20.6,7.4,8.8,8.2,4.0
0,Denmark,4,102.497693,45.472968,57.024725,13.75,13.75,5.0,13.75,5.5,3.75
0,England,7,138.641497,58.79598,79.845517,18.571429,18.571429,5.142857,10.142857,3.571429,3.142857
0,Ukraine,3,70.724676,29.1846,41.540077,17.333333,17.333333,7.333333,12.0,11.333333,3.666667
0,Czech Republic,3,93.500797,41.468378,52.032418,20.666667,20.666667,6.666667,15.666667,10.0,5.333333
0,Austria,4,121.254593,54.548182,66.706411,25.5,25.5,7.0,17.0,9.75,5.25
0,Romania,4,76.820652,36.148277,40.672375,17.75,17.75,5.0,12.25,7.25,6.0
0,France,6,125.518143,50.745692,74.772451,18.833333,18.833333,7.5,12.5,4.666667,4.166667


In [19]:
defensivas

Unnamed: 0,team,partidos,tiempo_presion,tiempo_presion_ofensiva,tiempo_presion_defensiva,bloqueos,despejes,duelos_ganados,faltas_cometidas,#se_llevan_a_un_defensa,intersepciones
0,Netherlands,6,108.205366,40.218287,67.987079,15.166667,15.166667,6.0,12.333333,3.833333,5.166667
0,Spain,7,151.656203,63.984414,87.671789,19.0,19.0,5.142857,14.714286,7.142857,4.0
0,Portugal,5,122.101417,51.386232,70.715185,20.6,20.6,7.4,8.8,8.2,4.0
0,Denmark,4,102.497693,45.472968,57.024725,13.75,13.75,5.0,13.75,5.5,3.75
0,England,7,138.641497,58.79598,79.845517,18.571429,18.571429,5.142857,10.142857,3.571429,3.142857
0,Ukraine,3,70.724676,29.1846,41.540077,17.333333,17.333333,7.333333,12.0,11.333333,3.666667
0,Czech Republic,3,93.500797,41.468378,52.032418,20.666667,20.666667,6.666667,15.666667,10.0,5.333333
0,Austria,4,121.254593,54.548182,66.706411,25.5,25.5,7.0,17.0,9.75,5.25
0,Romania,4,76.820652,36.148277,40.672375,17.75,17.75,5.0,12.25,7.25,6.0
0,France,6,125.518143,50.745692,74.772451,18.833333,18.833333,7.5,12.5,4.666667,4.166667


In [20]:
def obtener_of_evento(competition_id=55, season_id=282):
  #Sacamos los equipos de la competición
  matches=sb.matches(competition_id=competition_id, season_id=season_id)
  teams=(matches['home_team'].unique())

  #Creamos nuestro DataFrame
  columns=['team','%posesion de balón','%posesion en campo rival','tiros','Tiros al arco','Goles','Goles esperados','pases_totales',
             'Centros','pases_bajo_presion', '%p_exitosos_totales', '%p_exitosos_bajo_presion',
             '%p_exitosos_sin_presion','Recupeaciones de Balón','recuperaciones']
  df=pd.DataFrame(columns=columns)

  #Mandamos a llamar la función que calcula las estadísticas ofensivas y las vamos uniendo
  for team in teams:
    all_matches = sb.matches(competition_id= competition_id, season_id=season_id)
    team_matches = all_matches[(all_matches['home_team'] == team) | (all_matches['away_team'] == team)]

    id_matches = list(team_matches.match_id.unique())
    events = []
    for match in id_matches:
        events.append(sb.events(match_id=match))
    events = pd.concat(events)
    aux=cleanData(team,events)
    df=pd.concat([df,aux])
  return df

In [21]:
datos=obtener_of_evento()
datos

  events['successful_pass'] = events['pass_outcome'].isna()
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  df=pd.concat([df,aux])
  events['successful_pass'] = events['pass_outcome'].isna()
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['successful_pass'] = events['pass_outcome'].isna()
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['successful_pass'] = events['pass_outcome'].isna()
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure'] = events['under_pressure'].replace({np.nan: False})
  events['under_pressure

Unnamed: 0,team,%posesion de balón,%posesion en campo rival,tiros,Tiros al arco,Goles,Goles esperados,pases_totales,Centros,pases_bajo_presion,%p_exitosos_totales,%p_exitosos_bajo_presion,%p_exitosos_sin_presion,Recupeaciones de Balón,recuperaciones
0,Netherlands,52.097149,23.215227,83,26,9,7.318485,3117,66,338,85.95,71.89,87.66,14,193
0,Spain,59.227921,29.465086,123,44,14,10.57447,4335,89,620,87.84,78.87,89.34,12,293
0,Portugal,66.724181,33.980228,95,33,9,14.709344,3870,101,405,87.88,76.3,89.24,13,258
0,Denmark,51.025652,22.179273,52,16,2,3.869597,2361,48,308,84.41,73.05,86.12,8,150
0,England,56.944401,25.74342,81,31,13,10.508677,4540,87,583,86.76,76.33,88.3,12,275
0,Ukraine,50.250656,19.510495,40,13,2,2.984301,1491,29,142,84.57,69.72,86.14,8,99
0,Czech Republic,39.883479,18.437567,44,19,3,4.590542,1107,52,191,74.71,60.21,77.73,2,124
0,Austria,53.575678,25.29224,50,22,6,6.115813,2070,59,359,81.45,70.75,83.69,5,181
0,Romania,40.128016,15.454466,38,16,4,3.756728,1347,31,161,73.72,59.01,75.72,5,142
0,France,49.563417,25.257876,101,31,7,12.112084,3453,99,480,87.81,75.21,89.84,8,198


In [22]:
datos

Unnamed: 0,team,%posesion de balón,%posesion en campo rival,tiros,Tiros al arco,Goles,Goles esperados,pases_totales,Centros,pases_bajo_presion,%p_exitosos_totales,%p_exitosos_bajo_presion,%p_exitosos_sin_presion,Recupeaciones de Balón,recuperaciones
0,Netherlands,52.097149,23.215227,83,26,9,7.318485,3117,66,338,85.95,71.89,87.66,14,193
0,Spain,59.227921,29.465086,123,44,14,10.57447,4335,89,620,87.84,78.87,89.34,12,293
0,Portugal,66.724181,33.980228,95,33,9,14.709344,3870,101,405,87.88,76.3,89.24,13,258
0,Denmark,51.025652,22.179273,52,16,2,3.869597,2361,48,308,84.41,73.05,86.12,8,150
0,England,56.944401,25.74342,81,31,13,10.508677,4540,87,583,86.76,76.33,88.3,12,275
0,Ukraine,50.250656,19.510495,40,13,2,2.984301,1491,29,142,84.57,69.72,86.14,8,99
0,Czech Republic,39.883479,18.437567,44,19,3,4.590542,1107,52,191,74.71,60.21,77.73,2,124
0,Austria,53.575678,25.29224,50,22,6,6.115813,2070,59,359,81.45,70.75,83.69,5,181
0,Romania,40.128016,15.454466,38,16,4,3.756728,1347,31,161,73.72,59.01,75.72,5,142
0,France,49.563417,25.257876,101,31,7,12.112084,3453,99,480,87.81,75.21,89.84,8,198
