### Código para la obtención y limpieza de los datos de StatsBomb

In [77]:
from statsbombpy import sb
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [50]:
def eventsTeam(country,division,season,gender,team):
    """
    Collect the events of every match played by ``team`` in one competition season.

    The competition is located by filtering the table returned by
    ``sb.competitions()`` on four columns, and the team's matches are taken
    from ``sb.matches()`` for the resulting competition/season pair.

    Parameters
    ----------
    country : value compared against the ``country_name`` column.
    division : value compared against the ``competition_name`` column.
    season : value compared against the ``season_name`` column.
    gender : value compared against the ``competition_gender`` column.
    team : value compared against the ``home_team`` and ``away_team`` columns
        of the matches table.

    Returns
    -------
    pandas.DataFrame
        The per-match frames returned by ``sb.events`` concatenated in match
        order. The per-match index is kept as is (no re-indexing).

    Raises
    ------
    IndexError
        If no competition row satisfies all four filters.
    ValueError
        If ``team`` does not appear in any match of the selected season.
    """
    # Fetch the competition catalogue once; it was previously requested twice
    # within the same expression.
    competitions = sb.competitions()
    selected = competitions[
        (competitions['country_name'] == country)
        & (competitions['competition_gender'] == gender)
        & (competitions['competition_name'] == division)
        & (competitions['season_name'] == season)
    ]
    if selected.empty:
        # Same exception type the bare ``.values[0]`` lookup used to raise,
        # but with a message that says what was not found.
        raise IndexError(
            f"No competition found for country={country!r}, division={division!r}, "
            f"season={season!r}, gender={gender!r}"
        )

    competition_id = selected['competition_id'].values[0]
    season_id = selected['season_id'].values[0]

    all_matches = sb.matches(competition_id=competition_id, season_id=season_id)
    plays = (all_matches['home_team'] == team) | (all_matches['away_team'] == team)
    match_ids = all_matches.loc[plays, 'match_id'].unique()
    if len(match_ids) == 0:
        # pd.concat([]) would raise a ValueError anyway; raise it explicitly
        # with context instead of "No objects to concatenate".
        raise ValueError(
            f"Team {team!r} has no matches in {division!r} {season!r}"
        )

    return pd.concat([sb.events(match_id=match_id) for match_id in match_ids])

In [114]:
def cleanData(team,events):
    """
    Summarise the passing metrics of an events DataFrame as a one-row DataFrame.

    Only rows whose ``type`` column equals ``'Pass'`` are considered. A pass is
    counted as successful when its ``pass_outcome`` is missing, and as played
    under pressure when its ``under_pressure`` value equals ``True``. A missing
    or ``False`` ``under_pressure`` value counts as "without pressure".

    The input ``events`` frame is not modified. If the ``pass_outcome`` or
    ``under_pressure`` column is absent altogether, every pass is treated as
    successful / not under pressure respectively.

    Parameters
    ----------
    team : label stored in the ``team`` column of the result.
    events : pandas.DataFrame of events with at least a ``type`` column.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``team``, ``pases_totales``,
        ``pases_bajo_presion``, ``pp_exitosos_totales``,
        ``pp_exitosos_bajo_presion`` and ``pp_exitosos_sin_presion``.
        Percentages are rounded to two decimals; a percentage whose
        denominator is zero is reported as ``0``.
    """
    columns=['team','pases_totales','pases_bajo_presion','pp_exitosos_totales',
                                        'pp_exitosos_bajo_presion','pp_exitosos_sin_presion']

    def _percentage(hits, total):
        # Guard against division by zero when a category has no passes.
        return round(100*hits / total, 2) if total else 0

    # Work on the pass subset only and never write back into ``events``.
    if 'type' in events.columns:
        passes = events[events['type'] == 'Pass']
    else:
        passes = events.iloc[0:0]
    n_passes = passes.shape[0]

    # Masks are converted to plain NumPy arrays so that combining them never
    # depends on index alignment (concatenated match frames repeat labels).
    if 'pass_outcome' in passes.columns:
        successful = passes['pass_outcome'].isna().to_numpy()
    else:
        successful = np.ones(n_passes, dtype=bool)

    if 'under_pressure' in passes.columns:
        flag = passes['under_pressure']
        pressured = flag.eq(True).to_numpy()
        unpressured = flag.isna().to_numpy() | flag.eq(False).to_numpy()
    else:
        pressured = np.zeros(n_passes, dtype=bool)
        unpressured = np.ones(n_passes, dtype=bool)

    n_pressured = int(pressured.sum())
    n_unpressured = int(unpressured.sum())

    metricas = [
        team,
        n_passes,
        n_pressured,
        _percentage(int(successful.sum()), n_passes),
        _percentage(int((successful & pressured).sum()), n_pressured),
        _percentage(int((successful & unpressured).sum()), n_unpressured),
    ]

    return pd.DataFrame([metricas], columns=columns)


In [None]:
# Team under analysis and the competition season its events are pulled from.
team = 'Manchester City'
events = eventsTeam(
    country='England',
    division='Premier League',
    season='2015/2016',
    gender='male',
    team=team,
)

In [117]:
# Reduce the raw events to the one-row table of passing metrics.
data_man_city = cleanData(team=team, events=events)

In [118]:
# Show the resulting metrics table as the cell output.
data_man_city

Unnamed: 0,team,pases_totales,pases_bajo_presion,pp_exitosos_totales,pp_exitosos_bajo_presion,pp_exitosos_sin_presion
0,Manchester City,39186,5784,78.88,69.88,80.44
