# Player Event Analysis

In [5]:
# PARAMETERS
# Season, team and player under analysis; the later cells read these constants.
SEASON = "25-26"
TEAM_NAME = "Monaco"
PLAYER_NAME = "Ansu Fati"
# Regex alternation: matches any player name containing either token.
PLAYER_SEARCH = "Ansu|Fati"

# Match-list CSV written by STEP 1 and read back by STEP 2.
CSV_FILE = "../data/monaco_25-26.csv"

# The output file name embeds a lower-case, underscore-separated player slug
# followed by the season, e.g. "ansu_fati_all_events_25-26.csv".
_player_slug = "_".join(PLAYER_NAME.lower().split(" "))
OUTPUT_FILE = "../data/{}_all_events_{}.csv".format(_player_slug, SEASON)

In [7]:
# STEP 1: Generate match list CSV (run once)
# Put the directory two levels up on sys.path so the project-local `blog`
# package can be imported from this notebook's location.
import sys
sys.path.append("../..")
from blog.get_match_ids import get_match_ids

# Query the match IDs for the configured team and season in Ligue 1.
match_query = dict(
    team_name=TEAM_NAME,
    league="FRA-Ligue 1",
    season=SEASON,
    verbose=True,
)
matches_df = get_match_ids(**match_query)

# Persist the match list so later steps can reload it from CSV_FILE.
matches_df.to_csv(CSV_FILE, index=False)
print("\nGenerated {} with {} matches".format(CSV_FILE, len(matches_df)))
matches_df.head()


Extrayendo IDs de partidos para:
  Equipo: Monaco
  Liga: FRA-Ligue 1
  Temporada: 25-26

📊 Extrayendo de WhoScored...


KeyboardInterrupt: 

In [None]:
# STEP 2: Load match list
import sys
import warnings

import pandas as pd

# Put the directory two levels up on sys.path so the project-local `viz`
# package can be imported from this notebook's location.
sys.path.append("../..")
from viz.match_data import extract_match_complete

# Suppress all warnings from this point on to keep the cell output readable.
warnings.filterwarnings("ignore")

# Reload the match list previously written to CSV_FILE.
matches_df = pd.read_csv(CSV_FILE)
print("Total partidos: {}".format(len(matches_df)))
matches_df.head()

In [None]:
# STEP 3: Process all matches and extract player events
#
# For every row of `matches_df` this cell calls `extract_match_complete`, then
# reads the events CSV at EVENTS_CSV, keeps the rows whose `player` value
# matches PLAYER_SEARCH, tags them with venue/opponent/match metadata and
# collects the per-match frames in `all_player_events`.
# NOTE(review): EVENTS_CSV is presumably (re)written by extract_match_complete
# on every call -- confirm, otherwise a stale file from a previous match would
# be read here.
EVENTS_CSV = '../../viz/data/match_events.csv'
team_key = TEAM_NAME.lower()
total_matches = len(matches_df)
all_player_events = []

# enumerate() drives the progress counter so it stays correct even when
# matches_df does not carry a default 0..n-1 index (e.g. after filtering).
for match_no, (row_label, match) in enumerate(matches_df.iterrows(), start=1):
    print(f"\n[{match_no}/{total_matches}] {match['home_team']} vs {match['away_team']}")

    try:
        extract_match_complete(
            ws_id=int(match['whoscored_id']),
            us_id=int(match['understat_id']),
            league=match['league'],
            season=match['season'],
            home_team=match['home_team'],
            away_team=match['away_team'],
            match_date=match['date']
        )

        events = pd.read_csv(EVENTS_CSV)
        # Case-insensitive regex match; rows with a missing player are dropped.
        is_player = events['player'].str.contains(PLAYER_SEARCH, case=False, na=False)
        player_events = events[is_player].copy()

        if player_events.empty:
            print(f"  Sin eventos {PLAYER_NAME}")
            continue

        # Decide the venue by checking on which side TEAM_NAME appears
        # (substring match, so "Monaco" also matches e.g. "AS Monaco").
        if team_key in match['home_team'].lower():
            venue, opponent = 'Home', match['away_team']
        elif team_key in match['away_team'].lower():
            venue, opponent = 'Away', match['home_team']
        else:
            print(f"  WARN: {TEAM_NAME} no encontrado en ningún equipo")
            continue

        player_events['venue'] = venue
        player_events['opponent'] = opponent
        player_events['match_date'] = match['date']
        player_events['match_id_ws'] = match['whoscored_id']
        player_events['match_id_us'] = match['understat_id']
        all_player_events.append(player_events)
        print(f"  {len(player_events)} eventos {PLAYER_NAME}")

    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next match.
        print(f"  Error: {e}")

print(f"\nPartidos con eventos {PLAYER_NAME}: {len(all_player_events)}")

In [None]:
# STEP 4: Concatenate and save all player events
if all_player_events:
    df = pd.concat(all_player_events, ignore_index=True)
    # kind='stable' guarantees that rows sharing a match_date keep the order
    # they had after concatenation; the default quicksort gives no such
    # guarantee, so events of one match could otherwise be shuffled.
    df = df.sort_values('match_date', kind='stable').reset_index(drop=True)

    df.to_csv(OUTPUT_FILE, index=False)

    print(f"Guardado: {OUTPUT_FILE}")
    print(f"Total eventos: {len(df)}")
    # Preview of the first rows, limited to the most informative columns.
    display(df[['match_date', 'opponent', 'venue', 'minute', 'type', 'player']].head(15))
else:
    print(f"Sin eventos {PLAYER_NAME}")

In [None]:
# STEP 5: Generate statistics
# Prints a text summary of the events in `df` (built in STEP 4): overall
# totals, the ten most frequent event types, a home/away breakdown and the
# ten matches with the most events.
if all_player_events:
    print(f"ESTADISTICAS {PLAYER_NAME.upper()} - {TEAM_NAME.upper()} {SEASON}")
    print("="*60)

    total = len(df)
    # Each distinct match_date value is counted as one match.
    partidos = df['match_date'].nunique()

    print("\nGENERAL:")
    print(f"  Partidos: {partidos}")
    print(f"  Total eventos: {total}")
    print(f"  Eventos/partido: {total/partidos:.1f}")

    print("\nTIPOS DE EVENTOS:")
    tipos = df['type'].value_counts().head(10)
    for tipo, count in tipos.items():
        print(f"  {tipo}: {count} ({count/total*100:.1f}%)")

    print("\nHOME vs AWAY:")
    # Named aggregation: event count and number of distinct match dates per venue.
    venue_stats = df.groupby('venue').agg(
        eventos=('player', 'count'),
        partidos=('match_date', 'nunique'),
    )
    display(venue_stats)

    print("\nTOP 10 PARTIDOS (más eventos):")
    top_matches = df.groupby(['match_date', 'opponent']).size().sort_values(ascending=False).head(10)
    for (fecha, rival), eventos in top_matches.items():
        # str() keeps the slice working even if match_date is not a plain string.
        print(f"  {str(fecha)[:10]} vs {rival}: {eventos} eventos")