In [None]:
import sys
import os
from pathlib import Path
import pandas as pd
import warnings

# NOTE(review): this silences every warning category for the whole session,
# which also hides library deprecation warnings — consider a narrower filter.
warnings.filterwarnings('ignore')

# Add project root to path: the directory two levels above the current
# working directory, inserted at position 0 so it is searched first.
# NOTE(review): this depends on where the kernel was started; presumably the
# notebook is always launched from its own folder — confirm.
project_root = Path.cwd().parent.parent
sys.path.insert(0, str(project_root))

from viz.match_data import extract_match_complete

In [None]:
# Load the match list CSV and turn the `date` column into datetimes
matches_df = pd.read_csv('rb_leipzig_matches_25-26.csv').assign(
    date=lambda fixtures: pd.to_datetime(fixtures['date'])
)

print("Total de partidos:", len(matches_df))
print("\nPrimeros 5 partidos:")
matches_df.head(5)

In [None]:
# Per-match extraction loop.
#
# For every row of matches_df this runs extract_match_complete, reads the
# resulting events CSV back, tags the rows with match metadata and collects:
#   - all_events:         one DataFrame per successfully processed match
#   - all_diomande_shots: one DataFrame per match with at least one shot/goal
#                         event whose player name contains PLAYER_KEYWORD
#   - failed_matches:     "home vs away" labels of matches that raised
all_events = []
all_diomande_shots = []
failed_matches = []

# Values that used to be hard-coded inline in the loop body
EVENTS_CSV = '../../viz/data/match_events.csv'
TEAM_KEYWORD = 'Leipzig'
PLAYER_KEYWORD = 'Diomande'

total_matches = len(matches_df)

# enumerate() drives the progress counter so it stays correct even when
# matches_df does not have a default 0..n-1 index (e.g. after filtering).
for position, (_, match) in enumerate(matches_df.iterrows(), start=1):
    print(f"\n[{position}/{total_matches}] {match['home_team']} vs {match['away_team']}")
    print(f"  Fecha: {match['date'].strftime('%Y-%m-%d')}")

    try:
        # Run the extraction for this match. Its return value is not used;
        # the events are read back from EVENTS_CSV right afterwards.
        # NOTE(review): presumably extract_match_complete rewrites EVENTS_CSV
        # on every call — if it can return without doing so, the previous
        # match's events would be read and mislabeled here. Confirm.
        extract_match_complete(
            ws_id=int(match['whoscored_id']),
            us_id=int(match['understat_id']),
            league=match['league'],
            season=match['season'],
            home_team=match['home_team'],
            away_team=match['away_team'],
            match_date=match['date'].strftime('%Y-%m-%d')
        )

        # Read the full event log of the match
        events = pd.read_csv(EVENTS_CSV)

        # Attach match metadata to every event row
        events['match_date'] = match['date']
        events['match_home'] = match['home_team']
        events['match_away'] = match['away_team']
        events['match_id_ws'] = match['whoscored_id']
        events['match_id_us'] = match['understat_id']

        # Opponent and venue from the tracked team's point of view. These are
        # set on ALL events (not just the shots subset) because later cells
        # select `opponent` from frames derived from the full event log.
        is_home = TEAM_KEYWORD in match['home_team']
        events['opponent'] = match['away_team'] if is_home else match['home_team']
        events['venue'] = 'Home' if is_home else 'Away'

        all_events.append(events)
        print(f"  ✓ {len(events)} eventos extraídos")

        # Shot/goal events by the tracked player (case-insensitive match,
        # rows with missing type/player are excluded)
        shot_mask = events['type'].str.contains('Shot|Goal', case=False, na=False)
        player_mask = events['player'].str.contains(PLAYER_KEYWORD, case=False, na=False)
        diomande_shots = events[shot_mask & player_mask].copy()

        if len(diomande_shots) > 0:
            all_diomande_shots.append(diomande_shots)
            print(f"  ✓ {len(diomande_shots)} tiros Diomande")
        else:
            print("  - Sin tiros Diomande")

    except Exception as e:
        # Deliberate best-effort: one failing match must not abort the batch,
        # but it is remembered so the summary can report it.
        failed_matches.append(f"{match['home_team']} vs {match['away_team']}")
        print(f"  ✗ Error: {e}")

print(f"\n{'='*60}")
print("RESUMEN:")
print(f"  Partidos procesados: {len(all_events)}")
print(f"  Partidos con tiros Diomande: {len(all_diomande_shots)}")
if failed_matches:
    print(f"  Partidos con error: {len(failed_matches)}")
    for failed_label in failed_matches:
        print(f"    - {failed_label}")
print(f"{'='*60}")

In [None]:
# Full event log across all processed matches (empty frame when none succeeded)
df_all_events = pd.DataFrame()
if all_events:
    df_all_events = pd.concat(all_events, ignore_index=True)

# Diomande's shots across all matches (empty frame when there are none)
df_diomande_shots = pd.DataFrame()
if all_diomande_shots:
    df_diomande_shots = pd.concat(all_diomande_shots, ignore_index=True)

print("Total eventos: {:,}".format(len(df_all_events)))
print("Total shots Diomande: {}".format(len(df_diomande_shots)))

In [None]:
# Frequency of each event type across the combined event log
print("Tipos de eventos:", df_all_events['type'].value_counts(), sep="\n")

In [None]:
# Keep only the rows whose player name contains "Diomande"
# (case-insensitive; rows with a missing player are dropped)
is_diomande = df_all_events['player'].str.contains('Diomande', case=False, na=False)
diomande_events = df_all_events.loc[is_diomande].copy()

print(f"\nEventos de Diomande: {len(diomande_events)}")
print("\nTipos de eventos Diomande:", diomande_events['type'].value_counts(), sep="\n")

In [None]:
# Preview of Diomande's events (first 10 rows, selected columns)
print("\nPrimeros 10 eventos de Diomande:")

# diomande_events comes from the full event log, which is not guaranteed to
# carry an `opponent` column; selecting it blindly raises KeyError. When it is
# missing, derive it from the match metadata: the opponent is the away side
# if Leipzig is the home team, otherwise the home side.
diomande_preview = diomande_events
if 'opponent' not in diomande_preview.columns:
    leipzig_at_home = diomande_preview['match_home'].str.contains(
        'Leipzig', regex=False, na=False
    )
    diomande_preview = diomande_preview.assign(
        opponent=diomande_preview['match_away'].where(
            leipzig_at_home, diomande_preview['match_home']
        )
    )

diomande_preview[['match_date', 'opponent', 'minute', 'type', 'player', 'outcome']].head(10)

In [None]:
# Shot summary: totals, per-match counts, venue split and outcome split
shot_count = len(df_diomande_shots)

if shot_count == 0:
    print("No se encontraron shots de Diomande")
else:
    print(f"Total shots: {shot_count}")

    print("\nShots por partido:")
    shots_by_match = df_diomande_shots.groupby('match_date').size()
    print(shots_by_match.sort_values(ascending=False))

    print("\nShots por venue:")
    print(df_diomande_shots['venue'].value_counts())

    print("\nShots por resultado:")
    print(df_diomande_shots['outcome'].value_counts())

In [None]:
# Detailed shot table, ordered by match date.
# The table is built inside the guard but displayed from the last top-level
# expression of the cell: Jupyter only auto-renders a trailing top-level
# expression, so a DataFrame expression nested inside `if` is never shown.
shots_detail = None
if len(df_diomande_shots) > 0:
    print("\nTodos los shots de Diomande:")
    shots_detail = df_diomande_shots[[
        'match_date', 'opponent', 'venue', 'minute',
        'type', 'outcome', 'x', 'y'
    ]].sort_values('match_date')

# Renders the table when there are shots; a None result displays nothing.
shots_detail

In [None]:
# Diomande's passing events.
# NOTE(review): the type filter is a case-insensitive substring match, so any
# event type whose name merely contains "pass" is included — confirm that is
# the intended set before trusting the accuracy figure.
pass_player_mask = df_all_events['player'].str.contains('Diomande', case=False, na=False)
pass_type_mask = df_all_events['type'].str.contains('Pass', case=False, na=False)
diomande_passes = df_all_events[pass_player_mask & pass_type_mask].copy()

print(f"Total pases Diomande: {len(diomande_passes)}")

if len(diomande_passes) > 0:
    # Passes per match, ordered chronologically
    print("\nPases por partido:")
    passes_per_match = diomande_passes.groupby('match_date').size().sort_index()
    print(passes_per_match)
    print(f"\nPromedio: {passes_per_match.mean():.1f} pases/partido")

    # Share of passes whose outcome is exactly 'Successful'
    print("\nPrecisión de pases:")
    successful = diomande_passes['outcome'].eq('Successful').sum()
    total = len(diomande_passes)
    print(f"  {successful}/{total} ({100*successful/total:.1f}%)")

In [None]:
# Diomande's dribbles / take-ons (event type contains "TakeOn" or "Dribble",
# case-insensitive; rows with missing player or type are excluded)
dribble_player_mask = df_all_events['player'].str.contains('Diomande', case=False, na=False)
dribble_type_mask = df_all_events['type'].str.contains('TakeOn|Dribble', case=False, na=False)
diomande_dribbles = df_all_events[dribble_player_mask & dribble_type_mask].copy()

print(f"Total regates Diomande: {len(diomande_dribbles)}")

if len(diomande_dribbles) > 0:
    # Dribbles per match, ordered chronologically
    print("\nRegates por partido:")
    dribbles_per_match = diomande_dribbles.groupby('match_date').size().sort_index()
    print(dribbles_per_match)
    print(f"\nPromedio: {dribbles_per_match.mean():.1f} regates/partido")

    # Share of dribbles whose outcome is exactly 'Successful'
    print("\nÉxito en regates:")
    successful_dribbles = diomande_dribbles['outcome'].eq('Successful').sum()
    total_dribbles = len(diomande_dribbles)
    print(f"  {successful_dribbles}/{total_dribbles} ({100*successful_dribbles/total_dribbles:.1f}%)")

In [None]:
# Export the collected frames to CSV files in the current directory.
# The full log and Diomande's events are always written; the shots, passes
# and dribbles files are written only when their frame has at least one row.

# Full event log
output_all = 'diomande_all_events.csv'
df_all_events.to_csv(output_all, index=False)
print(f"✓ Eventing completo guardado: {output_all}")
all_events_mb = df_all_events.memory_usage(deep=True).sum() / 1024**2
print(f"  {len(df_all_events):,} eventos | {all_events_mb:.2f} MB")

# Diomande's events only
output_diomande = 'diomande_events.csv'
diomande_events.to_csv(output_diomande, index=False)
print(f"\n✓ Eventos Diomande guardados: {output_diomande}")
print(f"  {len(diomande_events)} eventos")

# Diomande's shots
if len(df_diomande_shots):
    output_shots = 'diomande_shots.csv'
    df_diomande_shots.to_csv(output_shots, index=False)
    print(f"\n✓ Shots Diomande guardados: {output_shots}")
    print(f"  {len(df_diomande_shots)} shots")

# Diomande's passes
if len(diomande_passes):
    output_passes = 'diomande_passes.csv'
    diomande_passes.to_csv(output_passes, index=False)
    print(f"\n✓ Pases Diomande guardados: {output_passes}")
    print(f"  {len(diomande_passes)} pases")

# Diomande's dribbles
if len(diomande_dribbles):
    output_dribbles = 'diomande_dribbles.csv'
    diomande_dribbles.to_csv(output_dribbles, index=False)
    print(f"\n✓ Regates Diomande guardados: {output_dribbles}")
    print(f"  {len(diomande_dribbles)} regates")

In [None]:
# Final recap: matches processed plus event counts by category
separator = "=" * 60
summary_lines = [
    separator,
    "RESUMEN FINAL - DIOMANDE (RB Leipzig 25-26)",
    separator,
    f"\nPartidos procesados: {len(all_events)}/{len(matches_df)}",
    f"\nEventos totales: {len(df_all_events):,}",
    f"Eventos Diomande: {len(diomande_events)}",
    f"  - Pases: {len(diomande_passes)}",
    f"  - Regates: {len(diomande_dribbles)}",
    f"  - Shots: {len(df_diomande_shots)}",
    separator,
]
print("\n".join(summary_lines))