In [1]:
import sys
import os
from pathlib import Path
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

# Add project root to path
project_root = Path.cwd().parent.parent
sys.path.insert(0, str(project_root))

from viz.match_data import extract_match_complete

In [2]:
# Cargar CSV de partidos
matches_df = pd.read_csv('rb_leipzig_matches_25-26.csv')
matches_df['date'] = pd.to_datetime(matches_df['date'])

print(f"Total de partidos: {len(matches_df)}")
print(f"\nPrimeros 5 partidos:")
matches_df.head()

Total de partidos: 15

Primeros 5 partidos:


Unnamed: 0,date,home_team,away_team,whoscored_id,understat_id,league,season
0,2025-08-22 18:30:00,Bayern Munich,RB Leipzig,1908319,30224,GER-Bundesliga,25-26
1,2025-08-30 13:30:00,RB Leipzig,FC Heidenheim,1910607,30235,GER-Bundesliga,25-26
2,2025-09-13 13:30:00,Mainz 05,RB Leipzig,1910620,30245,GER-Bundesliga,25-26
3,2025-09-20 16:30:00,RB Leipzig,FC Koln,1910639,30256,GER-Bundesliga,25-26
4,2025-09-27 13:30:00,Wolfsburg,RB Leipzig,1910661,30264,GER-Bundesliga,25-26


In [3]:
# DataFrames acumulativos
all_events = []
all_diomande_shots = []

for idx, match in matches_df.iterrows():
    print(f"\n[{idx+1}/{len(matches_df)}] {match['home_team']} vs {match['away_team']}")
    print(f"  Fecha: {match['date'].strftime('%Y-%m-%d')}")
    
    try:
        # Extraer data del partido
        extract_match_complete(
            ws_id=int(match['whoscored_id']),
            us_id=int(match['understat_id']),
            league=match['league'],
            season=match['season'],
            home_team=match['home_team'],
            away_team=match['away_team'],
            match_date=match['date'].strftime('%Y-%m-%d')
        )
        
        # Leer eventing completo
        events = pd.read_csv('../../viz/data/match_events.csv')
        
        # Agregar metadata del partido
        events['match_date'] = match['date']
        events['match_home'] = match['home_team']
        events['match_away'] = match['away_team']
        events['match_id_ws'] = match['whoscored_id']
        events['match_id_us'] = match['understat_id']
        
        # Guardar todo el eventing
        all_events.append(events)
        print(f"  ✓ {len(events)} eventos extraídos")
        
        # Filtrar shots de Diomande
        shots = events[events['type'].str.contains('Shot|Goal', case=False, na=False)].copy()
        diomande_shots = shots[shots['player'].str.contains('Diomand', case=False, na=False)].copy()
        
        if len(diomande_shots) > 0:
            # Determinar si Leipzig es local o visitante
            is_home = 'Leipzig' in match['home_team']
            diomande_shots['opponent'] = match['away_team'] if is_home else match['home_team']
            diomande_shots['venue'] = 'Home' if is_home else 'Away'
            
            all_diomande_shots.append(diomande_shots)
            print(f"  ✓ {len(diomande_shots)} tiros Diomande")
        else:
            print(f"  - Sin tiros Diomande")
            
    except Exception as e:
        print(f"  ✗ Error: {e}")
        continue

print(f"\n{'='*60}")
print(f"RESUMEN:")
print(f"  Partidos procesados: {len(all_events)}")
print(f"  Partidos con tiros Diomande: {len(all_diomande_shots)}")
print(f"{'='*60}")


[1/15] Bayern Munich vs RB Leipzig
  Fecha: 2025-08-22

Extracting: Bayern Munich vs RB Leipzig (2025-08-22)
--------------------------------------------------


1. match_events.csv: 2055 events
2. player_network.csv: 293 records
3. match_aggregates.csv: 68 records
4. spatial_analysis.csv: 48 spatial records
5. match_info.csv: 65 info records

Shots: 31 | Goals: 6 | xG: 3.82
  ✓ 2055 eventos extraídos
  - Sin tiros Diomande

[2/15] RB Leipzig vs FC Heidenheim
  Fecha: 2025-08-30

Extracting: RB Leipzig vs FC Heidenheim (2025-08-30)
--------------------------------------------------
1. match_events.csv: 1784 events
2. player_network.csv: 250 records
3. match_aggregates.csv: 68 records
4. spatial_analysis.csv: 49 spatial records
5. match_info.csv: 79 info records

Shots: 24 | Goals: 2 | xG: 4.34
  ✓ 1784 eventos extraídos
  ✓ 1 tiros Diomande

[3/15] Mainz 05 vs RB Leipzig
  Fecha: 2025-09-13

Extracting: Mainz 05 vs RB Leipzig (2025-09-13)
--------------------------------------------------
1. match_events.csv: 1910 events
2. player_network.csv: 273 records
3. match_aggregates.csv: 68 records
4. spatial_analysis.csv: 48 spatial records
5. match_i

In [4]:
# DataFrame con TODO el eventing
df_all_events = pd.concat(all_events, ignore_index=True) if all_events else pd.DataFrame()

# DataFrame con shots de Diomande
df_diomande_shots = pd.concat(all_diomande_shots, ignore_index=True) if all_diomande_shots else pd.DataFrame()

print(f"Total eventos: {len(df_all_events):,}")
print(f"Total shots Diomande: {len(df_diomande_shots)}")

Total eventos: 29,404
Total shots Diomande: 23


In [5]:
# Tipos de eventos
print("Tipos de eventos:")
print(df_all_events['type'].value_counts())

Tipos de eventos:
type
Pass               15500
Carry               5081
BallRecovery        1332
BallTouch           1147
Clearance            846
Aerial               762
Foul                 606
TakeOn               579
Tackle               511
CornerAwarded        316
Interception         277
Challenge            260
Dispossessed         254
BlockedPass          253
SavedShot            230
Save                 227
KeeperPickup         175
MissedShots          157
SubstitutionOn       142
SubstitutionOff      142
End                   90
Start                 60
Card                  56
OffsidePass           56
OffsideGiven          56
OffsideProvoked       56
Goal                  49
FormationChange       37
FormationSet          30
Claim                 26
ShieldBallOpp         25
Error                 23
KeeperSweeper         20
ShotOnPost            10
Punch                  8
PenaltyFaced           2
Smother                2
GoodSkill              1
Name: count, dtype: int64


In [6]:
# Filtrar eventos de Diomande
diomande_events = df_all_events[
    df_all_events['player'].str.contains('Diomand', case=False, na=False)
].copy()

print(f"\nEventos de Diomande: {len(diomande_events)}")
print("\nTipos de eventos Diomande:")
print(diomande_events['type'].value_counts())


Eventos de Diomande: 837

Tipos de eventos Diomande:
type
Pass               378
Carry              101
TakeOn              79
BallTouch           65
BallRecovery        57
Dispossessed        23
Aerial              17
CornerAwarded       17
Foul                16
Tackle              13
SavedShot           12
BlockedPass         11
SubstitutionOff      7
Interception         7
Challenge            7
Goal                 6
MissedShots          5
Clearance            5
SubstitutionOn       4
OffsideGiven         2
Card                 2
Save                 1
OffsidePass          1
OffsideProvoked      1
Name: count, dtype: int64


In [7]:
# Vista previa de eventos Diomande
print("\nPrimeros 10 eventos de Diomande:")
diomande_events[['match_date', 'minute', 'type', 'player']].head(10)


Primeros 10 eventos de Diomande:


Unnamed: 0,match_date,minute,type,player
35,2025-08-22 18:30:00,1.0,Pass,Yan Diomande
36,2025-08-22 18:30:00,1.0,Carry,Yan Diomande
107,2025-08-22 18:30:00,4.0,Pass,Yan Diomande
108,2025-08-22 18:30:00,4.0,Carry,Yan Diomande
111,2025-08-22 18:30:00,4.0,TakeOn,Yan Diomande
113,2025-08-22 18:30:00,4.0,Dispossessed,Yan Diomande
115,2025-08-22 18:30:00,4.0,BallTouch,Yan Diomande
117,2025-08-22 18:30:00,4.0,Foul,Yan Diomande
123,2025-08-22 18:30:00,6.0,Pass,Yan Diomande
124,2025-08-22 18:30:00,6.0,Carry,Yan Diomande


In [8]:
# 1. SHOTS - Filtrar todos los tiros
shot_types = ['SavedShot', 'Goal', 'MissedShots', 'ShotOnPost', 'BlockedShot']
diomande_shots = diomande_events[diomande_events['type'].isin(shot_types)].copy()

print(f"SHOTS DE DIOMANDE")
print("=" * 60)
print(f"Total shots: {len(diomande_shots)}")
print(f"\nPor tipo:")
print(diomande_shots['type'].value_counts())

print(f"\nShots con xG > 0: {(diomande_shots['xg'] > 0).sum()}")
print(f"Total xG: {diomande_shots['xg'].sum():.2f}")

print(f"\nShots por partido:")
shots_per_match = diomande_shots.groupby('match_date').size().sort_values(ascending=False)
print(shots_per_match)
print(f"Promedio: {shots_per_match.mean():.1f} shots/partido")

# Vista detallada
print(f"\nDetalle shots (con xG):")
diomande_shots[['match_date', 'minute', 'type', 'xg', 'x', 'y']].sort_values('match_date')

SHOTS DE DIOMANDE
Total shots: 23

Por tipo:
type
SavedShot      12
Goal            6
MissedShots     5
Name: count, dtype: int64

Shots con xG > 0: 22
Total xG: 5.51

Shots por partido:
match_date
2025-11-23 14:30:00    4
2025-11-08 14:30:00    3
2025-12-06 17:30:00    3
2025-09-20 16:30:00    2
2025-09-27 13:30:00    2
2025-11-01 14:30:00    2
2025-11-28 19:30:00    2
2025-12-12 19:30:00    2
2025-08-30 13:30:00    1
2025-09-13 13:30:00    1
2025-10-25 13:30:00    1
dtype: int64
Promedio: 2.1 shots/partido

Detalle shots (con xG):


Unnamed: 0,match_date,minute,type,xg,x,y
3771,2025-08-30 13:30:00,89.0,MissedShots,0.478616,87.7,47.2
5309,2025-09-13 13:30:00,73.0,SavedShot,0.083073,86.4,38.7
6289,2025-09-20 16:30:00,25.0,SavedShot,0.37802,81.0,49.5
6889,2025-09-20 16:30:00,58.0,SavedShot,0.575723,87.2,62.4
7779,2025-09-27 13:30:00,10.0,SavedShot,0.014915,80.1,68.1
8989,2025-09-27 13:30:00,70.0,SavedShot,0.037744,84.5,64.4
13794,2025-10-25 13:30:00,9.0,Goal,0.016667,92.7,34.2
16758,2025-11-01 14:30:00,52.0,Goal,0.289361,85.8,44.4
17481,2025-11-01 14:30:00,93.0,SavedShot,0.289361,97.1,65.0
18506,2025-11-08 14:30:00,47.0,MissedShots,0.938924,77.9,48.3


In [9]:
# 2. PASSES - Análisis de pases
diomande_passes = diomande_events[diomande_events['type'] == 'Pass'].copy()

print(f"\nPASES DE DIOMANDE")
print("=" * 60)
print(f"Total pases: {len(diomande_passes)}")

# Precisión de pases
if 'outcome' in diomande_passes.columns:
    successful = (diomande_passes['outcome'] == 'Successful').sum()
    unsuccessful = (diomande_passes['outcome'] == 'Unsuccessful').sum()
    print(f"\nPrecisión:")
    print(f"  Exitosos: {successful}")
    print(f"  Fallados: {unsuccessful}")
    print(f"  Precisión: {100*successful/(successful+unsuccessful):.1f}%")

# Pases por partido
print(f"\nPases por partido:")
passes_per_match = diomande_passes.groupby('match_date').size().sort_index()
print(passes_per_match)
print(f"Promedio: {passes_per_match.mean():.1f} pases/partido")

# Pases por zona del campo
if 'x' in diomande_passes.columns and 'y' in diomande_passes.columns:
    print(f"\nDistribución por zona (coordenada X):")
    diomande_passes['zone'] = pd.cut(diomande_passes['x'], 
                                      bins=[0, 33, 66, 100], 
                                      labels=['Defensa', 'Centro', 'Ataque'])
    print(diomande_passes['zone'].value_counts())


PASES DE DIOMANDE
Total pases: 378

Pases por partido:
match_date
2025-08-22 18:30:00    11
2025-08-30 13:30:00    16
2025-09-13 13:30:00    15
2025-09-20 16:30:00    32
2025-09-27 13:30:00    34
2025-10-04 13:30:00     9
2025-10-18 13:30:00    12
2025-10-25 13:30:00    26
2025-11-01 14:30:00    32
2025-11-08 14:30:00    29
2025-11-23 14:30:00    54
2025-11-28 19:30:00    39
2025-12-06 17:30:00    31
2025-12-12 19:30:00    38
dtype: int64
Promedio: 27.0 pases/partido

Distribución por zona (coordenada X):
zone
Ataque     173
Centro     156
Defensa     49
Name: count, dtype: int64


In [10]:
# 3. REGATES (TAKE-ONS) - Análisis de dribbles
diomande_takeons = diomande_events[diomande_events['type'] == 'TakeOn'].copy()

print(f"\nREGATES DE DIOMANDE")
print("=" * 60)
print(f"Total regates: {len(diomande_takeons)}")

# Éxito en regates
if 'outcome' in diomande_takeons.columns:
    successful = (diomande_takeons['outcome'] == 'Successful').sum()
    unsuccessful = (diomande_takeons['outcome'] == 'Unsuccessful').sum()
    print(f"\nÉxito en regates:")
    print(f"  Exitosos: {successful}")
    print(f"  Fallados: {unsuccessful}")
    print(f"  Tasa de éxito: {100*successful/(successful+unsuccessful):.1f}%")

# Regates por partido
print(f"\nRegates por partido:")
takeons_per_match = diomande_takeons.groupby('match_date').size().sort_index()
print(takeons_per_match)
print(f"Promedio: {takeons_per_match.mean():.1f} regates/partido")

# Regates por zona
if 'x' in diomande_takeons.columns:
    print(f"\nDistribución por zona (coordenada X):")
    diomande_takeons['zone'] = pd.cut(diomande_takeons['x'], 
                                       bins=[0, 33, 66, 100], 
                                       labels=['Defensa', 'Centro', 'Ataque'])
    print(diomande_takeons['zone'].value_counts())


REGATES DE DIOMANDE
Total regates: 79

Regates por partido:
match_date
2025-08-22 18:30:00     3
2025-08-30 13:30:00     6
2025-09-13 13:30:00     1
2025-09-20 16:30:00     3
2025-09-27 13:30:00     5
2025-10-04 13:30:00     5
2025-10-18 13:30:00     4
2025-10-25 13:30:00     7
2025-11-01 14:30:00     6
2025-11-08 14:30:00     7
2025-11-23 14:30:00     5
2025-11-28 19:30:00     4
2025-12-06 17:30:00     9
2025-12-12 19:30:00    14
dtype: int64
Promedio: 5.6 regates/partido

Distribución por zona (coordenada X):
zone
Ataque     50
Centro     22
Defensa     7
Name: count, dtype: int64


In [11]:
# 4. CARRIES - Análisis de conducciones
diomande_carries = diomande_events[diomande_events['type'] == 'Carry'].copy()

print(f"\nCARRIES DE DIOMANDE")
print("=" * 60)
print(f"Total carries: {len(diomande_carries)}")

# Carries por partido
print(f"\nCarries por partido:")
carries_per_match = diomande_carries.groupby('match_date').size().sort_index()
print(carries_per_match)
print(f"Promedio: {carries_per_match.mean():.1f} carries/partido")

# Carries progresivos (avance > 10 yards en X)
if 'x' in diomande_carries.columns and 'endX' in diomande_carries.columns:
    diomande_carries['x_advance'] = diomande_carries['endX'] - diomande_carries['x']
    progressive = diomande_carries[diomande_carries['x_advance'] > 10]
    
    print(f"\nCarries progresivos (avance > 10 yardas):")
    print(f"  Total: {len(progressive)}")
    print(f"  % del total: {100*len(progressive)/len(diomande_carries):.1f}%")
    print(f"  Promedio/partido: {len(progressive)/len(matches_df):.1f}")

# Distancia total recorrida
if 'x' in diomande_carries.columns and 'y' in diomande_carries.columns and 'endX' in diomande_carries.columns and 'endY' in diomande_carries.columns:
    import numpy as np
    diomande_carries['distance'] = np.sqrt(
        (diomande_carries['endX'] - diomande_carries['x'])**2 + 
        (diomande_carries['endY'] - diomande_carries['y'])**2
    )
    print(f"\nDistancia total en carries: {diomande_carries['distance'].sum():.1f} yardas")
    print(f"Distancia promedio/carry: {diomande_carries['distance'].mean():.1f} yardas")


CARRIES DE DIOMANDE
Total carries: 101

Carries por partido:
match_date
2025-08-22 18:30:00     5
2025-08-30 13:30:00     4
2025-09-13 13:30:00     6
2025-09-20 16:30:00    14
2025-09-27 13:30:00     7
2025-10-04 13:30:00     4
2025-10-18 13:30:00     4
2025-10-25 13:30:00     2
2025-11-01 14:30:00    12
2025-11-08 14:30:00    11
2025-11-23 14:30:00     8
2025-11-28 19:30:00    10
2025-12-06 17:30:00     2
2025-12-12 19:30:00    12
dtype: int64
Promedio: 7.2 carries/partido


In [12]:
# 5. GUARDAR DATASETS ESPECÍFICOS
print("\n" + "="*60)
print("GUARDANDO DATASETS ESPECÍFICOS")
print("="*60)

# Eventos completos
output_all = 'diomande_all_events.csv'
df_all_events.to_csv(output_all, index=False)
print(f"✓ Todos los eventos: {output_all} ({len(df_all_events):,} eventos)")

# Eventos de Diomande
output_diomande = 'diomande_events.csv'
diomande_events.to_csv(output_diomande, index=False)
print(f"✓ Eventos Diomande: {output_diomande} ({len(diomande_events)} eventos)")

# Shots
if len(diomande_shots) > 0:
    diomande_shots.to_csv('diomande_shots.csv', index=False)
    print(f"✓ Shots: diomande_shots.csv ({len(diomande_shots)} shots)")

# Passes
if len(diomande_passes) > 0:
    diomande_passes.to_csv('diomande_passes.csv', index=False)
    print(f"✓ Pases: diomande_passes.csv ({len(diomande_passes)} pases)")

# TakeOns
if len(diomande_takeons) > 0:
    diomande_takeons.to_csv('diomande_takeons.csv', index=False)
    print(f"✓ Regates: diomande_takeons.csv ({len(diomande_takeons)} regates)")

# Carries
if len(diomande_carries) > 0:
    diomande_carries.to_csv('diomande_carries.csv', index=False)
    print(f"✓ Carries: diomande_carries.csv ({len(diomande_carries)} carries)")

print("="*60)


GUARDANDO DATASETS ESPECÍFICOS
✓ Todos los eventos: diomande_all_events.csv (29,404 eventos)
✓ Eventos Diomande: diomande_events.csv (837 eventos)
✓ Shots: diomande_shots.csv (23 shots)
✓ Pases: diomande_passes.csv (378 pases)
✓ Regates: diomande_takeons.csv (79 regates)
✓ Carries: diomande_carries.csv (101 carries)
