In [1]:
import sys
import os
from pathlib import Path
import pandas as pd
import warnings

# Make the project root importable so the local `viz` package can be found.
# This relies on the kernel's working directory being the notebook folder,
# which sits two levels below the project root (<root>/blog/notebooks).
project_root = Path.cwd().parent.parent
# Only insert once: re-running this cell must not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Project-local import; it has to come after the sys.path tweak above.
from viz.match_data import extract_match_complete

# Silences every warning category for the rest of the kernel session.
warnings.filterwarnings('ignore')

print(f"Raíz del proyecto: {project_root}")
print(f"Directorio actual: {Path.cwd()}")

Raíz del proyecto: /home/jaime/FD/data
Directorio actual: /home/jaime/FD/data/blog/notebooks


In [2]:
# CSV of match ids, located one directory above the working directory.
csv_path = Path.cwd().parent / 'inter_miami_match_ids_2025.csv'
# The file was saved together with its row index, which shows up as an
# "Unnamed: 0" data column when read naively. Reading the first column as the
# index keeps that artefact out of the data columns.
matches = pd.read_csv(csv_path, index_col=0)

print(f"Total partidos en CSV: {len(matches)}")
print("\nÚltimos 5 partidos:")
# Last expression of the cell, so the notebook renders it as a table.
matches.tail()

Total partidos en CSV: 40

Últimos 5 partidos:


Unnamed: 0,date,home_team,away_team,whoscored_id,stage,home_score,away_score,status,league,season
35,2025-11-01 23:30:00,Nashville SC,Inter Miami CF,1951143,Major League Soccer Playoff,2,1,6,USA-MLS,2025
36,2025-11-09 01:00:00,Inter Miami CF,Nashville SC,1951675,Major League Soccer Playoff,4,0,6,USA-MLS,2025
37,2025-11-23 22:00:00,FC Cincinnati,Inter Miami CF,1952056,Major League Soccer Playoff,0,4,6,USA-MLS,2025
38,2025-11-29 23:00:00,Inter Miami CF,New York City FC,1953168,Major League Soccer Playoff,5,1,6,USA-MLS,2025
39,2025-12-06 19:30:00,Inter Miami CF,Vancouver Whitecaps,1953302,Major League Soccer Playoff,3,1,6,USA-MLS,2025


In [3]:
# The most recent fixture is the last row of the match table.
last_match = matches.iloc[-1]

# Build the report lines first, then print them framed by two separator rules.
separator = "=" * 60
summary_lines = [
    f"Fecha: {last_match['date']}",
    f"Partido: {last_match['home_team']} vs {last_match['away_team']}",
    f"Resultado: {last_match['home_score']}-{last_match['away_score']}",
    f"Competición: {last_match['stage']}",
    f"WhoScored ID: {last_match['whoscored_id']}",
    f"Liga: {last_match['league']}",
    f"Temporada: {last_match['season']}",
]

print("Información del último partido:")
print(separator)
print("\n".join(summary_lines))
print(separator)

Información del último partido:
Fecha: 2025-12-06 19:30:00
Partido: Inter Miami CF vs Vancouver Whitecaps
Resultado: 3-1
Competición: Major League Soccer Playoff
WhoScored ID: 1953302
Liga: USA-MLS
Temporada: 2025


In [4]:
# Match date as YYYY-MM-DD.
match_date = pd.to_datetime(last_match['date']).strftime('%Y-%m-%d')

# Turn the single-year season into the YY-YY form passed to the extractor,
# e.g. 2025 -> "25-26". Both halves are zero-padded to two digits.
year = int(last_match['season'])
season = f"{year % 100:02d}-{(year + 1) % 100:02d}"

print(f"Año original: {last_match['season']}")
print(f"Season convertida: {season}")
print("\nIniciando extracción...\n")

# Collect the keyword arguments in one place, then make a single call.
extraction_args = dict(
    ws_id=int(last_match['whoscored_id']),
    us_id=None,  # per the original author: MLS is not available on Understat
    league=last_match['league'],
    season=season,  # YY-YY format (e.g. 25-26)
    home_team=last_match['home_team'],
    away_team=last_match['away_team'],
    match_date=match_date,
)
result = extract_match_complete(**extraction_args)

print("\nExtracción completada!")
print(f"Status: {result.get('status', 'unknown')}")
# NOTE(review): the recorded run printed 0 here although the extractor reported
# 1633 events, so 'event_count' may not be a key of the result. Confirm against
# extract_match_complete.
print(f"Eventos procesados: {result.get('event_count', 0)}")

Año original: 2025
Season convertida: 25-26

Iniciando extracción...


Extracting: Inter Miami CF vs Vancouver Whitecaps (2025-12-06)
--------------------------------------------------


1. match_events.csv: 1633 events
2. player_network.csv: 224 records
3. match_aggregates.csv: 65 records
4. spatial_analysis.csv: 49 spatial records
5. match_info.csv: 76 info records

Shots: 19 | Goals: 4 | xG: 0.00

Extracción completada!
Status: complete
Eventos procesados: 0


In [5]:
# Output folder of the extraction step: <two levels above cwd>/viz/data.
data_dir = Path.cwd().parent.parent / 'viz' / 'data'

# Files expected in data_dir after an extraction run.
csv_files = [
    'match_events.csv',
    'player_network.csv',
    'match_aggregates.csv',
    'spatial_analysis.csv',
    'match_info.csv'
]

rule = "=" * 60
print("CSVs generados en viz/data/:")
print(rule)
for name in csv_files:
    target = data_dir / name
    # Report missing files up front and move on to the next one.
    if not target.exists():
        print(f"✗ {name:25s} - NO ENCONTRADO")
        continue
    size_kb = target.stat().st_size / 1024  # bytes -> KB
    row_count = len(pd.read_csv(target))
    print(f"✓ {name:25s} - {row_count:5d} rows, {size_kb:6.1f} KB")
print(rule)

CSVs generados en viz/data/:
✓ match_events.csv          -  1633 rows,  975.9 KB
✓ player_network.csv        -   224 rows,   30.9 KB
✓ match_aggregates.csv      -    65 rows,    9.8 KB
✓ spatial_analysis.csv      -    49 rows,    5.8 KB
✓ match_info.csv            -    76 rows,    5.6 KB


In [6]:
# Preview the event-level CSV: row count, column names and the first rows.
events_path = data_dir / 'match_events.csv'
if events_path.exists():
    events = pd.read_csv(events_path)
    print(f"Total eventos: {len(events)}")
    print(f"\nColumnas ({len(events.columns)}):")
    print(events.columns.tolist())
    print("\nPrimeros 10 eventos:")
    # A bare expression inside an `if` body is never rendered by the notebook
    # (only the cell's last top-level expression is), so display it explicitly.
    # `display` is provided by the IPython kernel without an import.
    display(events.head(10))
else:
    print("Archivo match_events.csv no encontrado")

Total eventos: 1633

Columnas (55):
['game_id', 'period', 'minute', 'second', 'expanded_minute', 'type', 'outcome_type', 'team_id', 'team', 'player_id', 'player', 'x', 'y', 'end_x', 'end_y', 'goal_mouth_y', 'goal_mouth_z', 'blocked_x', 'blocked_y', 'qualifiers', 'is_touch', 'is_shot', 'is_goal', 'card_type', 'related_event_id', 'related_player_id', 'match_id', 'data_source', 'pass_length', 'is_longball', 'is_header', 'is_cross', 'is_through_ball', 'shot_body_part', 'is_assist', 'field_zone', 'is_successful', 'event_type', 'possession_sequence', 'next_player', 'distance_to_goal', 'pass_distance', 'event_id', 'take_ons_in_carry', 'xthreat', 'xthreat_gen', 'is_pre_assist', 'possession_id', 'possession_team', 'is_progressive', 'is_box_entry', 'pass_outcome', 'action_type', 'zone_id', 'xg']

Primeros 10 eventos:


In [7]:
# Preview the player-network CSV: row count, column names and the first rows.
network_path = data_dir / 'player_network.csv'
if network_path.exists():
    network = pd.read_csv(network_path)
    print(f"Total conexiones: {len(network)}")
    print("\nColumnas:")
    print(network.columns.tolist())
    print("\nTop 10 conexiones:")
    # A bare expression inside an `if` body is never rendered by the notebook
    # (only the cell's last top-level expression is), so display it explicitly.
    # `display` is provided by the IPython kernel without an import.
    display(network.head(10))
else:
    print("Archivo player_network.csv no encontrado")

Total conexiones: 224

Columnas:
['record_type', 'team', 'source_player', 'target_player', 'connection_strength', 'avg_x_start', 'avg_y_start', 'avg_x_end', 'avg_y_end', 'avg_xthreat', 'progressive_passes', 'box_entries', 'pass_distance_avg', 'connection_id', 'total_actions', 'minutes_active', 'position_variance_x', 'position_variance_y', 'xthreat_total']

Top 10 conexiones:


In [8]:
# Frequency of each event type, followed by the number of events per team.
if events_path.exists():
    events = pd.read_csv(events_path)
    rule = "=" * 60

    print("Distribución de eventos por tipo:")
    print(rule)
    # value_counts() returns the counts sorted from most to least frequent.
    event_counts = events['type'].value_counts()
    for label, total in event_counts.items():
        print(f"{label:30s}: {total:4d}")
    print(rule)

    print("\nEventos por equipo:")
    team_counts = events['team'].value_counts()
    print(team_counts)

Distribución de eventos por tipo:
Pass                          :  847
Carry                         :  255
Foul                          :   90
BallRecovery                  :   81
BallTouch                     :   74
Aerial                        :   44
TakeOn                        :   37
Clearance                     :   30
Tackle                        :   24
Challenge                     :   18
Interception                  :   17
Dispossessed                  :   11
KeeperPickup                  :   11
Card                          :   10
BlockedPass                   :    9
MissedShots                   :    7
SubstitutionOff               :    7
SubstitutionOn                :    7
Save                          :    6
End                           :    6
SavedShot                     :    6
Start                         :    4
Goal                          :    4
Punch                         :    4
CornerAwarded                 :    4
OffsidePass                   :    3
Offs