# PSG Complete Data Analysis - FootballDecoded

### Objetivo del Análisis

Este notebook realiza una **extracción exhaustiva y organizada** de todos los datos disponibles del **Paris Saint-Germain** para las temporadas **2023/24 y 2024/25**, utilizando el conjunto completo de wrappers desarrollados en FootballDecoded.

**Datos a extraer:**
- Estadísticas de equipo (temporada completa por competición)
- Estadísticas individuales (todos los jugadores de la plantilla)
- Eventos de partidos (goles, pases, disparos, etc.)
- Datos espaciales (coordenadas, mapas de calor, redes de pase)
- Métricas avanzadas (xG, xA, PPDA, build-up chains)
- Análisis específico del partido PSG vs Auxerre (01/09/2024)

**Fuentes de datos:**
- **FBref**: Estadísticas completas y eventos
- **Understat**: Métricas avanzadas (xGChain, PPDA, etc.)
- **WhoScored**: Datos espaciales y coordenadas

In [None]:
# Base dependencies
import pandas as pd
import numpy as np
from typing import Dict, List, Optional, Union
from datetime import datetime
import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings('ignore', category=FutureWarning)

import sys
import os

# Put the project's data directory at the front of sys.path so the local
# `scrappers` and `wrappers` packages imported below can be resolved.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
data_dir = "/home/oriol/FD/Data"
sys.path.insert(0, data_dir)

from scrappers import FBref, Understat, WhoScored
# NOTE(review): the star import presumably provides the fbref_* / understat_* /
# whoscored_* helper functions used by the later cells — confirm against `wrappers`.
from wrappers import *

In [None]:
# FBref player listing for PSG in Ligue 1, season 2023-24.
# Later cells read the 'player' column of this result.
psg_23_24 = fbref_extract_league_players(
    team_filter="Paris S-G",
    season="2023-24",
    league="FRA-Ligue 1",
    verbose=True,
)

# Same query for season 2024-25.
psg_24_25 = fbref_extract_league_players(
    team_filter="Paris S-G",
    season="2024-25",
    league="FRA-Ligue 1",
    verbose=True,
)

In [None]:
# FBref per-player extraction for everyone listed in the 2023-24 roster.
psg_data_23_24 = fbref_extract_multiple_players(
    psg_23_24['player'].tolist(), "FRA-Ligue 1", "2023-24", verbose=True
)

# Visual separator between the verbose logs of the two seasons.
print("\n" + "=" * 50 + "\n")

# FBref per-player extraction for everyone listed in the 2024-25 roster.
psg_data_24_25 = fbref_extract_multiple_players(
    psg_24_25['player'].tolist(), "FRA-Ligue 1", "2024-25", verbose=True
)

In [None]:
# Understat extraction for 2023-24, driven by the player names FBref returned.
players_23_24 = psg_23_24['player'].tolist()
psg_understat_23_24 = understat_extract_multiple_players(
    players_23_24, "FRA-Ligue 1", "2023-24", verbose=True
)

# Visual separator between the verbose logs of the two seasons.
print("\n" + "=" * 50 + "\n")

# Understat extraction for 2024-25, again using the FBref player names.
players_24_25 = psg_24_25['player'].tolist()
psg_understat_24_25 = understat_extract_multiple_players(
    players_24_25, "FRA-Ligue 1", "2024-25", verbose=True
)

In [9]:
# Season 2023-24: left-join the Understat table onto the FBref table so every
# FBref row is kept. Columns present in both get a '_dup' suffix on the
# Understat side.
# NOTE: this rebinds psg_23_24, which an earlier cell used for the FBref roster.
psg_23_24 = pd.merge(
    left=psg_data_23_24,
    right=psg_understat_23_24,
    how='left',
    on=['player_name', 'league'],
    suffixes=('', '_dup'),
)

# Discard the suffixed Understat copies, keeping the FBref version of each column.
dup_cols = [name for name in psg_23_24.columns if name.endswith('_dup')]
psg_23_24 = psg_23_24.drop(dup_cols, axis='columns')

# Season 2024-25: identical join and clean-up.
# NOTE: this likewise rebinds psg_24_25.
psg_24_25 = pd.merge(
    left=psg_data_24_25,
    right=psg_understat_24_25,
    how='left',
    on=['player_name', 'league'],
    suffixes=('', '_dup'),
)

dup_cols = [name for name in psg_24_25.columns if name.endswith('_dup')]
psg_24_25 = psg_24_25.drop(dup_cols, axis='columns')

In [None]:
# Understat identifier of the match (taken from the match URL).
match_id = 28351
league = "FRA-Ligue 1"
season = "2024-25"

# Fetch the shot events of that match through the Understat wrapper.
shot_events = understat_extract_shot_events(
    season=season,
    league=league,
    match_id=match_id,
    verbose=True,
)

# Turn the index into ordinary columns so it ends up in the CSV as data.
df = shot_events.reset_index()

# Write the CSV (without the positional index) and report its dimensions.
filename = f"understat_match_{match_id}_complete"
df.to_csv(f"{filename}.csv", index=False)
print(f"\nüíæ Exportado: {filename}.csv")
print(f"   Filas: {df.shape[0]} | Columnas: {df.shape[1]}")

In [None]:
# WhoScored identifier of the match to export.
whoscored_match_id = 1824012
league = "FRA-Ligue 1"
season = "2024-25"

# Fetch the match events through the WhoScored wrapper.
match_events = whoscored_extract_match_events(
    match_id=whoscored_match_id,
    league=league,
    season=season,
    verbose=True
)

# Normalise the result to a flat DataFrame before exporting.
# The previous `hasattr(match_events, 'index')` guard could not tell pandas
# objects apart from lists/tuples/strings (which expose an `.index` method),
# so those would crash on `.reset_index()`, and `None` would crash on `.to_csv`.
if isinstance(match_events, (pd.DataFrame, pd.Series)):
    # Move the index into ordinary columns so it is written to the CSV.
    df_events = match_events.reset_index()
else:
    # Records (e.g. a list of dicts) become rows; None becomes an empty frame.
    df_events = pd.DataFrame(match_events)

# Write the CSV (without the positional index) and report its dimensions.
filename = f"whoscored_match_{whoscored_match_id}_complete"
df_events.to_csv(f"{filename}.csv", index=False)
print(f"\nüíæ Exportado: {filename}.csv")
print(f"   Filas: {len(df_events)} | Columnas: {len(df_events.columns)}")

In [None]:
# ====================================================================
# PSG TEAM DATA - Ambas temporadas con merge de fuentes
# ====================================================================

def extract_psg_team_data():
    """Collect PSG team-level data for both seasons and export it to CSV.

    For each of the 2023-24 and 2024-25 Ligue 1 seasons the FBref team data is
    fetched first; a season whose FBref result is falsy is skipped entirely.
    When Understat also returns data for the season, it is left-merged onto
    the FBref row; for columns present in both sources the FBref value is kept.

    Side effects:
        Prints progress messages and, when at least one season was extracted,
        writes ``psg_team_complete_data.csv`` to the current directory.

    Returns:
        A DataFrame with the per-season frames concatenated, or an empty
        DataFrame when no season produced FBref data.
    """
    competition = "FRA-Ligue 1"
    merge_keys = ['normalized_team', 'league', 'season']
    season_frames = []

    for season in ("2023-24", "2024-25"):
        print(f"üìä Extrayendo datos de equipo PSG - {season}")

        # FBref is the base source; without it the season contributes nothing.
        fbref_team = fbref_extract_team_season(
            team_name="Paris S-G",
            league=competition,
            season=season,
            include_opponent_stats=True,
            verbose=True,
        )
        if not fbref_team:
            continue

        # Understat uses a different spelling of the club name.
        understat_team = understat_extract_team_season(
            team_name="Paris Saint Germain",
            league=competition,
            season=season,
            verbose=True,
        )

        team_df = pd.DataFrame([fbref_team])
        if understat_team:
            understat_df = pd.DataFrame([understat_team])

            # The two sources name the club differently, so a shared constant
            # key is added to both sides purely to make the join possible.
            team_df['normalized_team'] = 'PSG'
            understat_df['normalized_team'] = 'PSG'

            merged = team_df.merge(
                understat_df,
                how='left',
                on=merge_keys,
                suffixes=('', '_dup'),
            )

            # Remove the suffixed Understat copies and the temporary join key.
            unwanted = [name for name in merged.columns if name.endswith('_dup')]
            unwanted.append('normalized_team')
            team_df = merged.drop(columns=unwanted)

        season_frames.append(team_df)

    if not season_frames:
        return pd.DataFrame()

    # Stack the seasons into one table and export it.
    psg_team_complete = pd.concat(season_frames, ignore_index=True)

    filename = "psg_team_complete_data"
    psg_team_complete.to_csv(f"{filename}.csv", index=False)
    print(f"‚úÖ Exportado: {filename}.csv")
    print(f"   Filas: {len(psg_team_complete)} | Columnas: {len(psg_team_complete.columns)}")

    return psg_team_complete

# Run the team-level extraction and keep the combined DataFrame it returns.
psg_team_data = extract_psg_team_data()

In [4]:
from scrappers import FBref

# Probe a domestic league (La Liga) to have a reference result to compare with.
fbref_domestic = FBref("ESP-La Liga", "2023-24")
print("=== LA LIGA (domestic) ===")
try:
    leagues_domestic = fbref_domestic.read_leagues()
    print(leagues_domestic)
    # Show the league URL when the table has rows, otherwise a placeholder.
    if leagues_domestic.empty:
        print("\nURL:", "No URL")
    else:
        print("\nURL:", leagues_domestic.loc['ESP-La Liga', 'url'])
except Exception as exc:
    # Exploratory cell: report any failure instead of stopping the notebook.
    print(f"Error: {exc}")

=== LA LIGA (domestic) ===
            gender governing_body first_season last_season tier  \
league                                                            
ESP-La Liga      M           <NA>         8889        2425  1st   

                                                     awards  \
league                                                        
ESP-La Liga  La Liga Best Player, Trofeo Alfredo Di Stéfano   

                                              url country  
league                                                     
ESP-La Liga  /en/comps/12/history/La-Liga-Seasons     ESP  

URL: /en/comps/12/history/La-Liga-Seasons
