In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from sqlalchemy import text
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))
from database.connection import get_db_manager

In [None]:
# DATABASE FILTERS
# Configuration cell: the empty values below are placeholders; the trailing
# comments show example values.
TABLE_TYPE = ''  # 'domestic' or 'european'
LEAGUES = [      # List of leagues to include
    #'ENG-Premier League',
    #'ESP-La Liga',
    #'ITA-Serie A',
    #'GER-Bundesliga',
    #'FRA-Ligue 1',
    #'INT-Champions League'  # Only if TABLE_TYPE = 'european'
]
SEASON = ''            # Specific season (e.g. '2425', '2324')
POSITION_FILTER = ''   # 'GK', 'DF', 'MF', 'FW' - matched against the start of the position (e.g. 'MF')
MIN_MINUTES = 0        # Minimum minutes played (e.g. 800)

# PLAYERS TO ANALYSE
PLAYER_1_SEARCH = ''      # Player 1 name (e.g. 'Pedri')
PLAYER_2_SEARCH = ''      # Player 2 name (e.g. 'Bellingham')

# RADAR CONFIGURATION
# NOTE(review): RADAR_TITLE and RADAR_DESCRIPTION are not referenced anywhere
# else in the visible notebook - confirm whether they are still needed.
RADAR_TITLE = ""              # Title (e.g. "Centre Mid Template")
RADAR_DESCRIPTION = ""        # Description (e.g. "vs Top-5 League MF (800+ mins) 2024/25")
# NOTE(review): TEAM_COLORS is reassigned with a full palette further down in
# this same cell, so the two-colour value set here is overwritten.
TEAM_COLORS = ['#', '#']      # Colours (e.g. ['#004D98', '#FFFFFF'])

# TEAM COLOURS - full palette covering all cases
# NOTE(review): this assignment replaces the two-colour TEAM_COLORS defined
# just above in this cell; the whole list is what gets passed as `team_colors`
# to the plotting functions.
TEAM_COLORS = [
    '#FF6B6B',      # Coral Red (already used)
    '#4ECDC4',      # Turquoise (already used)
    '#FFFFFF',      # White (Real Madrid, Tottenham)
    '#000000',      # Black (Newcastle, Juventus)
    '#00BFFF',      # Deep Sky Blue (City, Napoli)
    '#00FFFF',      # Cyan (already used in node_cmap)
    '#7CFC00',      # Lawn Green (already used in node_cmap)
    '#FFFF00',      # Yellow (Dortmund, Norwich)
    '#FFD700',      # Gold (already used in node_cmap)
    '#FFB6C1',      # Light Pink (already used in node_cmap)
    '#FF6347',      # Tomato (already used in node_cmap)
    '#FF4500',      # Orange Red (Valencia, Netherlands)
    '#32CD32',      # Lime Green (Athletic, Celtic)
    '#DA70D6',      # Orchid (Fiorentina)
    '#004D98',      # Navy Blue (Chelsea, PSG)
    '#8B0000',      # Dark Red (Liverpool, Arsenal)
    '#800080',      # Purple (generic)
    '#FF8C00',      # Dark Orange (alternative)
    '#20B2AA',      # Light Sea Green (alternative)
    '#DC143C',      # Crimson (alternative)

    # NEWLY ADDED COLOURS:
    '#A50044',      # Barcelona Garnet (strong contrast)
    '#0066CC',      # Barcelona Blue / Everton / Leicester
    '#722F37',      # West Ham Claret / Burnley
    '#87CEEB',      # Sky Blue (Lazio, Coventry)
    '#6A5ACD',      # Slate Blue (generic, contrasts well)
    '#FF1493',      # Deep Pink (generic, highly visible)
    '#00FF7F',      # Spring Green (strong contrast)
    '#FFE4B5',      # Moccasin (cream, Tottenham away)
    '#B22222',      # Fire Brick (darker red, Milan)
    '#48D1CC',      # Medium Turquoise (different from the one already used)
    '#FFA500',      # Orange (Netherlands alternative, Dynamo)
    '#9370DB',      # Medium Purple (generic, elegant)
    '#F0E68C',      # Khaki (soft yellow, contrasts well)
    '#FF69B4',      # Hot Pink (highly visible on a dark background)
    '#00CED1'       # Dark Turquoise (another distinctive turquoise)
]

# Base path for generated images (e.g. "images/comparison").
# The plotting cell appends suffixes such as "_swarm.png" directly to this
# string, so the default "images/" produces files like "images/_swarm.png".
SAVE_PATH_BASE = "images/"

# TEAM LOGOS
# Passed as `team_logos` to create_stats_table; the commented entries show the
# expected shape (team name -> image path).
team_logos = {
    # 'Barcelona': '../logos/LaLiga/FC Barcelona.png',
    # 'Real Madrid': '../logos/LaLiga/Real Madrid.png'
}

# CUSTOMISABLE FOOTER TEXT FOR THE TABLE
TABLE_FOOTER_TEXT = ""  # E.g. "Stats & Percentiles vs La Liga 24/25 midfielders (800+ mins)"

In [None]:
# Validate the configuration before touching the database: TABLE_TYPE is
# interpolated into the table name (identifiers cannot be bound as SQL
# parameters), so it must come from a fixed whitelist.
if TABLE_TYPE not in ('domestic', 'european'):
    raise ValueError(f"TABLE_TYPE debe ser 'domestic' o 'european', recibido: {TABLE_TYPE!r}")
if not LEAGUES:
    # An empty list would otherwise render as the invalid SQL `IN ()`.
    raise ValueError("LEAGUES debe contener al menos una liga/competición")

# Database connection
db = get_db_manager()

# Dynamic query
table_suffix = TABLE_TYPE

# Columns depend on the table type: domestic tables carry Understat metrics
# and a `league` column, European tables only FBref metrics and `competition`.
if TABLE_TYPE == 'domestic':
    metrics_cols = "fbref_metrics, understat_metrics"
    league_col = "league"
else:
    metrics_cols = "fbref_metrics"
    league_col = "competition"

# League names and season are sent as bound parameters instead of being
# pasted into the SQL text, so quotes in the values cannot break the query.
league_params = {f"league_{i}": league for i, league in enumerate(LEAGUES)}
league_placeholders = ", ".join(f":{name}" for name in league_params)

query_template = f"""
SELECT 
    unique_player_id,
    player_name,
    team,
    {league_col},
    season,
    position,
    nationality,
    age,
    {metrics_cols}
FROM footballdecoded.players_{table_suffix} p
WHERE p.{league_col} IN ({league_placeholders})
AND p.season = :season
ORDER BY p.{league_col}, p.season DESC, p.team, p.player_name
"""

# Always release the connection, even when the query fails.
try:
    df_raw = pd.read_sql(
        text(query_template),
        db.engine,
        params={**league_params, "season": SEASON},
    )
finally:
    db.close()

print(f"Extraídos: {df_raw.shape[0]} registros, {df_raw.shape[1]} columnas")
df_raw.head()

In [None]:
# Check which positions are available. Null positions are dropped before
# sorting because sorted() cannot compare NaN/None with strings.
positions = df_raw['position'].value_counts().sort_index()
print(f"Posiciones: {sorted(df_raw['position'].dropna().unique())}")

# Filter by position (prefix match on the position string)
if POSITION_FILTER:
    # na=False: rows without a position count as "no match" instead of
    # yielding NaN, which would make the boolean mask invalid.
    position_mask = df_raw['position'].str.startswith(POSITION_FILTER, na=False)
    df_position = df_raw[position_mask].copy()
    print(f"Filtrado '{POSITION_FILTER}': {df_position.shape[0]} jugadores")
else:
    df_position = df_raw.copy()
    print(f"Sin filtro posición: {df_position.shape[0]} jugadores")

In [None]:
# Filter by minimum minutes played.
# `minutes_played` is read from the fbref_metrics dict of each row. The value
# is coerced with pd.to_numeric so that numeric strings compare correctly and
# missing / non-numeric / non-dict entries count as 0 minutes instead of
# raising a TypeError in the comparison.
minutes_played = pd.to_numeric(
    df_position['fbref_metrics'].apply(
        lambda metrics: metrics.get('minutes_played') if isinstance(metrics, dict) else None
    ),
    errors='coerce',
).fillna(0)

df_filtered = df_position[minutes_played >= MIN_MINUTES].copy()

print(f"Filtro {MIN_MINUTES}+ min: {df_filtered.shape[0]} jugadores")
df_filtered.head()

In [None]:
# Collect every metric key present across the filtered players
fbref_metrics_all = set()
understat_metrics_all = set()

for fbref_entry in df_filtered['fbref_metrics']:
    if fbref_entry:
        fbref_metrics_all.update(fbref_entry.keys())

# The understat_metrics column is only selected for domestic tables
if TABLE_TYPE == 'domestic':
    for understat_entry in df_filtered['understat_metrics']:
        if understat_entry:
            understat_metrics_all.update(understat_entry.keys())

# Numbered, alphabetically sorted listing of the FBref metrics
print("MÉTRICAS FBREF DISPONIBLES:")
print("=" * 50)
fbref_sorted = sorted(fbref_metrics_all)
for position, name in enumerate(fbref_sorted, start=1):
    print(f"{position:2d}. {name}")

print(f"\nTotal FBref: {len(fbref_sorted)}")

# Same listing for Understat, domestic tables only
if TABLE_TYPE == 'domestic':
    print("\n\nMÉTRICAS UNDERSTAT DISPONIBLES:")
    print("=" * 50)
    understat_sorted = sorted(understat_metrics_all)
    for position, name in enumerate(understat_sorted, start=1):
        print(f"{position:2d}. {name}")

    print(f"\nTotal Understat: {len(understat_sorted)}")

In [None]:
def extract_metrics(df, col_name, min_valid=5):
    """Expand a column of metric dictionaries into a numeric DataFrame.

    Args:
        df: DataFrame whose ``col_name`` column holds one dict per row
            (metric name -> raw value). Cells that are not dicts are treated
            as having no metrics.
        col_name: Name of the column containing the dictionaries.
        min_valid: Minimum number of non-NaN values a metric needs in order
            to be kept. Defaults to 5.

    Returns:
        DataFrame indexed like ``df`` with one float column per retained
        metric. Values are converted with ``_convert_to_float``; keys missing
        from a row become NaN. Columns are sorted by metric name so the
        layout is reproducible (iteration order of a set of strings is not).
    """
    # Read the column once; non-dict cells contribute no keys.
    records = [cell if isinstance(cell, dict) else {} for cell in df[col_name]]

    # Unique keys, in a deterministic order.
    metric_keys = sorted({key for record in records for key in record}, key=str)

    columns = {}
    for key in metric_keys:
        values = [
            _convert_to_float(record[key]) if key in record else np.nan
            for record in records
        ]

        # Only keep metrics with enough valid values.
        if pd.Series(values, dtype=float).notna().sum() >= min_valid:
            columns[key] = values

    # Build the frame in one go instead of inserting column by column.
    return pd.DataFrame(columns, index=df.index)

def _convert_to_float(value):
    """Convierte valor a float de manera robusta"""
    if isinstance(value, (int, float)):
        return float(value)
    
    if value is None or pd.isna(value):
        return np.nan
    
    if isinstance(value, str):
        if value.strip() == '':
            return np.nan
        
        if value.lower().strip() in ['nan', 'none', 'null', '-']:
            return np.nan
        
        try:
            return float(value)
        except (ValueError, TypeError):
            return np.nan
    
    return np.nan

# Extract numeric metrics. Understat metrics exist only for domestic tables;
# otherwise an empty frame sharing the same index is used.
fbref_nums = extract_metrics(df_filtered, 'fbref_metrics')
understat_nums = (
    extract_metrics(df_filtered, 'understat_metrics')
    if TABLE_TYPE == 'domestic'
    else pd.DataFrame(index=df_filtered.index)
)

if TABLE_TYPE == 'domestic':
    print(f"FBref: {fbref_nums.shape[1]} métricas | Understat: {understat_nums.shape[1]} métricas")
else:
    print(f"FBref: {fbref_nums.shape[1]} métricas extraídas")

In [None]:
# Metrics excluded from per-90 scaling (already normalised, ratios,
# averages, match counts or identifiers)
exclude_per90 = {
    'pass_completion_pct', 'shots_on_target_pct', 'Take-Ons_Succ%', 'Take-Ons_Tkld%', 
    'Aerial Duels_Won%', 'Challenges_Tkl%', 'Save%', 'Launched_Cmp%', 'Crosses_Stp%',
    'shots_per_90', 'GA90', 'GCA_GCA90', 'SCA_SCA90', 'Team Success_+/-90', 'SoT/90',
    'npxG/Sh', 'xG+xAG', 'non_penalty_xG_plus_xAG', 'avg_shot_distance', 'minutes_per_match',
    'Passes_AvgLen', 'Goal Kicks_AvgLen', 'Starts_Mn/Start', 'Subs_Mn/Sub', 'Min%',
    'matches_played', 'matches_started', 'minutes_played', 'wins', 'draws', 'losses',
    'understat_buildup_involvement_pct', 'understat_player_id', 'understat_team_id'
}

# extract_metrics drops metrics with too few valid values, so the minutes
# column may be absent; fail with an explicit message in that case.
if 'minutes_played' not in fbref_nums.columns:
    raise KeyError("'minutes_played' no está disponible en fbref_nums; no se puede calcular per90")

# Zero (or negative) minutes become NaN so the division yields NaN instead of
# +/-inf, which would otherwise rank as the top percentile further down.
minutes_for_per90 = fbref_nums['minutes_played'].where(fbref_nums['minutes_played'] > 0)

# Per-90 FBref metrics
fbref_per90 = fbref_nums.loc[:, ~fbref_nums.columns.isin(exclude_per90)]
fbref_per90 = (fbref_per90.div(minutes_for_per90, axis=0) * 90).round(3)
fbref_per90.columns = [f'{col}_per90' for col in fbref_per90.columns]

# Per-90 Understat metrics (scaled with the FBref minutes as well)
if TABLE_TYPE == 'domestic':
    understat_per90 = understat_nums.loc[:, ~understat_nums.columns.isin(exclude_per90)]
    understat_per90 = (understat_per90.div(minutes_for_per90, axis=0) * 90).round(3)
    understat_per90.columns = [f'{col}_per90' for col in understat_per90.columns]
    print(f"Per90: {fbref_per90.shape[1]} + {understat_per90.shape[1]}")
else:
    understat_per90 = pd.DataFrame(index=df_filtered.index)
    print(f"Per90: {fbref_per90.shape[1]}")

# COMBINE all metrics (raw + per90)
all_metrics = pd.concat([fbref_nums, understat_nums, fbref_per90, understat_per90], axis=1)

In [None]:
# Percentiles for ALL metrics at once: the fractional rank is rescaled with
# `* 98 + 1`, rounded, and stored as nullable integers so NaN stays missing.
all_percentiles = (
    all_metrics.rank(pct=True)
    .mul(98)
    .add(1)
    .round(0)
    .astype('Int64')
    .add_suffix('_pct')
)

print(f"Métricas totales: {len(all_metrics.columns)}")
print(f"Percentiles totales: {len(all_percentiles.columns)}")

In [None]:
# Final consolidated DataFrame: identity columns + metrics + percentiles.
# European tables use 'competition' where every other case uses 'league'.
base_cols = [
    'unique_player_id', 'player_name', 'team',
    'competition' if TABLE_TYPE == 'european' else 'league',
    'season', 'position',
]

df_final = pd.concat(
    [
        df_filtered[base_cols],
        all_metrics,        # every metric (raw + per90)
        all_percentiles,    # every percentile
    ],
    axis=1,
)

print(f"DataFrame final: {len(df_final)} filas, {len(df_final.columns)} columnas")

In [None]:
# Nested-Series clean-up
def clean_series_values(df):
    """Replace cells holding a nested ``pd.Series`` with that Series' first item.

    Each column is checked for nested Series; when any is found, the column
    name is printed and the column is rewritten so that every non-empty
    nested Series is replaced by its first element. Empty Series and all
    other values are left untouched. ``df`` is modified in place and also
    returned.
    """
    def _is_nested(cell):
        return isinstance(cell, pd.Series)

    def _unwrap(cell):
        if _is_nested(cell) and not cell.empty:
            return cell.iloc[0]
        return cell

    for column_name in df.columns:
        if not df[column_name].apply(_is_nested).any():
            continue

        print(f"Limpiando Series en: {column_name}")
        df[column_name] = df[column_name].apply(_unwrap)

    return df

df_final = clean_series_values(df_final)

# Drop columns known to be problematic (labels that are absent are skipped)
problem_cols = ['shots_on_target_pct']
df_final = df_final.drop(columns=problem_cols, errors='ignore')

print(f"Limpieza completada: {len(df_final.columns)} columnas finales")

In [None]:
# Dump every column of the first row together with the Python type of its
# value, as a quick sanity check of the consolidated frame.
sample_player = df_final.iloc[0]
print("TODAS LAS COLUMNAS:")
for col in df_final.columns:
    sample_value = sample_player[col]
    print(f"{col}: {sample_value} (tipo: {type(sample_value)})")

In [None]:
# Look up the configured players
def _search_players(players, name_fragment, league_column):
    """Print and return the rows whose player_name contains ``name_fragment``.

    Args:
        players: DataFrame with at least the columns unique_player_id,
            player_name, team and ``league_column``.
        name_fragment: Text to look for, case-insensitively. It is matched
            literally (regex=False), so characters such as '(' or '+' in a
            name cannot be misread as a regular expression.
        league_column: Name of the league/competition column to display.

    Returns:
        DataFrame with the matching rows (rows with a null name never match).
    """
    matches = players[
        players['player_name'].str.contains(name_fragment, case=False, na=False, regex=False)
    ]
    for _, player in matches.iterrows():
        print(f"  {player['unique_player_id']} - {player['player_name']} ({player['team']}, {player[league_column]})")
    return matches


# Domestic tables expose 'league'; any other table type exposes 'competition'.
_search_league_col = 'league' if TABLE_TYPE == 'domestic' else 'competition'

if PLAYER_1_SEARCH:
    print(f"Búsqueda '{PLAYER_1_SEARCH}':")
    search1 = _search_players(df_filtered, PLAYER_1_SEARCH, _search_league_col)

if PLAYER_2_SEARCH:
    print(f"\nBúsqueda '{PLAYER_2_SEARCH}':")
    search2 = _search_players(df_filtered, PLAYER_2_SEARCH, _search_league_col)

In [None]:
# IDs of the players to plot. The plotting cell only generates charts when
# PLAYER_1_ID is non-empty; the comparison charts additionally need PLAYER_2_ID.
PLAYER_1_ID = ""  # Paste the unique_player_id printed by the search cell above
PLAYER_2_ID = ""  # Paste the unique_player_id printed by the search cell above (optional)

In [None]:
# RADAR METRICS - CONFIGURE HERE (exactly 10 metrics)
# Column names of df_final to plot; uncomment/edit the example to use it.
radar_metrics = [
    # Example for midfielders:
    #'goals_per90', 'assists_per90',
    #'expected_assists_per90', 'understat_npxg_plus_xa_per90', 'progressive_passes_per90',
    #'key_passes_per90', 'passes_final_third_per90', 'pass_completion_pct',
    #'tackles_won_per90', 'interceptions_per90'
]

# Display labels, one per entry of radar_metrics and in the same order
radar_titles = [
    # Example for midfielders:
    #'Goals\nper 90', 'Assists\nper 90',
    #'Expected\nAssists per 90', 'npxG + xA\nper 90', 'Progressive\nPasses per 90',
    #'Key Passes\nper 90', 'Passes Final\nThird per 90', 'Pass Accuracy\n%',
    #'Tackles Won\nper 90', 'Interceptions\nper 90'
]

# Check that there are exactly 10 metrics and 10 titles. This only prints a
# message; it does not stop execution.
if len(radar_metrics) != 10 or len(radar_titles) != 10:
    print(f"ERROR: Se requieren exactamente 10 métricas y títulos")
    print(f"Actual: {len(radar_metrics)} métricas, {len(radar_titles)} títulos")
else:
    print(f"Métricas radar configuradas: {len(radar_metrics)} métricas")

In [None]:
from viz.swarm_radar import create_player_radar
from viz.stats_radar import create_stats_table, combine_radar_and_table
from IPython.display import Image as IPImage, display

def _render_radar_bundle(name, use_swarm, player_2_id=None):
    """Create one radar, its stats table and the combined image, then display it.

    Args:
        name: Chart identifier used in the file names
            (``{SAVE_PATH_BASE}_{name}.png``, ``..._table.png``,
            ``..._combined.png``).
        use_swarm: Forwarded to ``create_player_radar`` as ``use_swarm``.
        player_2_id: Optional second player. When falsy, ``player_2_id`` is
            not passed to the plotting functions at all, so they fall back to
            their own default.
    """
    radar_path = f"{SAVE_PATH_BASE}_{name}.png"
    table_path = f"{SAVE_PATH_BASE}_{name}_table.png"
    combined_path = f"{SAVE_PATH_BASE}_{name}_combined.png"

    # Make sure the target directory exists before anything is saved.
    output_dir = os.path.dirname(radar_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    player_kwargs = {'player_1_id': PLAYER_1_ID}
    if player_2_id:
        player_kwargs['player_2_id'] = player_2_id

    create_player_radar(
        df_data=df_final,
        metrics=radar_metrics,
        metric_titles=radar_titles,
        save_path=radar_path,
        team_colors=TEAM_COLORS,
        use_swarm=use_swarm,
        show_plot=False,
        **player_kwargs,
    )

    create_stats_table(
        df_data=df_final,
        metrics=radar_metrics,
        metric_titles=radar_titles,
        team_colors=TEAM_COLORS,
        save_path=table_path,
        team_logos=team_logos,
        footer_text=TABLE_FOOTER_TEXT,
        show_plot=False,
        **player_kwargs,
    )

    combine_radar_and_table(
        radar_path=radar_path,
        table_path=table_path,
        output_path=combined_path,
    )
    display(IPImage(combined_path))


# Titles are checked as well as metrics: both lists are passed together to
# the plotting functions and must each have 10 entries.
if len(radar_metrics) == 10 and len(radar_titles) == 10 and PLAYER_1_ID:

    # Chart 1: swarm radar (comparison)
    if PLAYER_2_ID:
        _render_radar_bundle("swarm", use_swarm=True, player_2_id=PLAYER_2_ID)

    # Chart 2: individual radar (player 1)
    _render_radar_bundle("individual", use_swarm=False)

    # Chart 3: traditional radar (comparison)
    if PLAYER_2_ID:
        _render_radar_bundle("traditional", use_swarm=False, player_2_id=PLAYER_2_ID)
else:
    print("No se pueden generar gráficos: verificar métricas y IDs de jugadores")