# Pervis Estupiñán vs Johan Mojica - Radar
## Villarreal

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from sqlalchemy import text
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

import sys
import os

# Put the directory three levels above the current working directory at the
# front of sys.path, so imports that follow are resolved from there first.
parent_hops = ['..'] * 3
project_root = os.path.abspath(os.path.join(os.getcwd(), *parent_hops))
sys.path.insert(0, project_root)

from database.connection import get_db_manager

In [None]:
# --- Analysis parameters ---------------------------------------------------
# Minimum 'minutes_played' a player needs to be kept in the comparison pool.
MIN_MINUTES = 800
# Season identifier (presumably 2021/22 in the database's own encoding).
SEASON = '2122'

# Hex colours handed to the plotting helpers as `team_colors`.
COLORS = ['#FFE500', '#006B3D']

# Players to compare: display name, substring used to search `player_name`,
# and the club each one is expected to belong to.
PLAYERS = [
    {'name': name, 'search': search, 'team': team}
    for name, search, team in (
        ('Pervis Estupiñán', 'Estupiñán', 'Villarreal'),
        ('Johan Mojica', 'Mojica', 'Elche'),
    )
]

# Output directory for the generated images; created up front if missing.
SAVE_PATH = './estupinan_mojica_radars/'
os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
# Pull every player row of the five listed leagues for SEASON.
from sqlalchemy import bindparam  # local import: only this cell needs it

big5_leagues = ['ENG-Premier League', 'ESP-La Liga', 'ITA-Serie A', 'GER-Bundesliga', 'FRA-Ligue 1']

# The league list and the season are bound parameters rather than text spliced
# into the SQL, and the season comes from the SEASON constant so the query
# cannot drift away from the configuration cell.
query = text(
    """
    SELECT unique_player_id, player_name, team, league, season, position,
           nationality, age, fbref_metrics, understat_metrics
    FROM footballdecoded.players_domestic p
    WHERE p.league IN :leagues
      AND p.season = :season
    ORDER BY p.league, p.season DESC, p.team, p.player_name
    """
).bindparams(bindparam('leagues', expanding=True))

db = get_db_manager()
try:
    df_raw = pd.read_sql(
        query,
        db.engine,
        params={'leagues': big5_leagues, 'season': SEASON},
    )
finally:
    # Release the connection even when the query fails.
    db.close()
print(f"Extraídos: {df_raw.shape[0]} registros")

In [None]:
# Comparison pool: rows whose FBref metrics report at least MIN_MINUTES played
# and whose position string contains 'DF' (case-insensitive).
minutes_played = df_raw['fbref_metrics'].apply(
    lambda metrics_dict: metrics_dict.get('minutes_played', 0) if metrics_dict else 0
)
has_enough_minutes = minutes_played >= MIN_MINUTES
is_defender = df_raw['position'].str.contains('DF', case=False, na=False)

# .copy() so later cells work on an independent frame, not a view of df_raw.
df_filtered = df_raw[has_enough_minutes & is_defender].copy()
print(f"Filtrado: {df_filtered.shape[0]} jugadores")

In [None]:
def extract_metrics(df, col_name, min_valid=5):
    """Expand a column of metric dictionaries into a numeric DataFrame.

    Each cell of ``df[col_name]`` is expected to be a dict mapping metric
    names to raw values; cells that are not dicts contribute no values.
    Every raw value is passed through ``_convert_to_float``.

    Args:
        df: Source DataFrame.
        col_name: Name of the column holding the metric dictionaries.
        min_valid: Minimum number of non-NaN values a metric needs in order
            to be kept as a column. Defaults to 5.

    Returns:
        A DataFrame sharing ``df``'s index, with one float column per kept
        metric. Columns appear in the order the metric names are first
        encountered, so the layout is reproducible between runs.
    """
    # Read the column once; non-dict cells (None, NaN, ...) become empty dicts.
    records = [cell if isinstance(cell, dict) else {} for cell in df[col_name]]

    # dict.fromkeys de-duplicates while preserving first-seen order
    # (iterating a set would give an arbitrary column order).
    metric_names = dict.fromkeys(name for record in records for name in record)

    columns = {}
    for name in metric_names:
        values = [
            _convert_to_float(record[name]) if name in record else np.nan
            for record in records
        ]
        if pd.Series(values).notna().sum() >= min_valid:
            columns[name] = values

    # Build the frame in one step rather than inserting columns one by one.
    return pd.DataFrame(columns, index=df.index)

def _convert_to_float(value):
    if isinstance(value, (int, float)):
        return float(value)
    if value is None or pd.isna(value):
        return np.nan
    if isinstance(value, str):
        if value.strip() == '' or value.lower().strip() in ['nan', 'none', 'null', '-']:
            return np.nan
        try:
            return float(value)
        except (ValueError, TypeError):
            return np.nan
    return np.nan

# Flatten both metric-dictionary columns of the filtered pool, FBref first.
fbref_nums, understat_nums = (
    extract_metrics(df_filtered, source_column)
    for source_column in ('fbref_metrics', 'understat_metrics')
)
print(f"FBref: {fbref_nums.shape[1]} métricas")

In [None]:
# Metric names that are NOT rescaled to a per-90 value below.
exclude_normalization = {
    'minutes_played', 'age', 'birth_year', 'games_started',
    'minutes_per_game', 'minutes_per_start', 'games', 'games_subs',
    'unused_sub', 'points_per_game', 'on_goals_for', 'on_goals_against',
    'plus_minus', 'plus_minus_per90', 'plus_minus_wowy',
    'on_xg_for', 'on_xg_against',
    'xg_plus_minus', 'xg_plus_minus_per90', 'xg_plus_minus_wowy',
    'pass_completion_pct', 'shots_on_target_pct',
    'Take-Ons_Succ%', 'Take-Ons_Tkld%', 'Aerial Duels_Won%',
    'Challenges_Tkl%', 'Save%', 'Launched_Cmp%', 'Crosses_Stp%',
    'shots_per_90', 'GA90', 'GCA_GCA90', 'SCA_SCA90',
    'Team Success_+/-90', 'SoT/90', 'npxG/Sh', 'xG+xAG',
    'non_penalty_xG_plus_xAG', 'avg_shot_distance', 'minutes_per_match',
    'Passes_AvgLen', 'Goal Kicks_AvgLen', 'Starts_Mn/Start', 'Subs_Mn/Sub',
    'Min%', 'matches_played', 'matches_started', 'wins', 'draws', 'losses',
    'understat_buildup_involvement_pct', 'understat_player_id',
    'understat_team_id', 'CS%', 'CS', 'PSxG+/-', 'PSxG', 'Save_Save%',
    'Sweeper_#OPA', 'Sweeper_#OPA/90', 'Sweeper_AvgDist',
    'Goal Kicks_Launch%', 'G-xG',
}


def _to_per90(metrics_df, minutes):
    """Return the non-excluded columns of *metrics_df* scaled to per-90 values.

    Each kept column is divided row-wise by *minutes*, multiplied by 90,
    rounded to 3 decimals and renamed with a ``_per90`` suffix.
    """
    scalable = metrics_df.loc[:, ~metrics_df.columns.isin(exclude_normalization)]
    scaled = (scalable.div(minutes, axis=0) * 90).round(3)
    scaled.columns = [f'{col}_per90' for col in scaled.columns]
    return scaled


# Both sources are normalised with the FBref minutes.
fbref_per90 = _to_per90(fbref_nums, fbref_nums['minutes_played'])
understat_per90 = _to_per90(understat_nums, fbref_nums['minutes_played'])

all_metrics = pd.concat([fbref_nums, understat_nums, fbref_per90, understat_per90], axis=1)

# rank(pct=True) lies in (0, 1]; scaling by 98 and adding 1 caps the
# percentile at 99. 'Int64' is the nullable integer dtype, so NaN stays missing.
all_percentiles = (all_metrics.rank(pct=True) * 98 + 1).round(0).astype('Int64')
all_percentiles.columns = [f'{col}_pct' for col in all_percentiles.columns]
print(f"Métricas: {all_metrics.shape[1]}")

In [None]:
# Identity columns first, followed by the metric values and their percentiles.
base_cols = [
    'unique_player_id', 'player_name', 'team',
    'league', 'season', 'position',
]
identity_frame = df_filtered[base_cols]
df_final = pd.concat([identity_frame, all_metrics, all_percentiles], axis=1)

def clean_series_values(df):
    """Replace cells that hold a ``pd.Series`` with that Series' first element.

    Columns are visited by label. A column is rewritten only if at least one
    of its cells is a Series; empty Series cells and non-Series cells are
    left unchanged. The frame is modified in place and also returned.
    """
    def _is_series(cell):
        return isinstance(cell, pd.Series)

    def _first_element(cell):
        if _is_series(cell) and not cell.empty:
            return cell.iloc[0]
        return cell

    for column_name in df.columns:
        if not df[column_name].apply(_is_series).any():
            continue
        df[column_name] = df[column_name].apply(_first_element)
    return df

# Unwrap any Series-valued cells, then drop 'shots_on_target_pct' if present
# (errors='ignore' makes the drop a no-op when the column does not exist).
df_final = clean_series_values(df_final).drop(
    columns=['shots_on_target_pct'],
    errors='ignore',
)
n_rows, n_cols = df_final.shape
print(f"DataFrame: {n_rows} × {n_cols}")

In [None]:
# Resolve every configured player to its `unique_player_id` in the pool.
player_ids = {}
for p in PLAYERS:
    # regex=False: the search term is a literal name fragment, not a pattern.
    name_hits = df_filtered[
        df_filtered['player_name'].str.contains(p['search'], case=False, na=False, regex=False)
    ]

    # Several players may share the fragment. Prefer rows whose team matches
    # the configured club; if none does (e.g. the club is spelled differently
    # in the data), fall back to the plain name matches.
    results = name_hits
    expected_team = p.get('team')
    if expected_team:
        team_hits = name_hits[
            name_hits['team'].str.contains(expected_team, case=False, na=False, regex=False)
        ]
        if len(team_hits) > 0:
            results = team_hits

    if len(results) > 0:
        player_ids[p['name']] = results.iloc[0]['unique_player_id']
        print(f"✓ {p['name']}: {results.iloc[0]['player_name']}")
    else:
        # Report the miss here instead of leaving it to surface later as a
        # KeyError on `player_ids`.
        print(f"✗ {p['name']}: no encontrado")
print(f"\nJugadores: {len(player_ids)}")

In [None]:
# Radar axes as (metric key, display title) pairs, so a metric and its label
# stay side by side; the two parallel lists are derived from them.
_RADAR_AXES = [
    ('pass_completion_pct', 'Pass Accuracy %'),
    ('progressive_passes_per90', 'Progressive Passes'),
    ('Carries_PrgC_per90', 'Progressive Carries'),
    ('Crs_per90', 'Crosses'),
    ('passes_penalty_area_per90', 'Box Passes'),
    ('expected_assists_per90', 'Expected Assists'),
    ('Touches_Att 3rd_per90', 'Touches Final Third'),
    ('Tkl+Int_per90', 'Tackles + Interceptions'),
    ('Challenges_Tkl%', 'Tackle Success %'),
    ('Take-Ons_Succ_per90', 'Dribbles'),
]
metrics = [metric_key for metric_key, _ in _RADAR_AXES]
titles = [display_title for _, display_title in _RADAR_AXES]
METRICS_CONFIG = {'metrics': metrics, 'titles': titles}

In [None]:
from viz.swarm_radar import create_player_radar
from viz.stats_radar import create_stats_table, combine_radar_and_table
from IPython.display import Image, display

# Take the ids from the PLAYERS configuration so the names are not repeated.
player1_id = player_ids[PLAYERS[0]['name']]
player2_id = player_ids[PLAYERS[1]['name']]

# Reference population handed to the plotting helpers as `df_data`.
position_df = df_final[df_final['position'].str.contains('DF', case=False, na=False)].copy()

team1 = df_final.loc[df_final['unique_player_id'] == player1_id, 'team'].iloc[0]
team2 = df_final.loc[df_final['unique_player_id'] == player2_id, 'team'].iloc[0]

# Each output path is defined once and reused by the producing and consuming calls.
radar_path = f'{SAVE_PATH}estupinan_mojica_radar.png'
table_path = f'{SAVE_PATH}estupinan_mojica_table.png'
combined_path = f'{SAVE_PATH}estupinan_mojica_combined.png'

try:
    create_player_radar(
        df_data=position_df,
        player_1_id=player1_id,
        player_2_id=player2_id,
        metrics=metrics,
        metric_titles=titles,
        team_colors=COLORS,
        save_path=radar_path,
        use_swarm=False,
        show_plot=False
    )

    create_stats_table(
        df_data=position_df,
        player_1_id=player1_id,
        player_2_id=player2_id,
        metrics=metrics,
        metric_titles=titles,
        team_colors=COLORS,
        team_logos={},
        save_path=table_path,
        # A real line break ('\n', not the two characters backslash + n) and
        # the configured SEASON instead of a second hard-coded literal.
        footer_text=f'Stats (per 90 mins) & Percentiles\nvs Top-5 DF ({MIN_MINUTES}+ mins) {SEASON}',
        show_plot=False
    )

    combine_radar_and_table(
        radar_path=radar_path,
        table_path=table_path,
        output_path=combined_path
    )

    print('✓ Completado: estupinan_mojica_combined.png')
    display(Image(combined_path))
except Exception as e:
    # Notebook-level boundary: report the failure with its traceback rather
    # than aborting the cell with a bare exception.
    print(f'✗ Error: {e}')
    import traceback
    traceback.print_exc()