# 🔍 Diagnóstico Completo: Bias Correction de RSDS

## 🎯 Objetivo
Verificar si el **Quantile Mapping** se aplicó correctamente a los datos de radiación solar (RSDS) de CMIP6.

## 📋 Plan de análisis:
1. **Cargar observaciones GHI** (2004-2014) → convertir a mensual
2. **Cargar CMIP6 historical original** (2004-2014)
3. **Cargar CMIP6 historical corregido** (bias-corrected)
4. **Comparar estadísticas**: ¿Son iguales o diferentes?
5. **Diagnóstico espacial**: Mapas de diferencias
6. **Conclusión**: ¿Funcionó el bias correction?

In [None]:
# ============================================================
# IMPORTS AND GLOBAL NOTEBOOK SETTINGS
# ============================================================

# Standard library
import glob
import warnings
from pathlib import Path

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

# Suppress every warning from this point on so cell output stays compact.
# NOTE(review): this also hides warnings raised by xarray/numpy during the
# diagnostic itself - confirm that is intended.
warnings.filterwarnings('ignore')

# Default size applied to figures created afterwards.
plt.rcParams['figure.figsize'] = (15, 8)

print("‚úÖ Librer√≠as cargadas")

## 1️⃣ Cargar Observaciones (GHI)

In [None]:
# ============================================================
# LOAD GHI OBSERVATIONS (REFERENCE DATASET)
# ============================================================
# Opens the daily gridded Zarr store, reads its "ghi" variable, averages it
# to monthly values and keeps the 2004-2014 window in `obs_calib`.

print("üìä Cargando observaciones GHI...")

obs_path = "/home/aninotna/magister/tesis/justh2_pipeline/data/solar/solar_diario_grilla.zarr"
obs_ds = xr.open_zarr(obs_path)

# Daily GHI. The store may name its time dimension "date"; rename it to
# "time" so the resample/sel calls below can address it.
ghi_daily = obs_ds["ghi"]
ghi_daily = ghi_daily.rename({"date": "time"}) if "date" in ghi_daily.dims else ghi_daily

# Monthly mean per month-start ("MS") bin, skipping NaNs.
obs_monthly = ghi_daily.resample(time="MS").mean("time", skipna=True)

# Calibration period (2004-2014)
calib_window = slice("2004-01-01", "2014-12-31")
obs_calib = obs_monthly.sel(time=calib_window)

# Summary of what was loaded
print("‚úÖ Observaciones cargadas:")

obs_first = obs_calib.time.min().values
obs_last = obs_calib.time.max().values
print(f"   ‚Ä¢ Per√≠odo: {obs_first} a {obs_last}")

print(f"   ‚Ä¢ Shape: {obs_calib.shape}")

n_obs_lat = len(obs_calib.lat)
n_obs_lon = len(obs_calib.lon)
print(f"   ‚Ä¢ Grilla: lat {n_obs_lat} x lon {n_obs_lon}")

obs_overall_mean = float(obs_calib.mean())
print(f"   ‚Ä¢ Media global: {obs_overall_mean:.2f} W/m¬≤")

# Share of missing cells over the whole (time, lat, lon) array
obs_nan_pct = 100*float(obs_calib.isnull().sum()/obs_calib.size)
print(f"   ‚Ä¢ % NaN: {obs_nan_pct:.1f}%")

## 2️⃣ Cargar CMIP6 Historical ORIGINAL

In [None]:
# ============================================================
# LOAD ORIGINAL CMIP6 HISTORICAL DATA (NO CORRECTION)
# ============================================================
# Collects every NetCDF file matching the ACCESS-CM2 historical pattern,
# opens them as one dataset and keeps the 2004-2014 slice of "rsds" in
# `hist_orig_calib`. When nothing matches, only an error line is printed and
# none of the `hist_orig*` variables are created.

print("\nüìä Cargando CMIP6 Historical ORIGINAL (ACCESS-CM2)...")

hist_pattern = "/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/rsds/historical/rsds_Amon_access_cm2_historical*.nc"
hist_files = sorted(glob.glob(hist_pattern))

if hist_files:
    print(f"   Archivos encontrados: {len(hist_files)}")
    for nc_file in hist_files:
        print(f"     - {Path(nc_file).name}")

    # Combine all files into a single dataset using their coordinates
    hist_orig_ds = xr.open_mfdataset(hist_files, combine="by_coords")
    hist_orig = hist_orig_ds["rsds"]

    # No resampling here: the file names carry the "Amon" tag and the
    # message below reports the data as monthly.
    print("   Frecuencia: Mensual (Amon)")

    # Calibration period (2004-2014)
    calib_window = slice("2004-01-01", "2014-12-31")
    hist_orig_calib = hist_orig.sel(time=calib_window)

    # Summary of what was loaded
    print("\n‚úÖ Historical ORIGINAL cargado:")

    orig_first = hist_orig_calib.time.min().values
    orig_last = hist_orig_calib.time.max().values
    print(f"   ‚Ä¢ Per√≠odo: {orig_first} a {orig_last}")

    print(f"   ‚Ä¢ Shape: {hist_orig_calib.shape}")

    n_orig_lat = len(hist_orig_calib.lat)
    n_orig_lon = len(hist_orig_calib.lon)
    print(f"   ‚Ä¢ Grilla: lat {n_orig_lat} x lon {n_orig_lon}")

    orig_overall_mean = float(hist_orig_calib.mean())
    print(f"   ‚Ä¢ Media global: {orig_overall_mean:.2f} W/m¬≤")

    orig_nan_pct = 100*float(hist_orig_calib.isnull().sum()/hist_orig_calib.size)
    print(f"   ‚Ä¢ % NaN: {orig_nan_pct:.1f}%")
else:
    print(f"‚ùå No se encontraron archivos: {hist_pattern}")

## 3️⃣ Cargar CMIP6 Historical CORREGIDO

In [None]:
# ============================================================
# LOAD BIAS-CORRECTED CMIP6 HISTORICAL DATA
# ============================================================
# Looks for the bias-corrected "rsds" file in two known locations and, if
# neither exists, falls back to a glob search. On success `hist_bc` holds the
# full series and `hist_bc_calib` its 2004-2014 slice; otherwise `hist_bc`
# stays None and `hist_bc_calib` is not created.

print("\nüìä Buscando CMIP6 Historical BIAS-CORRECTED...")

# Known candidate locations, tried in this order
bc_paths = [
    "/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/rsds/bias_corrected_qm/rsds_qm_access_historical.nc",
    "/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/rsds/bias_corrected_qm/rsds_qm_access_cm2_historical.nc",
]

hist_bc = None
for bc_path in bc_paths:
    if Path(bc_path).exists():
        print(f"‚úÖ Encontrado: {bc_path}")
        hist_bc_ds = xr.open_dataset(bc_path)
        hist_bc = hist_bc_ds["rsds"]
        break

if hist_bc is None:
    print("‚ùå No se encontr√≥ archivo bias-corrected")
    print("   Buscando en directorios alternativos...")
    # Fallback: glob search. glob returns matches in file-system order, so
    # sort them (as the original-data cell does) to make the choice of
    # bc_files[0] reproducible when several files match.
    bc_pattern = "/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/rsds/bias_corrected*/*access*historical*.nc"
    bc_files = sorted(glob.glob(bc_pattern))
    if bc_files:
        print(f"   Encontrado: {bc_files[0]}")
        hist_bc_ds = xr.open_dataset(bc_files[0])
        hist_bc = hist_bc_ds["rsds"]
    else:
        print("   ‚ùå No se encontraron archivos bias-corrected")

if hist_bc is not None:
    # Calibration period (2004-2014)
    hist_bc_calib = hist_bc.sel(time=slice("2004-01-01", "2014-12-31"))

    # Summary of what was loaded
    print(f"\n‚úÖ Historical BIAS-CORRECTED cargado:")
    print(f"   ‚Ä¢ Per√≠odo: {hist_bc_calib.time.min().values} a {hist_bc_calib.time.max().values}")
    print(f"   ‚Ä¢ Shape: {hist_bc_calib.shape}")
    print(f"   ‚Ä¢ Grilla: lat {len(hist_bc_calib.lat)} x lon {len(hist_bc_calib.lon)}")
    print(f"   ‚Ä¢ Media global: {float(hist_bc_calib.mean()):.2f} W/m¬≤")
    print(f"   ‚Ä¢ % NaN: {100*float(hist_bc_calib.isnull().sum()/hist_bc_calib.size):.1f}%")

## 4️⃣ DIAGNÓSTICO CRÍTICO: ¿Son diferentes?

In [None]:
# ============================================================
# DIAGNOSTIC: DIRECT NUMERICAL COMPARISON
# ============================================================
# Compares the bias-corrected and the original historical fields cell by
# cell (both already restricted to 2004-2014) and reports whether the
# correction changed the data.
#
# The difference is computed positionally on the loaded arrays, the same way
# the np.allclose identity test reads them. A plain `a - b` between two
# DataArrays aligns on coordinate labels first, so same-shaped arrays whose
# labels differ would produce an empty or partial difference and NaN
# statistics; NaN fails both `<` thresholds below and would be reported as a
# significant correction.

print("\n" + "="*70)
print("üîç DIAGN√ìSTICO CR√çTICO: ¬øEL BIAS CORRECTION SE APLIC√ì?")
print("="*70)

# Defined before the branch so the flag exists whenever this cell has run.
same_shape = False

if hist_bc is not None:

    # 1. Shape check (the printed shapes are those of the coordinate vectors)
    print("\n1Ô∏è‚É£ VERIFICACI√ìN DE COORDENADAS:")
    print(f"   Original: lat {hist_orig_calib.lat.shape}, lon {hist_orig_calib.lon.shape}, time {hist_orig_calib.time.shape}")
    print(f"   Corregido: lat {hist_bc_calib.lat.shape}, lon {hist_bc_calib.lon.shape}, time {hist_bc_calib.time.shape}")

    # Element-wise comparison is only meaningful when both arrays have the
    # same shape.
    same_shape = hist_orig_calib.shape == hist_bc_calib.shape
    print(f"   ¬øMismo shape? {'‚úÖ S√ç' if same_shape else '‚ùå NO'}")

    if not same_shape:
        print("\n   ‚ö†Ô∏è PROBLEMA: Las dimensiones no coinciden")
        print("   Necesitamos alinear las grillas antes de comparar...")
    else:
        # 2. Value comparison
        print("\n2Ô∏è‚É£ COMPARACI√ìN DE VALORES:")

        # Load both arrays once; they are reused for the difference and for
        # the identity test.
        bc_values = np.asarray(hist_bc_calib.values, dtype=np.float64)
        orig_values = np.asarray(hist_orig_calib.values, dtype=np.float64)

        # Positional difference, wrapped in a DataArray that reuses the
        # original's dims/coords so `diff` stays an xarray object.
        diff = hist_orig_calib.copy(data=bc_values - orig_values)
        abs_diff = abs(diff)

        # Only cells where both inputs have data can be compared.
        total = int(diff.notnull().sum())

        if total == 0:
            # Every statistic would be NaN; say so instead of classifying it.
            print("   No se pudo calcular la diferencia: todas las celdas comparadas son NaN")
            print("   Resultado no concluyente: revisar los archivos de entrada")
        else:
            # Statistics of the difference (NaN cells are skipped)
            diff_mean = float(diff.mean())
            diff_std = float(diff.std())
            diff_max = float(diff.max())
            diff_min = float(diff.min())
            diff_abs_mean = float(abs_diff.mean())

            print(f"   ‚Ä¢ Diferencia media: {diff_mean:+.4f} W/m¬≤")
            print(f"   ‚Ä¢ Diferencia std: {diff_std:.4f} W/m¬≤")
            print(f"   ‚Ä¢ Diferencia m√°x: {diff_max:+.4f} W/m¬≤")
            print(f"   ‚Ä¢ Diferencia m√≠n: {diff_min:+.4f} W/m¬≤")
            print(f"   ‚Ä¢ Diferencia abs media: {diff_abs_mean:.4f} W/m¬≤")

            # Count cells above each threshold. Integer counts print as
            # "123/456"; NaN cells never satisfy `>` and are excluded from
            # the denominator as well.
            n_diff_1 = int((abs_diff > 1).sum())
            n_diff_5 = int((abs_diff > 5).sum())
            n_diff_10 = int((abs_diff > 10).sum())

            print(f"\n   ‚Ä¢ Celdas con |diff| > 1 W/m¬≤: {n_diff_1}/{total} ({100*n_diff_1/total:.1f}%)")
            print(f"   ‚Ä¢ Celdas con |diff| > 5 W/m¬≤: {n_diff_5}/{total} ({100*n_diff_5/total:.1f}%)")
            print(f"   ‚Ä¢ Celdas con |diff| > 10 W/m¬≤: {n_diff_10}/{total} ({100*n_diff_10/total:.1f}%)")

            # 3. Numerical identity test (NaNs in matching positions count
            # as equal)
            print("\n3Ô∏è‚É£ TEST DE IGUALDAD NUM√âRICA:")
            are_identical = np.allclose(bc_values, orig_values,
                                        rtol=1e-5, atol=1e-8, equal_nan=True)

            if are_identical:
                print("   ‚ùå ¬°PROBLEMA DETECTADO!")
                print("   Los datos corregidos y originales son NUM√âRICAMENTE ID√âNTICOS")
                print("   ‚Üí El bias correction NO se aplic√≥ correctamente")
            else:
                print("   ‚úÖ Los datos SON DIFERENTES")

                # 4. Conclusion, graded by the mean absolute difference
                print("\n4Ô∏è‚É£ CONCLUSI√ìN:")
                if diff_abs_mean < 0.01:
                    print("   ‚ùå CORRECCI√ìN INSIGNIFICANTE")
                    print(f"   Diferencia abs media: {diff_abs_mean:.6f} W/m¬≤ (pr√°cticamente cero)")
                    print("   ‚Üí El bias correction existe pero es despreciable")
                elif diff_abs_mean < 1.0:
                    print("   ‚ö†Ô∏è CORRECCI√ìN MENOR")
                    print(f"   Diferencia abs media: {diff_abs_mean:.2f} W/m¬≤")
                    print("   ‚Üí Correcci√≥n aplicada pero muy sutil")
                else:
                    print("   ‚úÖ CORRECCI√ìN SIGNIFICATIVA")
                    print(f"   Diferencia abs media: {diff_abs_mean:.2f} W/m¬≤")
                    print("   ‚Üí El bias correction se aplic√≥ exitosamente")
else:
    print("\n‚ö†Ô∏è No se puede hacer diagn√≥stico: falta archivo bias-corrected")

## 5️⃣ Visualización: Series Temporales

In [None]:
# ============================================================
# PLOT: SPATIALLY AGGREGATED TIME SERIES
# ============================================================
# Draws the lat/lon-mean series of the observations, the original CMIP6 run
# and the bias-corrected run on one axis. Skipped when the corrected data is
# missing or its shape differs from the original.

if hist_bc is None or not same_shape:
    print("‚ö†Ô∏è No se puede generar gr√°fico: datos faltantes o dimensiones incompatibles")
else:
    print("\nüìà Generando gr√°fico de series temporales...")

    fig, ax = plt.subplots(figsize=(15, 6))

    # Spatial means: one value per time step
    spatial_dims = ['lat', 'lon']
    obs_ts = obs_calib.mean(spatial_dims)
    orig_ts = hist_orig_calib.mean(spatial_dims)
    bc_ts = hist_bc_calib.mean(spatial_dims)

    # (series, line style) pairs, drawn in this order
    line_specs = [
        (obs_ts, dict(label='Observaciones (GHI)', color='black', linewidth=2, marker='o', markersize=3)),
        (orig_ts, dict(label='CMIP6 Original', color='red', linewidth=1.5, alpha=0.7)),
        (bc_ts, dict(label='CMIP6 Bias-Corrected', color='blue', linewidth=1.5)),
    ]
    for series, line_kwargs in line_specs:
        series.plot(ax=ax, **line_kwargs)

    # Axis decoration is applied after plotting so it replaces the labels
    # set by the plot calls.
    ax.set_title(
        'Comparaci√≥n: Observaciones vs CMIP6 (Original vs Corregido)\nPer√≠odo 2004-2014 - Promedio Espacial',
        fontsize=14,
        fontweight='bold',
    )
    ax.set_ylabel('RSDS (W/m¬≤)', fontsize=12)
    ax.set_xlabel('Tiempo', fontsize=12)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("‚úÖ Gr√°fico generado")

## 6️⃣ Comparación Estadística Detallada

In [None]:
# ============================================================
# COMPARATIVE TABLE OF STATISTICS
# ============================================================
# Prints mean, standard deviation and the 25/50/75/95th percentiles of the
# three datasets, then the mean bias of each CMIP6 series relative to the
# observations. Skipped when the corrected data is missing or mis-shaped.

if hist_bc is None or not same_shape:
    print("‚ö†Ô∏è No se puede generar tabla: datos faltantes")
else:
    print("\nüìä TABLA COMPARATIVA DE ESTAD√çSTICOS (2004-2014):")
    print("="*70)

    # Row label -> data array, in display order
    datasets = {
        'Observaciones (GHI)': obs_calib,
        'CMIP6 Original': hist_orig_calib,
        'CMIP6 Bias-Corrected': hist_bc_calib,
    }
    # Column label -> quantile level
    percentiles = {'P25': 0.25, 'P50': 0.50, 'P75': 0.75, 'P95': 0.95}

    # Build the table column by column; every statistic is taken over the
    # whole array (all dimensions at once).
    columns = {'Dataset': list(datasets)}
    columns['Media'] = [float(arr.mean()) for arr in datasets.values()]
    columns['Std'] = [float(arr.std()) for arr in datasets.values()]
    for col_label, level in percentiles.items():
        columns[col_label] = [float(arr.quantile(level)) for arr in datasets.values()]
    stats = pd.DataFrame(columns)

    print(stats.round(2).to_string(index=False))

    # Mean bias against the observations
    print("\nüìä SESGO (BIAS) vs OBSERVACIONES:")
    print("="*70)

    obs_mean = float(obs_calib.mean())
    orig_bias = float(hist_orig_calib.mean()) - obs_mean
    bc_bias = float(hist_bc_calib.mean()) - obs_mean

    # Magnitudes are what decides whether the correction helped
    orig_magnitude = abs(orig_bias)
    bc_magnitude = abs(bc_bias)

    print(f"   Original: {orig_bias:+.2f} W/m¬≤ ({100*orig_bias/obs_mean:+.1f}%)")
    print(f"   Corregido: {bc_bias:+.2f} W/m¬≤ ({100*bc_bias/obs_mean:+.1f}%)")
    print(f"   Mejora: {orig_magnitude - bc_magnitude:.2f} W/m¬≤")

    if bc_magnitude < orig_magnitude:
        # Relative reduction of the absolute bias, in percent
        reduction = 100 * (1 - bc_magnitude/orig_magnitude)
        print(f"   ‚úÖ Reducci√≥n de sesgo: {reduction:.1f}%")
    else:
        print("   ‚ùå El sesgo NO se redujo (empeor√≥)")

## 7️⃣ Visualización Espacial

In [None]:
# ============================================================
# SPATIAL MAPS: TIME MEANS AND DIFFERENCES
# ============================================================
# 2x2 figure: time-mean observations, time-mean original CMIP6, and the bias
# of the original and of the corrected series relative to the observations.
# Skipped when the corrected data is missing or mis-shaped.

if hist_bc is None or not same_shape:
    print("‚ö†Ô∏è No se pueden generar mapas: datos faltantes")
else:
    print("\nüó∫Ô∏è Generando mapas espaciales...")

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    # Row-major unpacking: top-left, top-right, bottom-left, bottom-right
    ax1, ax2, ax3, ax4 = axes.ravel()

    # Time means
    obs_mean = obs_calib.mean('time')
    orig_mean = hist_orig_calib.mean('time')
    bc_mean = hist_bc_calib.mean('time')

    # Differences against the observations
    diff_orig = orig_mean - obs_mean
    diff_bc = bc_mean - obs_mean

    # Shared colour settings: one set for absolute fields, one for biases
    field_style = dict(cmap='YlOrRd', vmin=150, vmax=300)
    bias_style = dict(cmap='RdBu_r', vmin=-50, vmax=50)

    # (axis, field, colour settings, title) for each panel, in drawing order
    panels = [
        (ax1, obs_mean, field_style, 'Observaciones (GHI) - Media 2004-2014'),
        (ax2, orig_mean, field_style, 'CMIP6 Original - Media 2004-2014'),
        (ax3, diff_orig, bias_style,
         f'Sesgo Original (CMIP6 - OBS)\nMedia: {float(diff_orig.mean()):+.2f} W/m¬≤'),
        (ax4, diff_bc, bias_style,
         f'Sesgo Corregido (BC - OBS)\nMedia: {float(diff_bc.mean()):+.2f} W/m¬≤'),
    ]
    for panel_ax, field, colour_kwargs, panel_title in panels:
        field.plot(ax=panel_ax, cbar_kwargs={'label': 'W/m¬≤'}, **colour_kwargs)
        panel_ax.set_title(panel_title, fontweight='bold')
        panel_ax.set_xlabel('Longitud')
        panel_ax.set_ylabel('Latitud')

    plt.suptitle(
        'Comparaci√≥n Espacial: Observaciones vs CMIP6 (Original vs Bias-Corrected)',
        fontsize=16,
        fontweight='bold',
        y=0.995,
    )
    plt.tight_layout()
    plt.show()

    print("‚úÖ Mapas generados")

## 📝 RESUMEN Y CONCLUSIONES

In [None]:
# ============================================================
# FINAL SUMMARY AND RECOMMENDATIONS
# ============================================================
# Recomputes the key metrics and prints one of three verdicts: correction not
# applied, correction successful (with a quality tier), or correction made
# the bias worse. Prints a short notice instead when the corrected data is
# missing or mis-shaped.

print("\n" + "="*70)
print("üìù RESUMEN FINAL DEL DIAGN√ìSTICO")
print("="*70)

if hist_bc is None or not same_shape:
    print("\n‚ö†Ô∏è No se pudo completar el diagn√≥stico completo")
    print("   Verificar que los archivos bias-corrected existen y tienen el formato correcto")
else:
    # Key metrics, recomputed here so this cell does not depend on values
    # left behind by earlier cells.
    diff_abs_mean = float(abs(hist_bc_calib - hist_orig_calib).mean())
    obs_mean = float(obs_calib.mean())
    orig_bias = abs(float(hist_orig_calib.mean()) - obs_mean)
    bc_bias = abs(float(hist_bc_calib.mean()) - obs_mean)

    print("\nüìä M√©tricas clave:")
    print(f"   ‚Ä¢ Diferencia abs media (BC vs Original): {diff_abs_mean:.4f} W/m¬≤")
    print(f"   ‚Ä¢ Sesgo original: {orig_bias:.2f} W/m¬≤")
    print(f"   ‚Ä¢ Sesgo corregido: {bc_bias:.2f} W/m¬≤")

    print("\nüí° Conclusi√≥n:")

    if diff_abs_mean < 0.01:
        # Corrected data is practically the same as the original
        not_applied_lines = (
            "   ‚ùå PROBLEMA DETECTADO: Bias correction NO aplicado",
            "      Los datos corregidos son pr√°cticamente id√©nticos al original",
            "\n   üîß Recomendaciones:",
            "      1. Verificar que el notebook 02_correction_v2_qm.ipynb se ejecut√≥ completamente",
            "      2. Revisar que bc.adjust() se llam√≥ sobre los datos correctos",
            "      3. Verificar que se guard√≥ el archivo correcto (no el original)",
            "      4. Re-ejecutar el proceso de bias correction desde cero",
        )
        for text in not_applied_lines:
            print(text)

    elif bc_bias < orig_bias:
        # Absolute bias went down: report the relative reduction
        reduction = 100 * (1 - bc_bias/orig_bias)
        print("   ‚úÖ BIAS CORRECTION EXITOSO")
        print(f"      Reducci√≥n de sesgo: {reduction:.1f}%")
        print(f"      El sesgo se redujo de {orig_bias:.2f} a {bc_bias:.2f} W/m¬≤")

        # Quality tier by size of the reduction
        if reduction > 80:
            verdict = "\n   üåü ¬°Excelente correcci√≥n! El sesgo se redujo significativamente"
        elif reduction > 50:
            verdict = "\n   üëç Buena correcci√≥n, el sesgo se redujo considerablemente"
        else:
            verdict = "\n   ‚ö†Ô∏è Correcci√≥n moderada, podr√≠a mejorarse"
        print(verdict)

    else:
        # Absolute bias did not go down
        print("   ‚ùå PROBLEMA: El bias correction EMPEOR√ì el sesgo")
        print(f"      El sesgo aument√≥ de {orig_bias:.2f} a {bc_bias:.2f} W/m¬≤")
        possible_causes = (
            "\n   üîß Posibles causas:",
            "      1. Problema con el regridding (grillas mal alineadas)",
            "      2. Par√°metros de QM inadecuados (nquantiles, group, etc.)",
            "      3. Per√≠odo de entrenamiento incorrecto",
            "      4. M√©todo de correcci√≥n no apropiado para esta variable",
        )
        for text in possible_causes:
            print(text)

print("\n" + "="*70)