In [35]:
# Entrenamiento de Bias Correction con datos 1D corregidos

print("üéØ ENTRENAMIENTO DE BIAS CORRECTION - VERSI√ìN CORREGIDA")
print("="*55)

trained_adjustments_fixed = {}
training_log_fixed = []

# Usar datos corregidos
for model_name, model_info in regridded_datasets_fixed.items():
    print(f"\n=== ENTRENANDO BIAS CORRECTION: {model_name.upper()} ===")
    
    try:
        # Obtener datos 1D
        cmip6_1d = model_info['regridded_data']
        
        # Alinear calendarios y encontrar per√≠odo de traslape
        ref_aligned, sim_aligned, overlap_start, overlap_end = align_calendars(cr2met_1d_for_training, cmip6_1d)
        
        print(f"Datos para entrenamiento:")
        print(f"  CR2MET: {ref_aligned.shape}")
        print(f"  {model_name}: {sim_aligned.shape}")
        
        # Cargar datos en memoria
        print("  Cargando datos en memoria...")
        ref_aligned = ref_aligned.load()
        sim_aligned = sim_aligned.load()
        print("  ‚úì Datos cargados en memoria")
        
        # Verificar estad√≠sticos b√°sicos
        print(f"  CR2MET stats: min={float(ref_aligned.min()):.3f}, max={float(ref_aligned.max()):.3f}, mean={float(ref_aligned.mean()):.3f}")
        print(f"  {model_name} stats: min={float(sim_aligned.min()):.3f}, max={float(sim_aligned.max()):.3f}, mean={float(sim_aligned.mean()):.3f}")
        
        # Aplicar wet-day adjustment para precipitaci√≥n
        ref_adjusted, ref_wet_days = apply_wet_day_adjustment(ref_aligned, threshold=0.1)
        sim_adjusted, sim_wet_days = apply_wet_day_adjustment(sim_aligned, threshold=0.1)
        
        print(f"  D√≠as h√∫medos CR2MET: {ref_wet_days.sum().values} de {len(ref_wet_days.time)}")
        print(f"  D√≠as h√∫medos {model_name}: {sim_wet_days.sum().values} de {len(sim_wet_days.time)}")
        
        # Configurar m√©todo de bias correction
        try:
            # Intentar con Empirical Quantile Mapping (EQM) primero para datos 1D
            print("  Intentando Empirical Quantile Mapping (EQM)...")
            adjustment = EmpiricalQuantileMapping.train(
                ref=ref_adjusted,
                hist=sim_adjusted,
                nquantiles=50,
                kind='+',  # Aditivo para precipitaci√≥n
                group='time'
            )
            method_used = 'EQM'
            print("  ‚úì EQM entrenado exitosamente")
            
        except Exception as e:
            print(f"  EQM fall√≥ ({e}), intentando Detrended Quantile Mapping (DQM)...")
            try:
                adjustment = DetrendedQuantileMapping.train(
                    ref=ref_adjusted,
                    hist=sim_adjusted,
                    nquantiles=50,
                    kind='+',  # Aditivo para precipitaci√≥n
                    group='time'
                )
                method_used = 'DQM'
                print("  ‚úì DQM entrenado exitosamente")
                
            except Exception as e2:
                print(f"  ‚ùå Ambos m√©todos fallaron: EQM ({e}), DQM ({e2})")
                training_log_fixed.append({
                    'model': model_name,
                    'variable': 'pr',
                    'frequency': model_info['frequency'],
                    'train_start': overlap_start.strftime('%Y-%m-%d'),
                    'train_end': overlap_end.strftime('%Y-%m-%d'),
                    'method': 'FAILED',
                    'status': 'ERROR',
                    'notes': f"EQM: {e}, DQM: {e2}"
                })
                continue
        
        # Guardar adjustment entrenado
        params_dir = OUTPUT_PATH / "bias_params" / model_name / "pr" / "historical"
        params_dir.mkdir(parents=True, exist_ok=True)
        
        params_file = params_dir / f"pr_{model_name}_historical_{method_used.lower()}_params_1d_{overlap_start.strftime('%Y')}_{overlap_end.strftime('%Y')}.nc"
        
        print(f"  Guardando par√°metros en: {params_file}")
        adjustment.save(params_file)
        
        # Almacenar informaci√≥n
        trained_adjustments_fixed[model_name] = {
            'adjustment': adjustment,
            'method': method_used,
            'training_period': (overlap_start, overlap_end),
            'params_file': params_file,
            'spatial_method': '1D_regional_average'
        }
        
        # Log exitoso
        training_log_fixed.append({
            'model': model_name,
            'variable': 'pr',
            'frequency': model_info['frequency'],
            'train_start': overlap_start.strftime('%Y-%m-%d'),
            'train_end': overlap_end.strftime('%Y-%m-%d'),
            'method': method_used,
            'status': 'SUCCESS',
            'notes': f"Entrenado con {len(ref_aligned.time)} timesteps (1D regional)"
        })
        
        print(f"  ‚úì Bias correction entrenado para {model_name.upper()}")
        
    except Exception as e:
        print(f"  ‚ùå Error general entrenando {model_name}: {e}")
        import traceback
        traceback.print_exc()
        training_log_fixed.append({
            'model': model_name,
            'variable': 'pr',
            'frequency': model_info.get('frequency', 'unknown'),
            'train_start': 'N/A',
            'train_end': 'N/A', 
            'method': 'N/A',
            'status': 'ERROR',
            'notes': str(e)
        })
        continue

print(f"\nüéâ ENTRENAMIENTO COMPLETADO")
print(f"‚úÖ Modelos entrenados exitosamente: {len(trained_adjustments_fixed)}")

# Guardar log de entrenamiento
log_df_fixed = pd.DataFrame(training_log_fixed)
log_file_fixed = OUTPUT_PATH / "logs" / "bias_correction_training_log_fixed.csv"
log_df_fixed.to_csv(log_file_fixed, index=False)
print(f"‚úÖ Log guardado en: {log_file_fixed}")

print("\nüìã RESUMEN DEL ENTRENAMIENTO:")
print(log_df_fixed.to_string(index=False))

üéØ ENTRENAMIENTO DE BIAS CORRECTION - VERSI√ìN CORREGIDA

=== ENTRENANDO BIAS CORRECTION: ACCESS-CM2 ===
  Per√≠odo de traslape: 1960-01-01 a 2014-12-31
Datos para entrenamiento:
  CR2MET: (20089,)
  ACCESS-CM2: (20089,)
  Cargando datos en memoria...
  ‚úì Datos cargados en memoria
  CR2MET stats: min=0.000, max=106.601, mean=1.203
  ACCESS-CM2 stats: min=0.000, max=56.712, mean=0.930
  D√≠as h√∫medos CR2MET: 5400 de 20089
  D√≠as h√∫medos ACCESS-CM2: 5316 de 20089
  Intentando Empirical Quantile Mapping (EQM)...
  EQM fall√≥ ('DataArray' object has no attribute 'units'), intentando Detrended Quantile Mapping (DQM)...
  ‚ùå Ambos m√©todos fallaron: EQM ('DataArray' object has no attribute 'units'), DQM ('DataArray' object has no attribute 'units')

üéâ ENTRENAMIENTO COMPLETADO
‚úÖ Modelos entrenados exitosamente: 0
‚úÖ Log guardado en: /home/aninotna/magister/tesis/justh2_pipeline/out/logs/bias_correction_training_log_fixed.csv

üìã RESUMEN DEL ENTRENAMIENTO:
     model variable f

In [34]:
# Soluci√≥n alternativa: usar datos espacialmente compatibles sin regridding problem√°tico

print("üîß IMPLEMENTANDO SOLUCI√ìN ALTERNATIVA PARA BIAS CORRECTION")
print("="*60)

# Problema identificado: el regridding de 1x1 pixel a 20x38 grid falla
# Soluci√≥n: trabajar con datos espaciales compatibles directamente

# Usar datos originales para bias correction
print("Preparando datos espacialmente compatibles...")

# Para ACCESS-CM2: datos ya est√°n en una celda del Valle de Aconcagua
model_name = 'ACCESS-CM2'
cmip6_original = cmip6_datasets[model_name]['data']

print(f"‚úì {model_name} original shape: {cmip6_original.shape}")
print(f"  Coords: lat {cmip6_original.lat.values}, lon {cmip6_original.lon.values}")

# Para CR2MET: extraer serie temporal promedio de la regi√≥n
print("Extrayendo serie temporal promedio de CR2MET...")
cr2met_regional = cr2met_pr.mean(dim=['lat', 'lon'], skipna=True)

print(f"‚úì CR2MET regional shape: {cr2met_regional.shape}")
print(f"  Media regional: {float(cr2met_regional.mean().values):.3f} mm/day")

# Preparar datos para bias correction
print(f"\nPreparando datos 1D para bias correction...")

# Extraer series 1D
cmip6_1d = cmip6_original.squeeze()  # Remover dimensiones de tama√±o 1
cr2met_1d = cr2met_regional

print(f"‚úì {model_name} 1D shape: {cmip6_1d.shape}")
print(f"‚úì CR2MET 1D shape: {cr2met_1d.shape}")

# Crear dataset actualizado para bias correction
regridded_datasets_fixed = {
    model_name: {
        **cmip6_datasets[model_name],
        'regridded_data': cmip6_1d,
        'method': 'spatial_average_1d'
    }
}

# Tambi√©n preparar CR2MET 1D para entrenamien
cr2met_1d_for_training = cr2met_1d

print(f"\n‚úÖ Datos preparados para bias correction 1D")
print(f"  - {model_name}: {cmip6_1d.shape}")
print(f"  - CR2MET: {cr2met_1d.shape}")
print(f"  - M√©todo: Series temporales 1D sin regridding problem√°tico")

üîß IMPLEMENTANDO SOLUCI√ìN ALTERNATIVA PARA BIAS CORRECTION
Preparando datos espacialmente compatibles...
‚úì ACCESS-CM2 original shape: (60265, 1, 1)
  Coords: lat [-33.125], lon [-70.3125]
Extrayendo serie temporal promedio de CR2MET...
‚úì CR2MET regional shape: (22646,)
  Media regional: 1.161 mm/day

Preparando datos 1D para bias correction...
‚úì ACCESS-CM2 1D shape: (60265,)
‚úì CR2MET 1D shape: (22646,)

‚úÖ Datos preparados para bias correction 1D
  - ACCESS-CM2: (60265,)
  - CR2MET: (22646,)
  - M√©todo: Series temporales 1D sin regridding problem√°tico


In [33]:
# Diagn√≥stico r√°pido de datos originales vs regridded

print("üîç DIAGN√ìSTICO R√ÅPIDO DE REGRIDDING")

# Verificar datos originales CMIP6
if 'cmip6_datasets' in globals() and cmip6_datasets:
    model_name = list(cmip6_datasets.keys())[0]
    original_data = cmip6_datasets[model_name]['data']
    
    print(f"\nüìä {model_name} DATOS ORIGINALES:")
    print(f"  Shape: {original_data.shape}")
    print(f"  Min: {float(original_data.min().values):.6f}")
    print(f"  Max: {float(original_data.max().values):.6f}")
    print(f"  Media: {float(original_data.mean().values):.6f}")
    print(f"  Hay NaN: {np.isnan(original_data).sum().values > 0}")
    
    # Verificar datos regridded
    if 'regridded_datasets' in globals() and model_name in regridded_datasets:
        regridded_data = regridded_datasets[model_name]['regridded_data']
        
        print(f"\nüìä {model_name} DATOS REGRIDDED:")
        print(f"  Shape: {regridded_data.shape}")
        print(f"  Min: {float(regridded_data.min().values):.6f}")
        print(f"  Max: {float(regridded_data.max().values):.6f}")
        print(f"  Media: {float(regridded_data.mean().values):.6f}")
        print(f"  Hay NaN: {np.isnan(regridded_data).sum().values > 0}")
        
        print(f"\nüö® PROBLEMA: El regridding ha convertido todos los valores a NaN")
        print(f"   Esto sugiere un problema de coordinadas o interpolaci√≥n")

print("\n‚úÖ Diagn√≥stico r√°pido completado")

üîç DIAGN√ìSTICO R√ÅPIDO DE REGRIDDING

üìä ACCESS-CM2 DATOS ORIGINALES:
  Shape: (60265, 1, 1)
  Min: 0.000000
  Max: 58.349644
  Media: 0.893804
  Hay NaN: False

üìä ACCESS-CM2 DATOS REGRIDDED:
  Shape: (60265, 20, 38)
  Min: nan
  Max: nan
  Media: nan
  Hay NaN: True

üö® PROBLEMA: El regridding ha convertido todos los valores a NaN
   Esto sugiere un problema de coordinadas o interpolaci√≥n

‚úÖ Diagn√≥stico r√°pido completado


In [32]:
# Diagn√≥stico de datos para bias correction

print("üîç DIAGN√ìSTICO DE DATOS PARA BIAS CORRECTION")
print("="*50)

# Examinar datos CR2MET
print("\nüìä CR2MET PRECIPITACI√ìN:")
print(f"  Shape: {cr2met_pr.shape}")
print(f"  Rango temporal: {cr2met_pr.time.values[0]} a {cr2met_pr.time.values[-1]}")
print(f"  Unidades: {cr2met_pr.attrs.get('units', 'No especificadas')}")
print(f"  Estad√≠sticos b√°sicos:")
print(f"    Min: {float(cr2met_pr.min().values):.6f}")
print(f"    Max: {float(cr2met_pr.max().values):.6f}")
print(f"    Media: {float(cr2met_pr.mean().values):.6f}")
print(f"    D√≠as > 0.1 mm: {(cr2met_pr > 0.1).sum().values}")
print(f"    D√≠as > 0.01 mm: {(cr2met_pr > 0.01).sum().values}")

# Examinar datos CMIP6
if 'regridded_datasets' in globals() and regridded_datasets:
    model_name = list(regridded_datasets.keys())[0]
    cmip6_data = regridded_datasets[model_name]['regridded_data']
    
    print(f"\nüìä {model_name.upper()} PRECIPITACI√ìN (REGRIDDED):")
    print(f"  Shape: {cmip6_data.shape}")
    print(f"  Rango temporal: {cmip6_data.time.values[0]} a {cmip6_data.time.values[-1]}")
    print(f"  Unidades: {cmip6_data.attrs.get('units', 'No especificadas')}")
    print(f"  Estad√≠sticos b√°sicos:")
    print(f"    Min: {float(cmip6_data.min().values):.6f}")
    print(f"    Max: {float(cmip6_data.max().values):.6f}")
    print(f"    Media: {float(cmip6_data.mean().values):.6f}")
    print(f"    D√≠as > 0.1 mm: {(cmip6_data > 0.1).sum().values}")
    print(f"    D√≠as > 0.01 mm: {(cmip6_data > 0.01).sum().values}")
    print(f"    D√≠as > 0.001 mm: {(cmip6_data > 0.001).sum().values}")
    print(f"    D√≠as > 0 mm: {(cmip6_data > 0).sum().values}")
    
    # Examinar primeros valores
    print(f"\n  Primeros 10 valores temporales:")
    sample_data = cmip6_data.isel(lat=0, lon=0)
    for i in range(min(10, len(sample_data.time))):
        time_val = sample_data.time.values[i]
        pr_val = float(sample_data.values[i])
        print(f"    {time_val}: {pr_val:.6f} mm/day")
        
    # Verificar si hay valores NaN o ceros
    nan_count = np.isnan(cmip6_data).sum().values
    zero_count = (cmip6_data == 0).sum().values
    print(f"\n  Valores NaN: {nan_count}")
    print(f"  Valores exactamente 0: {zero_count}")
    print(f"  Total de valores: {cmip6_data.size}")
    
    # Verificar coordenadas temporales
    print(f"\nüïê COORDINACI√ìN TEMPORAL:")
    print(f"  CR2MET tipo de tiempo: {type(cr2met_pr.time.values[0])}")
    print(f"  {model_name} tipo de tiempo: {type(cmip6_data.time.values[0])}")
    
    # Buscar per√≠odo de traslape manual
    cr2_start = pd.to_datetime(cr2met_pr.time.values[0])
    cr2_end = pd.to_datetime(cr2met_pr.time.values[-1])
    cmip_start = pd.to_datetime(cmip6_data.time.values[0])
    cmip_end = pd.to_datetime(cmip6_data.time.values[-1])
    
    overlap_start = max(cr2_start, cmip_start)
    overlap_end = min(cr2_end, cmip_end)
    
    print(f"  CR2MET: {cr2_start.strftime('%Y-%m-%d')} a {cr2_end.strftime('%Y-%m-%d')}")
    print(f"  {model_name}: {cmip_start.strftime('%Y-%m-%d')} a {cmip_end.strftime('%Y-%m-%d')}")
    print(f"  Traslape: {overlap_start.strftime('%Y-%m-%d')} a {overlap_end.strftime('%Y-%m-%d')}")
    print(f"  D√≠as de traslape: {(overlap_end - overlap_start).days}")

print("\n‚úÖ Diagn√≥stico completado")

üîç DIAGN√ìSTICO DE DATOS PARA BIAS CORRECTION

üìä CR2MET PRECIPITACI√ìN:
  Shape: (22646, 20, 38)
  Rango temporal: 1960-01-01T00:00:00.000000000 a 2021-12-31T00:00:00.000000000
  Unidades: mm/day
  Estad√≠sticos b√°sicos:
    Min: 0.000000
    Max: 575.900000
    Media: 1.161186
    D√≠as > 0.1 mm: 2208928
    D√≠as > 0.01 mm: 2257330

üìä ACCESS-CM2 PRECIPITACI√ìN (REGRIDDED):
  Shape: (60265, 20, 38)
  Rango temporal: 1850-01-01T12:00:00.000000000 a 2014-12-31T12:00:00.000000000
  Unidades: mm/day
  Estad√≠sticos b√°sicos:
    Min: nan
    Max: nan
    Media: nan
    D√≠as > 0.1 mm: 0
    D√≠as > 0.01 mm: 0
    D√≠as > 0.001 mm: 0
    D√≠as > 0 mm: 0

  Primeros 10 valores temporales:
    1850-01-01T12:00:00.000000000: nan mm/day
    1850-01-02T12:00:00.000000000: nan mm/day


KeyboardInterrupt: 

In [28]:
# Pipeline de Correcci√≥n de Sesgo CMIP6 ‚Üí CR2MET usando Quantile Mapping
# Valle de Aconcagua, Chile

import xarray as xr
import numpy as np
import pandas as pd
import os
import sys
from pathlib import Path
import dask
import dask.array as da
from dask.diagnostics import ProgressBar
import warnings

# Bias correction libraries
try:
    import xclim
    from xclim.sdba import DetrendedQuantileMapping, EmpiricalQuantileMapping
    print("‚úì xclim.sdba importado exitosamente")
except ImportError:
    try:
        # Alternativa con xsdba standalone
        from xsdba import DetrendedQuantileMapping, EmpiricalQuantileMapping
        print("‚úì xsdba importado como alternativa")
    except ImportError:
        print("‚ùå No se pudo importar xclim.sdba ni xsdba")
        DetrendedQuantileMapping = None
        EmpiricalQuantileMapping = None

# Spatial libraries
import rioxarray
try:
    import xesmf as xe
    print("‚úì xESMF disponible para regridding")
except ImportError:
    print("‚ö†Ô∏è xESMF no disponible, usando rioxarray como alternativa")
    xe = None

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
warnings.filterwarnings('ignore', category=FutureWarning)
dask.config.set({'array.slicing.split_large_chunks': False})

# Set up dask for efficient processing
dask.config.set({
    'array.chunk-size': '128MB',
    'distributed.worker.memory.target': 0.8,
    'distributed.worker.memory.spill': 0.9
})

print("‚úì Imports completados")
print(f"‚úì xarray version: {xr.__version__}")
print(f"‚úì xclim version: {xclim.__version__ if 'xclim' in globals() else 'No disponible'}")
print(f"‚úì Bias correction: {'Disponible' if DetrendedQuantileMapping is not None else 'No disponible'}")

‚úì xclim.sdba importado exitosamente
‚ö†Ô∏è xESMF no disponible, usando rioxarray como alternativa
‚úì Imports completados
‚úì xarray version: 2025.1.2
‚úì xclim version: 0.58.1
‚úì Bias correction: Disponible


In [17]:
# Diagn√≥stico de dependencias y backends disponibles

print("Verificando backends de xarray disponibles...")
print(f"Backends disponibles: {xr.backends.list_engines()}")

# Verificar dependencias espec√≠ficas
try:
    import netCDF4
    print(f"‚úì netCDF4 version: {netCDF4.__version__}")
except ImportError:
    print("‚ùå netCDF4 no est√° instalado")

try:
    import h5netcdf
    print(f"‚úì h5netcdf version: {h5netcdf.__version__}")
except ImportError:
    print("‚ùå h5netcdf no est√° instalado")

# Verificar archivo espec√≠fico
test_file = Path("/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc")
if test_file.exists():
    print(f"‚úì Archivo encontrado: {test_file}")
    print(f"  Tama√±o: {test_file.stat().st_size / (1024**3):.2f} GB")
    
    # Probar apertura r√°pida
    try:
        test_ds = xr.open_dataset(test_file, engine='netcdf4')
        print(f"  ‚úì Apertura exitosa con netcdf4")
        print(f"  Variables: {list(test_ds.data_vars)}")
        print(f"  Dimensiones: {dict(test_ds.dims)}")
        test_ds.close()
    except Exception as e:
        print(f"  ‚ùå Error abriendo con netcdf4: {e}")
        
        # Intentar con h5netcdf como alternativa
        try:
            test_ds = xr.open_dataset(test_file, engine='h5netcdf')
            print(f"  ‚úì Apertura exitosa con h5netcdf como alternativa")
            test_ds.close()
        except Exception as e2:
            print(f"  ‚ùå Error tambi√©n con h5netcdf: {e2}")
else:
    print(f"‚ùå Archivo no encontrado: {test_file}")

print("\n‚úì Diagn√≥stico completado")

Verificando backends de xarray disponibles...
Backends disponibles: {'netcdf4': <NetCDF4BackendEntrypoint>
  Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html, 'h5netcdf': <H5netcdfBackendEntrypoint>
  Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.H5netcdfBackendEntrypoint.html, 'scipy': <ScipyBackendEntrypoint>
  Open netCDF files (.nc, .nc4, .cdf and .gz) using scipy in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html, 'cfgrib': <CfGribBackend>
  Open GRIB files (.grib, .grib2, .grb and .grb2) in Xarray
  Learn more at https://github.com/ecmwf/cfgrib, 'rasterio': <RasterioBackend>, 'store': <StoreBackendEntrypoint>
  Open AbstractDataStore instances in Xarray
  Learn more at https://doc

In [15]:
# Diagn√≥sticos avanzados del archivo NetCDF

import subprocess
import os

test_file = Path("/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc")

print("=== DIAGN√ìSTICO AVANZADO DEL ARCHIVO ===")

# 1. Verificar tipo de archivo
print("1. Verificando tipo de archivo...")
try:
    result = subprocess.run(['file', str(test_file)], capture_output=True, text=True)
    print(f"   Tipo de archivo: {result.stdout.strip()}")
except Exception as e:
    print(f"   ‚ùå Error ejecutando 'file': {e}")

# 2. Verificar cabecera del archivo
print("\n2. Verificando cabecera del archivo...")
try:
    with open(test_file, 'rb') as f:
        header = f.read(32)
    print(f"   Primeros 32 bytes (hex): {header.hex()}")
    print(f"   Primeros 16 bytes (texto): {header[:16]}")
    
    # Verificar firma NetCDF
    if header.startswith(b'CDF\x01') or header.startswith(b'CDF\x02'):
        print("   ‚úì Archivo tiene firma NetCDF cl√°sica")
    elif header.startswith(b'\x89HDF\r\n\x1a\n'):
        print("   ‚úì Archivo tiene firma HDF5/NetCDF4")
    else:
        print("   ‚ùå Archivo NO tiene firma NetCDF/HDF5 reconocida")
except Exception as e:
    print(f"   ‚ùå Error leyendo cabecera: {e}")

# 3. Intentar con ncdump si est√° disponible
print("\n3. Intentando ncdump...")
try:
    result = subprocess.run(['ncdump', '-h', str(test_file)], 
                          capture_output=True, text=True, timeout=30)
    if result.returncode == 0:
        print("   ‚úì ncdump exitoso - archivo NetCDF v√°lido")
        print("   Primeras l√≠neas de la cabecera:")
        lines = result.stdout.split('\n')[:10]
        for line in lines:
            print(f"     {line}")
    else:
        print(f"   ‚ùå ncdump fall√≥: {result.stderr}")
except FileNotFoundError:
    print("   ‚ö†Ô∏è ncdump no est√° disponible")
except subprocess.TimeoutExpired:
    print("   ‚ùå ncdump timeout")
except Exception as e:
    print(f"   ‚ùå Error ejecutando ncdump: {e}")

# 4. Verificar si es archivo comprimido
print("\n4. Verificando compresi√≥n...")
try:
    # Verificar si es gzip
    with open(test_file, 'rb') as f:
        first_bytes = f.read(3)
    if first_bytes == b'\x1f\x8b\x08':
        print("   ‚ö†Ô∏è Archivo parece estar comprimido con gzip")
        print("   Intentando descomprimir...")
        
        import gzip
        try:
            with gzip.open(test_file, 'rb') as gz_file:
                header = gz_file.read(32)
            print(f"   Cabecera descomprimida: {header[:16]}")
        except Exception as e:
            print(f"   ‚ùå Error descomprimiendo: {e}")
    else:
        print("   ‚úì Archivo no parece estar comprimido")
except Exception as e:
    print(f"   ‚ùå Error verificando compresi√≥n: {e}")

print("\n=== FIN DIAGN√ìSTICO AVANZADO ===")

# 5. Intentar alternativas de lectura
print("\n5. Probando lecturas alternativas...")

# Probar con scipy
try:
    print("   Probando backend scipy...")
    test_ds = xr.open_dataset(test_file, engine='scipy')
    print("   ‚úì Scipy funciona!")
    print(f"   Variables: {list(test_ds.data_vars)}")
    test_ds.close()
    WORKING_ENGINE = 'scipy'
except Exception as e:
    print(f"   ‚ùå Scipy fall√≥: {e}")
    
    # Probar sin especificar engine
    try:
        print("   Probando sin especificar engine...")
        test_ds = xr.open_dataset(test_file)
        print("   ‚úì Auto-detecci√≥n funciona!")
        print(f"   Variables: {list(test_ds.data_vars)}")
        test_ds.close()
        WORKING_ENGINE = 'auto'
    except Exception as e2:
        print(f"   ‚ùå Auto-detecci√≥n tambi√©n fall√≥: {e2}")
        WORKING_ENGINE = None

if WORKING_ENGINE:
    print(f"\nüéâ SOLUCI√ìN ENCONTRADA: Usar engine='{WORKING_ENGINE}'")
else:
    print("\n‚ùå NING√öN ENGINE FUNCIONA - Archivo posiblemente corrupto")

print("\n‚úì Diagn√≥stico avanzado completado")

=== DIAGN√ìSTICO AVANZADO DEL ARCHIVO ===
1. Verificando tipo de archivo...
   Tipo de archivo: /home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc: data

2. Verificando cabecera del archivo...
   Primeros 32 bytes (hex): 504b0304140000000000cba8475b7aa3bc426f1400006f1400000f0000007072
   Primeros 16 bytes (texto): b'PK\x03\x04\x14\x00\x00\x00\x00\x00\xcb\xa8G[z\xa3'
   ‚ùå Archivo NO tiene firma NetCDF/HDF5 reconocida

3. Intentando ncdump...
   ‚ùå ncdump fall√≥: ncdump: /home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc: NetCDF: Unknown file format


4. Verificando compresi√≥n...
   ‚úì Archivo no parece estar comprimido

=== FIN DIAGN√ìSTICO AVANZADO ===

5. Probando lecturas alternativas...
   Probando backend scipy...
   ‚ùå Scipy fall√≥: Error: /home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc is not a valid

In [16]:
# Soluci√≥n para archivos ZIP que contienen NetCDF

import zipfile
import tempfile
import shutil

def extract_netcdf_from_zip(zip_path, extract_to=None):
    """
    Extraer archivo NetCDF de un ZIP y devolver la ruta del archivo extra√≠do
    """
    print(f"üîç Detectado archivo ZIP: {zip_path}")
    
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print(f"  Archivos en ZIP: {file_list}")
            
            # Buscar archivo .nc
            netcdf_files = [f for f in file_list if f.endswith('.nc')]
            
            if not netcdf_files:
                raise ValueError("No se encontraron archivos .nc en el ZIP")
            
            if len(netcdf_files) > 1:
                print(f"  ‚ö†Ô∏è M√∫ltiples archivos .nc encontrados, usando: {netcdf_files[0]}")
            
            nc_file = netcdf_files[0]
            print(f"  üìÅ Extrayendo: {nc_file}")
            
            if extract_to is None:
                # Crear directorio temporal
                extract_to = tempfile.mkdtemp(prefix="cmip6_")
                print(f"  üìÇ Directorio temporal: {extract_to}")
            
            # Extraer archivo
            extracted_path = zip_ref.extract(nc_file, extract_to)
            print(f"  ‚úÖ Extra√≠do a: {extracted_path}")
            
            return Path(extracted_path), extract_to
            
    except Exception as e:
        print(f"  ‚ùå Error extrayendo ZIP: {e}")
        return None, None

# Probar la extracci√≥n
zip_file = Path("/home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc")
extracted_nc, temp_dir = extract_netcdf_from_zip(zip_file)

if extracted_nc and extracted_nc.exists():
    print(f"\nüéâ EXTRACCI√ìN EXITOSA!")
    print(f"   Archivo NetCDF: {extracted_nc}")
    print(f"   Tama√±o: {extracted_nc.stat().st_size / (1024**3):.2f} GB")
    
    # Probar lectura del archivo extra√≠do
    print("\nüß™ Probando lectura del archivo extra√≠do...")
    try:
        test_ds = xr.open_dataset(extracted_nc, engine='netcdf4')
        print("   ‚úÖ Lectura exitosa con netcdf4!")
        print(f"   Variables: {list(test_ds.data_vars)}")
        print(f"   Dimensiones: {dict(test_ds.dims)}")
        print(f"   Per√≠odo: {test_ds.time.values[0]} a {test_ds.time.values[-1]}")
        test_ds.close()
        
        # Guardar ruta para uso posterior
        EXTRACTED_NC_FILE = extracted_nc
        TEMP_EXTRACTION_DIR = temp_dir
        
    except Exception as e:
        print(f"   ‚ùå Error leyendo archivo extra√≠do: {e}")
        EXTRACTED_NC_FILE = None
        TEMP_EXTRACTION_DIR = None
else:
    print("‚ùå No se pudo extraer el archivo")
    EXTRACTED_NC_FILE = None
    TEMP_EXTRACTION_DIR = None

print("\n‚úÖ Proceso de extracci√≥n completado")

üîç Detectado archivo ZIP: /home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical/pr/pr_access_cm2_historical_1850_2014.nc
  Archivos en ZIP: ['provenance.json', 'provenance.png', 'pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc', 'pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18990707-19490109.nc', 'pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19490110-19980715.nc', 'pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19980716-20141231.nc']
  ‚ö†Ô∏è M√∫ltiples archivos .nc encontrados, usando: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc
  üìÅ Extrayendo: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc
  üìÇ Directorio temporal: /tmp/cmip6_muo9a7em
  ‚úÖ Extra√≠do a: /tmp/cmip6_muo9a7em/pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc

üéâ EXTRACCI√ìN EXITOSA!
   Archivo NetCDF: /tmp/cmip6_muo9a7em/pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc
   Tama√±o: 1.71 GB

üß™ Probando lectura del archivo extra√≠do...
   ‚

  print(f"   Dimensiones: {dict(test_ds.dims)}")


In [9]:
# Configuraci√≥n de paths y par√°metros
BASE_PATH = Path("/home/aninotna/magister/tesis/justh2_pipeline")
DATA_PATH = BASE_PATH / "data"
CMIP6_PATH = DATA_PATH / "cmip6" / "historical"
CR2MET_PATH = DATA_PATH / "cr2met" / "clima.zarr"
OUTPUT_PATH = BASE_PATH / "out"

# Crear directorios de salida
(OUTPUT_PATH / "regridded").mkdir(parents=True, exist_ok=True)
(OUTPUT_PATH / "bias_params").mkdir(parents=True, exist_ok=True)
(OUTPUT_PATH / "corrected").mkdir(parents=True, exist_ok=True)
(OUTPUT_PATH / "logs").mkdir(parents=True, exist_ok=True)

# Regi√≥n Valle de Aconcagua (bounding box)
BBOX = {
    'lat_min': -33.27,
    'lat_max': -32.26, 
    'lon_min': -71.89,
    'lon_max': -70.00
}

# Par√°metros de bias correction
VARIABLES = ['pr']  # Empezamos con precipitaci√≥n
CHUNKS = {'time': 365, 'lat': 50, 'lon': 50}
TRAINING_PERIOD = slice('1980', '2014')  # Per√≠odo por defecto, se ajustar√° autom√°ticamente

print("‚úì Configuraci√≥n completada")
print(f"‚úì CMIP6 path: {CMIP6_PATH}")
print(f"‚úì CR2MET path: {CR2MET_PATH}")
print(f"‚úì Output path: {OUTPUT_PATH}")
print(f"‚úì Regi√≥n: {BBOX}")
print(f"‚úì Variables a procesar: {VARIABLES}")

‚úì Configuraci√≥n completada
‚úì CMIP6 path: /home/aninotna/magister/tesis/justh2_pipeline/data/cmip6/historical
‚úì CR2MET path: /home/aninotna/magister/tesis/justh2_pipeline/data/cr2met/clima.zarr
‚úì Output path: /home/aninotna/magister/tesis/justh2_pipeline/out
‚úì Regi√≥n: {'lat_min': -33.27, 'lat_max': -32.26, 'lon_min': -71.89, 'lon_max': -70.0}
‚úì Variables a procesar: ['pr']


In [10]:
# Funciones utilitarias para preprocesamiento

def standardize_units(da, var_name):
    """Estandarizar unidades seg√∫n convenciones CF"""
    if var_name in ['tasmin', 'tasmax', 'tmin', 'tmax']:
        # Convertir a ¬∞C si est√° en K
        if da.attrs.get('units', '').lower() in ['k', 'kelvin']:
            da = da - 273.15
            da.attrs['units'] = 'degrees_C'
        elif da.attrs.get('units', '').lower() in ['¬∞c', 'celsius', 'degrees_celsius']:
            da.attrs['units'] = 'degrees_C'
    
    elif var_name == 'pr':
        # Convertir a mm/d√≠a
        if da.attrs.get('units', '') == 'kg m-2 s-1':
            da = da * 86400  # segundos por d√≠a
            da.attrs['units'] = 'mm/day'
        elif da.attrs.get('units', '') == 'mm d-1':
            da.attrs['units'] = 'mm/day'
    
    return da

def spatial_subset(da, bbox):
    """Recortar espacialmente al bounding box"""
    return da.sel(
        lat=slice(bbox['lat_min'], bbox['lat_max']),
        lon=slice(bbox['lon_min'], bbox['lon_max'])
    )

def align_calendars(ref_da, sim_da):
    """Alinear calendarios entre datos de referencia y simulados"""
    # Convertir a calendario est√°ndar si es necesario
    if hasattr(sim_da.time, 'calendar') and sim_da.time.calendar != 'standard':
        print(f"  Convirtiendo calendario de {sim_da.time.calendar} a standard")
        sim_da = sim_da.convert_calendar('standard', align_on='date')
    
    # Encontrar per√≠odo de traslape
    ref_start = pd.to_datetime(ref_da.time.values[0])
    ref_end = pd.to_datetime(ref_da.time.values[-1])
    sim_start = pd.to_datetime(sim_da.time.values[0]) 
    sim_end = pd.to_datetime(sim_da.time.values[-1])
    
    overlap_start = max(ref_start, sim_start)
    overlap_end = min(ref_end, sim_end)
    
    print(f"  Per√≠odo de traslape: {overlap_start.strftime('%Y-%m-%d')} a {overlap_end.strftime('%Y-%m-%d')}")
    
    # Recortar al per√≠odo de traslape
    ref_aligned = ref_da.sel(time=slice(overlap_start, overlap_end))
    sim_aligned = sim_da.sel(time=slice(overlap_start, overlap_end))
    
    return ref_aligned, sim_aligned, overlap_start, overlap_end

def apply_wet_day_adjustment(da, threshold=0.1):
    """Aplicar ajuste de d√≠as h√∫medos para precipitaci√≥n"""
    # Marcar d√≠as h√∫medos
    wet_days = da >= threshold
    return da, wet_days

print("‚úì Funciones utilitarias definidas")

‚úì Funciones utilitarias definidas


In [18]:
# Cargar datos de referencia CR2MET

print("Cargando datos CR2MET...")
try:
    # Abrir el dataset CR2MET
    cr2met = xr.open_dataset(CR2MET_PATH, chunks=CHUNKS)
    print(f"‚úì CR2MET cargado exitosamente")
    print(f"  Variables disponibles: {list(cr2met.data_vars)}")
    print(f"  Dimensiones: {dict(cr2met.dims)}")
    print(f"  Per√≠odo temporal: {cr2met.time.dt.strftime('%Y-%m-%d').values[0]} a {cr2met.time.dt.strftime('%Y-%m-%d').values[-1]}")
    
    # Recortar espacialmente al Valle de Aconcagua
    cr2met_subset = spatial_subset(cr2met, BBOX)
    print(f"  Regi√≥n recortada: lat {cr2met_subset.lat.values.min():.3f} a {cr2met_subset.lat.values.max():.3f}, "
          f"lon {cr2met_subset.lon.values.min():.3f} a {cr2met_subset.lon.values.max():.3f}")
    
    # Verificar variable de precipitaci√≥n
    if 'pr' in cr2met_subset.data_vars:
        cr2met_pr = cr2met_subset['pr']
        cr2met_pr = standardize_units(cr2met_pr, 'pr')
        print(f"  Precipitaci√≥n: {cr2met_pr.shape}, unidades: {cr2met_pr.attrs.get('units', 'N/A')}")
        print(f"  Rango temporal PR: {cr2met_pr.time.dt.strftime('%Y-%m-%d').values[0]} a {cr2met_pr.time.dt.strftime('%Y-%m-%d').values[-1]}")
    else:
        print("  ‚ö†Ô∏è Variable 'pr' no encontrada en CR2MET")
        
except Exception as e:
    print(f"‚ùå Error cargando CR2MET: {e}")
    cr2met_subset = None

Cargando datos CR2MET...
‚úì CR2MET cargado exitosamente
  Variables disponibles: ['year', 'cl_mask', 'pr', 'tmin', 'pr_sd', 'tmax']
  Dimensiones: {'time': 22646, 'lat': 800, 'lon': 220}
  Per√≠odo temporal: 1960-01-01 a 2021-12-31
  Regi√≥n recortada: lat -33.225 a -32.275, lon -71.875 a -70.025
  Precipitaci√≥n: (22646, 20, 38), unidades: mm/day
  Rango temporal PR: 1960-01-01 a 2021-12-31


  cr2met = xr.open_dataset(CR2MET_PATH, chunks=CHUNKS)
  cr2met = xr.open_dataset(CR2MET_PATH, chunks=CHUNKS)
  cr2met = xr.open_dataset(CR2MET_PATH, chunks=CHUNKS)
  print(f"  Dimensiones: {dict(cr2met.dims)}")


In [20]:
# Cargar datos CMIP6 - Precipitaci√≥n (con soporte para m√∫ltiples archivos)

def load_cmip6_from_multiple_files(data_path, model_pattern, chunks):
    """
    Cargar y concatenar m√∫ltiples archivos NetCDF de un modelo CMIP6
    """
    # Buscar archivos que coincidan con el patr√≥n del modelo
    nc_files = list(data_path.glob(f"*{model_pattern}*.nc"))
    
    if not nc_files:
        return None
        
    print(f"  üìÅ Encontrados {len(nc_files)} archivos NetCDF:")
    for f in sorted(nc_files):
        print(f"    - {f.name}")
    
    # Cargar y concatenar todos los archivos
    datasets = []
    for nc_file in sorted(nc_files):
        print(f"    üìÇ Cargando: {nc_file.name}")
        ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
        datasets.append(ds)
    
    # Concatenar por tiempo
    if len(datasets) > 1:
        print(f"  üîó Concatenando {len(datasets)} datasets por tiempo...")
        combined_ds = xr.concat(datasets, dim='time')
        
        # Verificar que no hay duplicados temporales
        time_diffs = pd.Series(combined_ds.time.values).diff().dropna()
        duplicates = (time_diffs == pd.Timedelta(0)).sum()
        if duplicates > 0:
            print(f"  ‚ö†Ô∏è Detectados {duplicates} timestamps duplicados, removiendo...")
            _, unique_indices = np.unique(combined_ds.time.values, return_index=True)
            combined_ds = combined_ds.isel(time=sorted(unique_indices))
            
        print(f"  ‚úÖ Dataset concatenado: {combined_ds.pr.shape}")
    else:
        combined_ds = datasets[0]
        print(f"  ‚úÖ Dataset √∫nico: {combined_ds.pr.shape}")
    
    return combined_ds

def detect_cmip6_models(data_path):
    """
    Detectar modelos CMIP6 disponibles bas√°ndose en los archivos presentes
    """
    all_files = list(data_path.glob("*.nc"))
    models = set()
    
    for file in all_files:
        # Extraer modelo del nombre del archivo
        # Formato esperado: pr_day_MODEL_historical_...
        parts = file.name.split('_')
        if len(parts) >= 3 and 'historical' in parts:
            model = parts[2]  # MODEL est√° en la posici√≥n 2
            models.add(model)
    
    return list(models)

print("üîç Detectando modelos CMIP6 disponibles...")

# Detectar modelos autom√°ticamente
pr_path = CMIP6_PATH / "pr"
available_models = detect_cmip6_models(pr_path)

print(f"üìã Modelos detectados: {available_models}")

cmip6_datasets = {}

for model_name in available_models:
    print(f"\n=== PROCESANDO MODELO: {model_name.upper()} ===")
    
    try:
        # Cargar datasets del modelo
        ds = load_cmip6_from_multiple_files(pr_path, model_name, CHUNKS)
        
        if ds is None:
            print(f"  ‚ùå No se pudo cargar el dataset para {model_name}")
            continue
            
        print(f"  ‚úÖ Modelo cargado exitosamente")
        print(f"  üìÖ Per√≠odo completo: {ds.time.values[0]} a {ds.time.values[-1]}")
        print(f"  üìè Shape final: {ds.pr.shape}")
        
        # Estandarizar variable de precipitaci√≥n
        pr_da = ds['pr']
        
        # Estandarizar unidades
        pr_da = standardize_units(pr_da, 'pr')
        print(f"  üîß Unidades estandarizadas: {pr_da.attrs.get('units', 'N/A')}")
        
        # Verificar coordenadas y recortar espacialmente
        print(f"  üó∫Ô∏è Rango espacial original: lat {pr_da.lat.values.min():.2f} a {pr_da.lat.values.max():.2f}, "
              f"lon {pr_da.lon.values.min():.2f} a {pr_da.lon.values.max():.2f}")
        
        # Recortar espacialmente (con margen para regridding)
        pr_subset = spatial_subset(pr_da, BBOX)
        print(f"  ‚úÇÔ∏è Despu√©s del recorte: {pr_subset.shape}")
        
        # Detectar frecuencia temporal
        time_diff = pd.to_datetime(pr_subset.time.values[1]) - pd.to_datetime(pr_subset.time.values[0])
        if time_diff.days == 1:
            freq = 'daily'
        elif time_diff.days >= 28 and time_diff.days <= 31:
            freq = 'monthly'
        else:
            freq = 'unknown'
        print(f"  ‚è∞ Frecuencia detectada: {freq}")
        
        # Almacenar informaci√≥n del modelo
        cmip6_datasets[model_name] = {
            'data': pr_subset,
            'frequency': freq,
            'original_path': pr_path,
            'calendar': getattr(pr_subset.time, 'calendar', 'standard'),
            'total_files': len(list(pr_path.glob(f"*{model_name}*.nc")))
        }
        
        print(f"  ‚úÖ {model_name.upper()} procesado exitosamente")
        
        # Cerrar dataset para liberar memoria
        ds.close()
        
    except Exception as e:
        print(f"  ‚ùå Error procesando {model_name}: {e}")
        import traceback
        traceback.print_exc()
        continue

print(f"\nüéâ RESUMEN DE CARGA")
print(f"‚úÖ Total de modelos CMIP6 cargados: {len(cmip6_datasets)}")
for model, info in cmip6_datasets.items():
    print(f"  - {model.upper()}: {info['frequency']}, {info['total_files']} archivos, calendario {info['calendar']}")

üîç Detectando modelos CMIP6 disponibles...
üìã Modelos detectados: ['ACCESS-CM2']

=== PROCESANDO MODELO: ACCESS-CM2 ===
  üìÅ Encontrados 4 archivos NetCDF:
    - pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc
    - pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18990707-19490109.nc
    - pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19490110-19980715.nc
    - pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19980716-20141231.nc
    üìÇ Cargando: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-18990706.nc
    üìÇ Cargando: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_18990707-19490109.nc
    üìÇ Cargando: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19490110-19980715.nc
    üìÇ Cargando: pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19980716-20141231.nc
  üîó Concatenando 4 datasets por tiempo...
  ‚úÖ Dataset concatenado: (60265, 144, 192)
  ‚úÖ Modelo cargado exitosamente
  üìÖ Per√≠odo completo: 1850-01-01T12:00:00.000000000 a 2014-12-31T12:00:00.000000000
  üìè Shape final: (60265, 14

  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')


In [22]:
# Verificaci√≥n y correcci√≥n del sistema de coordenadas

print("üîç VERIFICANDO SISTEMA DE COORDENADAS...")

# Recargar datos con correcci√≥n de coordenadas desde el inicio
print("üîÑ Recargando datos con correcci√≥n de coordenadas...")

def load_and_fix_coordinates(data_path, model_pattern, chunks, bbox):
    """
    Cargar archivos CMIP6 y corregir coordenadas si es necesario
    """
    # Buscar archivos
    nc_files = list(data_path.glob(f"*{model_pattern}*.nc"))
    if not nc_files:
        return None
        
    print(f"  üìÅ Cargando {len(nc_files)} archivos...")
    
    # Cargar primer archivo para verificar coordenadas
    first_ds = xr.open_dataset(sorted(nc_files)[0], engine='netcdf4')
    
    print(f"  üìä Coordenadas originales:")
    print(f"    Latitud: {first_ds.lat.values.min():.2f} a {first_ds.lat.values.max():.2f}")
    print(f"    Longitud: {first_ds.lon.values.min():.2f} a {first_ds.lon.values.max():.2f}")
    
    # Verificar si necesita conversi√≥n de longitud
    needs_conversion = first_ds.lon.values.max() > 180
    first_ds.close()
    
    # Cargar y procesar todos los archivos
    datasets = []
    for nc_file in sorted(nc_files):
        ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
        
        # Convertir coordenadas si es necesario
        if needs_conversion:
            ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
            ds = ds.sortby('lon')
        
        datasets.append(ds)
    
    # Concatenar
    if len(datasets) > 1:
        print(f"  üîó Concatenando {len(datasets)} datasets...")
        combined_ds = xr.concat(datasets, dim='time')
    else:
        combined_ds = datasets[0]
    
    print(f"  üìä Coordenadas despu√©s de conversi√≥n:")
    print(f"    Latitud: {combined_ds.lat.values.min():.2f} a {combined_ds.lat.values.max():.2f}")
    print(f"    Longitud: {combined_ds.lon.values.min():.2f} a {combined_ds.lon.values.max():.2f}")
    
    # Estandarizar unidades
    pr_da = standardize_units(combined_ds['pr'], 'pr')
    
    # Aplicar recorte espacial
    print(f"  üéØ Aplicando recorte espacial para bbox: {bbox}")
    pr_subset = spatial_subset(pr_da, bbox)
    
    print(f"  ‚úÇÔ∏è Shape despu√©s del recorte: {pr_subset.shape}")
    
    return pr_subset, combined_ds

# Recargar ACCESS-CM2 con correcci√≥n de coordenadas
pr_path = CMIP6_PATH / "pr"
pr_subset_fixed, ds_full = load_and_fix_coordinates(pr_path, 'ACCESS-CM2', CHUNKS, BBOX)

if pr_subset_fixed is not None and pr_subset_fixed.shape[1] > 0 and pr_subset_fixed.shape[2] > 0:
    print("‚úÖ Datos cargados exitosamente con coordenadas corregidas")
    
    # Actualizar el dataset
    cmip6_datasets['ACCESS-CM2']['data'] = pr_subset_fixed
    
    print(f"üìè Shape final: {pr_subset_fixed.shape}")
    print(f"üó∫Ô∏è Rango espacial final:")
    print(f"  Latitud: {pr_subset_fixed.lat.values.min():.3f} a {pr_subset_fixed.lat.values.max():.3f}")
    print(f"  Longitud: {pr_subset_fixed.lon.values.min():.3f} a {pr_subset_fixed.lon.values.max():.3f}")
    
    # Calcular estad√≠sticos b√°sicos
    mean_pr = float(pr_subset_fixed.mean().values)
    max_pr = float(pr_subset_fixed.max().values)
    min_pr = float(pr_subset_fixed.min().values)
    print(f"üìä Estad√≠sticos b√°sicos:")
    print(f"  Precipitaci√≥n promedio: {mean_pr:.3f} mm/day")
    print(f"  Precipitaci√≥n m√≠nima: {min_pr:.3f} mm/day")
    print(f"  Precipitaci√≥n m√°xima: {max_pr:.3f} mm/day")
    
    # Verificar per√≠odo temporal
    print(f"üìÖ Per√≠odo temporal: {pr_subset_fixed.time.values[0]} a {pr_subset_fixed.time.values[-1]}")
    print(f"   Total de d√≠as: {len(pr_subset_fixed.time)}")
    
else:
    print("‚ùå No se pudieron cargar datos v√°lidos para la regi√≥n")

# Limpiar memoria
if 'ds_full' in locals():
    ds_full.close()

print("\n‚úÖ Correcci√≥n de coordenadas completada")

üîç VERIFICANDO SISTEMA DE COORDENADAS...
üîÑ Recargando datos con correcci√≥n de coordenadas...
  üìÅ Cargando 4 archivos...
  üìä Coordenadas originales:
    Latitud: -89.38 a 89.38
    Longitud: 0.94 a 359.06
  üîó Concatenando 4 datasets...
  üìä Coordenadas despu√©s de conversi√≥n:
    Latitud: -89.38 a 89.38
    Longitud: -179.06 a 179.06
  üéØ Aplicando recorte espacial para bbox: {'lat_min': -33.27, 'lat_max': -32.26, 'lon_min': -71.89, 'lon_max': -70.0}
  ‚úÇÔ∏è Shape despu√©s del recorte: (60265, 1, 1)
‚úÖ Datos cargados exitosamente con coordenadas corregidas
üìè Shape final: (60265, 1, 1)
üó∫Ô∏è Rango espacial final:
  Latitud: -33.125 a -33.125
  Longitud: -70.312 a -70.312


  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')
  ds = xr.open_dataset(nc_file, chunks=chunks, engine='netcdf4')


üìä Estad√≠sticos b√°sicos:
  Precipitaci√≥n promedio: 0.894 mm/day
  Precipitaci√≥n m√≠nima: 0.000 mm/day
  Precipitaci√≥n m√°xima: 58.350 mm/day
üìÖ Per√≠odo temporal: 1850-01-01T12:00:00.000000000 a 2014-12-31T12:00:00.000000000
   Total de d√≠as: 60265

‚úÖ Correcci√≥n de coordenadas completada


  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_

In [29]:
# Funci√≥n de regridding para alinear grids

def regrid_to_reference(source_da, target_da, method='bilinear'):
    """
    Regriddear datos fuente al grid del dataset de referencia
    Usa xESMF si est√° disponible, sino rioxarray
    """
    print(f"  Regridding de {source_da.shape} a grid de referencia...")
    
    try:
        if xe is not None:
            # Usar xESMF si est√° disponible
            print("  Usando xESMF para regridding...")
            regridder = xe.Regridder(
                source_da, 
                target_da, 
                method=method,
                periodic=False,
                ignore_degenerate=True
            )
            
            regridded = regridder(source_da)
            regridder.clean_weight_file()
            
        else:
            # Usar rioxarray como alternativa
            print("  Usando rioxarray para regridding...")
            
            # Asignar CRS si no existe
            if source_da.rio.crs is None:
                source_da = source_da.rio.write_crs("EPSG:4326")
            if target_da.rio.crs is None:
                target_da = target_da.rio.write_crs("EPSG:4326")
            
            # Hacer regridding usando rioxarray
            regridded = source_da.rio.reproject_match(target_da, resampling=1)  # bilinear
        
        print(f"  ‚úì Regridding completado: {regridded.shape}")
        return regridded
        
    except Exception as e:
        print(f"  ‚ùå Error en regridding: {e}")
        
        # Fallback: interpolaci√≥n simple con xarray
        try:
            print("  Intentando interpolaci√≥n simple como fallback...")
            regridded = source_da.interp(
                lat=target_da.lat,
                lon=target_da.lon,
                method='linear'
            )
            print(f"  ‚úì Interpolaci√≥n completada: {regridded.shape}")
            return regridded
        except Exception as e2:
            print(f"  ‚ùå Fallback tambi√©n fall√≥: {e2}")
            return None

# Aplicar regridding a datos CMIP6
print("Aplicando regridding a datos CMIP6...")

regridded_datasets = {}

for model_name, model_info in cmip6_datasets.items():
    print(f"\n--- Regridding {model_name.upper()} ---")
    
    try:
        # Obtener datos originales
        cmip6_data = model_info['data']
        
        # Aplicar regridding al grid de CR2MET
        regridded_data = regrid_to_reference(cmip6_data, cr2met_pr, method='bilinear')
        
        if regridded_data is not None:
            # Actualizar informaci√≥n del modelo
            regridded_datasets[model_name] = {
                **model_info,
                'regridded_data': regridded_data
            }
            
            print(f"  ‚úì {model_name.upper()} regridded exitosamente")
            print(f"    Shape final: {regridded_data.shape}")
            
            # Crear directorio de salida para datos regridded
            output_dir = OUTPUT_PATH / "regridded" / model_name / "pr" / "historical"
            output_dir.mkdir(parents=True, exist_ok=True)
            
            # Guardar datos regridded (opcional, para verificaci√≥n)
            output_file = output_dir / f"pr_{model_name}_historical_regridded_cr2metgrid.nc"
            print(f"  Guardando regridded en: {output_file}")
            
            regridded_data.to_netcdf(
                output_file,
                encoding={'pr': {'zlib': True, 'complevel': 4}}
            )
            
        else:
            print(f"  ‚ùå Regridding fall√≥ para {model_name}")
            
    except Exception as e:
        print(f"  ‚ùå Error general en regridding de {model_name}: {e}")
        continue

print(f"\n‚úì Regridding completado para {len(regridded_datasets)} modelos")

Aplicando regridding a datos CMIP6...

--- Regridding ACCESS-CM2 ---
  Regridding de (60265, 1, 1) a grid de referencia...
  Usando rioxarray para regridding...
  ‚ùå Error en regridding: y dimension not found. 'rio.set_spatial_dims()' or using 'rename()' to change the dimension name to 'y' can address this. Data variable: pr
  Intentando interpolaci√≥n simple como fallback...
  ‚úì Interpolaci√≥n completada: (60265, 20, 38)
  ‚úì ACCESS-CM2 regridded exitosamente
    Shape final: (60265, 20, 38)
  Guardando regridded en: /home/aninotna/magister/tesis/justh2_pipeline/out/regridded/ACCESS-CM2/pr/historical/pr_ACCESS-CM2_historical_regridded_cr2metgrid.nc

‚úì Regridding completado para 1 modelos

‚úì Regridding completado para 1 modelos


In [31]:
# Entrenamiento de Bias Correction con Quantile Mapping

print("Iniciando entrenamiento de bias correction...")

trained_adjustments = {}
training_log = []

for model_name, model_info in regridded_datasets.items():
    print(f"\n=== ENTRENANDO BIAS CORRECTION: {model_name.upper()} ===")
    
    try:
        # Obtener datos regridded
        cmip6_regridded = model_info['regridded_data']
        
        # Alinear calendarios y encontrar per√≠odo de traslape
        ref_aligned, sim_aligned, overlap_start, overlap_end = align_calendars(cr2met_pr, cmip6_regridded)
        
        print(f"Datos para entrenamiento:")
        print(f"  CR2MET: {ref_aligned.shape}")
        print(f"  {model_name}: {sim_aligned.shape}")
        
        # Cargar datos en memoria para evitar problemas con chunking
        print("  Cargando datos en memoria para entrenamiento...")
        ref_aligned = ref_aligned.load()
        sim_aligned = sim_aligned.load()
        print("  ‚úì Datos cargados en memoria")
        
        # Aplicar wet-day adjustment para precipitaci√≥n
        ref_adjusted, ref_wet_days = apply_wet_day_adjustment(ref_aligned, threshold=0.1)
        sim_adjusted, sim_wet_days = apply_wet_day_adjustment(sim_aligned, threshold=0.1)
        
        print(f"  D√≠as h√∫medos CR2MET: {ref_wet_days.sum().values} de {len(ref_wet_days.time)}")
        print(f"  D√≠as h√∫medos {model_name}: {sim_wet_days.sum().values} de {len(sim_wet_days.time)}")
        
        # Verificar que hay d√≠as h√∫medos en ambos datasets
        if sim_wet_days.sum().values == 0:
            print("  ‚ö†Ô∏è No hay d√≠as h√∫medos en datos simulados, revisando umbrales...")
            # Intentar con umbral m√°s bajo
            sim_adjusted_low, sim_wet_days_low = apply_wet_day_adjustment(sim_aligned, threshold=0.01)
            print(f"  D√≠as h√∫medos {model_name} (umbral 0.01): {sim_wet_days_low.sum().values}")
            if sim_wet_days_low.sum().values > 0:
                sim_adjusted = sim_adjusted_low
                sim_wet_days = sim_wet_days_low
                print("  ‚úì Usando umbral reducido de 0.01 mm/day")
        
        # Configurar m√©todo de bias correction
        try:
            # Intentar con Detrended Quantile Mapping (DQM) primero
            print("  Intentando Detrended Quantile Mapping (DQM)...")
            adjustment = DetrendedQuantileMapping.train(
                ref=ref_adjusted,
                hist=sim_adjusted,
                nquantiles=50,  # Reducir n√∫mero de cuantiles
                kind='+',  # Aditivo para precipitaci√≥n
                group='time'
            )
            method_used = 'DQM'
            print("  ‚úì DQM entrenado exitosamente")
            
        except Exception as e:
            print(f"  DQM fall√≥ ({e}), intentando Empirical Quantile Mapping (EQM)...")
            try:
                adjustment = EmpiricalQuantileMapping.train(
                    ref=ref_adjusted,
                    hist=sim_adjusted,
                    nquantiles=50,  # Reducir n√∫mero de cuantiles
                    kind='+',  # Aditivo para precipitaci√≥n
                    group='time'
                )
                method_used = 'EQM'
                print("  ‚úì EQM entrenado exitosamente")
                
            except Exception as e2:
                print(f"  ‚ùå Ambos m√©todos fallaron: DQM ({e}), EQM ({e2})")
                training_log.append({
                    'model': model_name,
                    'variable': 'pr',
                    'frequency': model_info['frequency'],
                    'train_start': overlap_start.strftime('%Y-%m-%d'),
                    'train_end': overlap_end.strftime('%Y-%m-%d'),
                    'method': 'FAILED',
                    'status': 'ERROR',
                    'notes': f"DQM: {e}, EQM: {e2}"
                })
                continue
        
        # Guardar adjustment entrenado
        params_dir = OUTPUT_PATH / "bias_params" / model_name / "pr" / "historical"
        params_dir.mkdir(parents=True, exist_ok=True)
        
        params_file = params_dir / f"pr_{model_name}_historical_{method_used.lower()}_params_{overlap_start.strftime('%Y')}_{overlap_end.strftime('%Y')}.nc"
        
        print(f"  Guardando par√°metros en: {params_file}")
        adjustment.save(params_file)
        
        # Almacenar informaci√≥n
        trained_adjustments[model_name] = {
            'adjustment': adjustment,
            'method': method_used,
            'training_period': (overlap_start, overlap_end),
            'params_file': params_file
        }
        
        # Log exitoso
        training_log.append({
            'model': model_name,
            'variable': 'pr',
            'frequency': model_info['frequency'],
            'train_start': overlap_start.strftime('%Y-%m-%d'),
            'train_end': overlap_end.strftime('%Y-%m-%d'),
            'method': method_used,
            'status': 'SUCCESS',
            'notes': f"Entrenado con {len(ref_aligned.time)} timesteps"
        })
        
        print(f"  ‚úì Bias correction entrenado para {model_name.upper()}")
        
    except Exception as e:
        print(f"  ‚ùå Error general entrenando {model_name}: {e}")
        import traceback
        traceback.print_exc()
        training_log.append({
            'model': model_name,
            'variable': 'pr',
            'frequency': model_info.get('frequency', 'unknown'),
            'train_start': 'N/A',
            'train_end': 'N/A', 
            'method': 'N/A',
            'status': 'ERROR',
            'notes': str(e)
        })
        continue

print(f"\n‚úì Entrenamiento completado para {len(trained_adjustments)} modelos")

# Guardar log de entrenamiento
log_df = pd.DataFrame(training_log)
log_file = OUTPUT_PATH / "logs" / "bias_correction_training_log.csv"
log_df.to_csv(log_file, index=False)
print(f"‚úì Log guardado en: {log_file}")
print("\nResumen del entrenamiento:")
print(log_df)

Iniciando entrenamiento de bias correction...

=== ENTRENANDO BIAS CORRECTION: ACCESS-CM2 ===
  Per√≠odo de traslape: 1960-01-01 a 2014-12-31
Datos para entrenamiento:
  CR2MET: (20089, 20, 38)
  ACCESS-CM2: (20089, 20, 38)
  Cargando datos en memoria para entrenamiento...
  ‚úì Datos cargados en memoria
  D√≠as h√∫medos CR2MET: 1983315 de 20089
  D√≠as h√∫medos ACCESS-CM2: 0 de 20089
  ‚ö†Ô∏è No hay d√≠as h√∫medos en datos simulados, revisando umbrales...
  D√≠as h√∫medos ACCESS-CM2 (umbral 0.01): 0
  Intentando Detrended Quantile Mapping (DQM)...
  DQM fall√≥ (`ref` and `hist` have distinct time arrays, this is not supported for DetrendedQuantileMapping adjustment.), intentando Empirical Quantile Mapping (EQM)...
  ‚ùå Ambos m√©todos fallaron: DQM (`ref` and `hist` have distinct time arrays, this is not supported for DetrendedQuantileMapping adjustment.), EQM (`ref` and `hist` have distinct time arrays, this is not supported for EmpiricalQuantileMapping adjustment.)

‚úì Entrenamient

In [None]:
# Aplicar correcci√≥n de sesgo a datos hist√≥ricos completos

print("Aplicando bias correction a series hist√≥ricas completas...")

corrected_datasets = {}
correction_log = []

for model_name, adjustment_info in trained_adjustments.items():
    print(f"\n=== APLICANDO CORRECCI√ìN: {model_name.upper()} ===")
    
    try:
        # Obtener adjustment entrenado
        adjustment = adjustment_info['adjustment']
        
        # Obtener datos regridded completos
        cmip6_full = regridded_datasets[model_name]['regridded_data']
        
        print(f"  Aplicando {adjustment_info['method']} a serie completa: {cmip6_full.shape}")
        
        # Aplicar correcci√≥n
        print("  Ejecutando bias correction...")
        with ProgressBar():
            corrected_data = adjustment.adjust(cmip6_full, interp='linear')
        
        # Asegurar no-negatividad para precipitaci√≥n
        corrected_data = corrected_data.clip(min=0)
        
        # Actualizar metadatos
        corrected_data.attrs.update({
            'units': 'mm/day',
            'long_name': f'Bias-corrected precipitation from {model_name}',
            'bias_correction_method': adjustment_info['method'],
            'bias_correction_reference': 'CR2MET',
            'training_period': f"{adjustment_info['training_period'][0].strftime('%Y-%m-%d')} to {adjustment_info['training_period'][1].strftime('%Y-%m-%d')}",
            'history': f"Bias-corrected on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}",
            'source_model': model_name,
            'regridded_to': 'CR2MET grid'
        })
        
        print(f"  ‚úì Correcci√≥n aplicada: {corrected_data.shape}")
        print(f"    Rango valores: {corrected_data.min().values:.3f} a {corrected_data.max().values:.3f} mm/day")
        
        # Guardar datos corregidos
        corrected_dir = OUTPUT_PATH / "corrected" / model_name / "pr" / "historical"
        corrected_dir.mkdir(parents=True, exist_ok=True)
        
        # Nombre del archivo con informaci√≥n del per√≠odo de entrenamiento
        train_start = adjustment_info['training_period'][0].strftime('%Y')
        train_end = adjustment_info['training_period'][1].strftime('%Y')
        
        corrected_file = corrected_dir / f"pr_{model_name}_historical_corrected_cr2metgrid_{train_start}_{train_end}.nc"
        
        print(f"  Guardando datos corregidos: {corrected_file}")
        
        # Encoding para compresi√≥n
        encoding = {
            'pr': {
                'zlib': True, 
                'complevel': 4,
                'dtype': 'float32'
            }
        }
        
        corrected_data.to_netcdf(corrected_file, encoding=encoding)
        
        # Almacenar informaci√≥n
        corrected_datasets[model_name] = {
            'data': corrected_data,
            'file': corrected_file,
            'method': adjustment_info['method'],
            'training_period': adjustment_info['training_period']
        }
        
        # Log exitoso
        correction_log.append({
            'model': model_name,
            'variable': 'pr',
            'method': adjustment_info['method'],
            'train_period': f"{train_start}-{train_end}",
            'output_file': str(corrected_file),
            'min_value': float(corrected_data.min().values),
            'max_value': float(corrected_data.max().values),
            'mean_value': float(corrected_data.mean().values),
            'status': 'SUCCESS'
        })
        
        print(f"  ‚úì {model_name.upper()} corregido y guardado exitosamente")
        
    except Exception as e:
        print(f"  ‚ùå Error aplicando correcci√≥n a {model_name}: {e}")
        correction_log.append({
            'model': model_name,
            'variable': 'pr',
            'method': adjustment_info.get('method', 'N/A'),
            'train_period': 'N/A',
            'output_file': 'N/A',
            'min_value': None,
            'max_value': None,
            'mean_value': None,
            'status': 'ERROR'
        })
        continue

print(f"\n‚úì Correcci√≥n aplicada a {len(corrected_datasets)} modelos")

# Guardar log de correcci√≥n
correction_df = pd.DataFrame(correction_log)
correction_log_file = OUTPUT_PATH / "logs" / "bias_correction_application_log.csv"
correction_df.to_csv(correction_log_file, index=False)
print(f"‚úì Log de correcci√≥n guardado en: {correction_log_file}")
print("\nResumen de la aplicaci√≥n:")
print(correction_df)

In [None]:
# Validaci√≥n r√°pida y estad√≠sticos comparativos

print("Realizando validaci√≥n de la correcci√≥n de sesgo...")

validation_results = []

for model_name, corrected_info in corrected_datasets.items():
    print(f"\n--- Validaci√≥n {model_name.upper()} ---")
    
    try:
        # Obtener per√≠odo de entrenamiento
        train_start, train_end = corrected_info['training_period']
        
        # Recortar datos al per√≠odo de entrenamiento para validaci√≥n
        ref_validation = cr2met_pr.sel(time=slice(train_start, train_end))
        corrected_validation = corrected_info['data'].sel(time=slice(train_start, train_end))
        original_validation = regridded_datasets[model_name]['regridded_data'].sel(time=slice(train_start, train_end))
        
        # Calcular estad√≠sticos espaciales promedio
        ref_mean = float(ref_validation.mean().values)
        ref_p10 = float(ref_validation.quantile(0.1).values)
        ref_p90 = float(ref_validation.quantile(0.9).values)
        
        orig_mean = float(original_validation.mean().values)
        orig_p10 = float(original_validation.quantile(0.1).values)
        orig_p90 = float(original_validation.quantile(0.9).values)
        
        corr_mean = float(corrected_validation.mean().values)
        corr_p10 = float(corrected_validation.quantile(0.1).values)
        corr_p90 = float(corrected_validation.quantile(0.9).values)
        
        # Calcular sesgos
        orig_bias_mean = orig_mean - ref_mean
        corr_bias_mean = corr_mean - ref_mean
        
        orig_bias_p10 = orig_p10 - ref_p10
        corr_bias_p10 = corr_p10 - ref_p10
        
        orig_bias_p90 = orig_p90 - ref_p90
        corr_bias_p90 = corr_p90 - ref_p90
        
        print(f"  Estad√≠sticos en per√≠odo de entrenamiento ({train_start.strftime('%Y')} - {train_end.strftime('%Y')}):")
        print(f"    CR2MET    - Media: {ref_mean:.3f}, P10: {ref_p10:.3f}, P90: {ref_p90:.3f} mm/day")
        print(f"    Original  - Media: {orig_mean:.3f}, P10: {orig_p10:.3f}, P90: {orig_p90:.3f} mm/day")
        print(f"    Corregido - Media: {corr_mean:.3f}, P10: {corr_p10:.3f}, P90: {corr_p90:.3f} mm/day")
        print(f"  Sesgo original - Media: {orig_bias_mean:+.3f}, P10: {orig_bias_p10:+.3f}, P90: {orig_bias_p90:+.3f} mm/day")
        print(f"  Sesgo corregido - Media: {corr_bias_mean:+.3f}, P10: {corr_bias_p10:+.3f}, P90: {corr_bias_p90:+.3f} mm/day")
        
        # Calcular mejora relativa
        improvement_mean = abs(orig_bias_mean) - abs(corr_bias_mean)
        improvement_p10 = abs(orig_bias_p10) - abs(corr_bias_p10)
        improvement_p90 = abs(orig_bias_p90) - abs(corr_bias_p90)
        
        print(f"  Mejora absoluta - Media: {improvement_mean:+.3f}, P10: {improvement_p10:+.3f}, P90: {improvement_p90:+.3f} mm/day")
        
        # Almacenar resultados
        validation_results.append({
            'model': model_name,
            'ref_mean': ref_mean,
            'ref_p10': ref_p10,
            'ref_p90': ref_p90,
            'orig_mean': orig_mean,
            'orig_p10': orig_p10,
            'orig_p90': orig_p90,
            'corr_mean': corr_mean,
            'corr_p10': corr_p10,
            'corr_p90': corr_p90,
            'orig_bias_mean': orig_bias_mean,
            'orig_bias_p10': orig_bias_p10,
            'orig_bias_p90': orig_bias_p90,
            'corr_bias_mean': corr_bias_mean,
            'corr_bias_p10': corr_bias_p10,
            'corr_bias_p90': corr_bias_p90,
            'improvement_mean': improvement_mean,
            'improvement_p10': improvement_p10,
            'improvement_p90': improvement_p90,
            'method': corrected_info['method']
        })
        
    except Exception as e:
        print(f"  ‚ùå Error en validaci√≥n de {model_name}: {e}")
        continue

# Guardar resultados de validaci√≥n
if validation_results:
    validation_df = pd.DataFrame(validation_results)
    validation_file = OUTPUT_PATH / "logs" / "bias_correction_validation.csv"
    validation_df.to_csv(validation_file, index=False)
    print(f"\n‚úì Resultados de validaci√≥n guardados en: {validation_file}")
    
    # Mostrar resumen
    print("\n=== RESUMEN DE VALIDACI√ìN ===")
    print("Mejora en sesgo absoluto (valores positivos = mejora):")
    summary_cols = ['model', 'method', 'improvement_mean', 'improvement_p10', 'improvement_p90']
    print(validation_df[summary_cols].round(3))
else:
    print("‚ùå No se pudieron generar resultados de validaci√≥n")

In [None]:
# Visualizaci√≥n b√°sica y resumen final

print("Generando visualizaciones b√°sicas...")

if validation_results:
    # Configurar matplotlib
    plt.style.use('default')
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.suptitle('Validaci√≥n de Bias Correction - Precipitaci√≥n', fontsize=14, fontweight='bold')
    
    # Datos para plotting
    models = [r['model'] for r in validation_results]
    ref_means = [r['ref_mean'] for r in validation_results]
    orig_means = [r['orig_mean'] for r in validation_results]
    corr_means = [r['corr_mean'] for r in validation_results]
    
    orig_bias_means = [r['orig_bias_mean'] for r in validation_results]
    corr_bias_means = [r['corr_bias_mean'] for r in validation_results]
    
    # 1. Medias comparativas
    x_pos = np.arange(len(models))
    width = 0.25
    
    axes[0,0].bar(x_pos - width, ref_means, width, label='CR2MET', alpha=0.8, color='blue')
    axes[0,0].bar(x_pos, orig_means, width, label='Original', alpha=0.8, color='red')
    axes[0,0].bar(x_pos + width, corr_means, width, label='Corregido', alpha=0.8, color='green')
    axes[0,0].set_xlabel('Modelos')
    axes[0,0].set_ylabel('Precipitaci√≥n media (mm/day)')
    axes[0,0].set_title('Precipitaci√≥n Media')
    axes[0,0].set_xticks(x_pos)
    axes[0,0].set_xticklabels([m.upper() for m in models], rotation=45)
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)
    
    # 2. Sesgos antes y despu√©s
    axes[0,1].bar(x_pos - width/2, orig_bias_means, width, label='Sesgo Original', alpha=0.8, color='red')
    axes[0,1].bar(x_pos + width/2, corr_bias_means, width, label='Sesgo Corregido', alpha=0.8, color='green')
    axes[0,1].axhline(y=0, color='black', linestyle='-', alpha=0.5)
    axes[0,1].set_xlabel('Modelos')
    axes[0,1].set_ylabel('Sesgo (mm/day)')
    axes[0,1].set_title('Sesgo en Precipitaci√≥n Media')
    axes[0,1].set_xticks(x_pos)
    axes[0,1].set_xticklabels([m.upper() for m in models], rotation=45)
    axes[0,1].legend()
    axes[0,1].grid(True, alpha=0.3)
    
    # 3. Mejora absoluta
    improvements = [r['improvement_mean'] for r in validation_results]
    colors = ['green' if x > 0 else 'red' for x in improvements]
    axes[1,0].bar(x_pos, improvements, color=colors, alpha=0.7)
    axes[1,0].axhline(y=0, color='black', linestyle='-', alpha=0.5)
    axes[1,0].set_xlabel('Modelos')
    axes[1,0].set_ylabel('Mejora en sesgo absoluto (mm/day)')
    axes[1,0].set_title('Mejora despu√©s de Bias Correction')
    axes[1,0].set_xticks(x_pos)
    axes[1,0].set_xticklabels([m.upper() for m in models], rotation=45)
    axes[1,0].grid(True, alpha=0.3)
    
    # 4. Tabla resumen
    axes[1,1].axis('tight')
    axes[1,1].axis('off')
    
    table_data = []
    for r in validation_results:
        table_data.append([
            r['model'].upper(),
            r['method'],
            f"{r['orig_bias_mean']:+.2f}",
            f"{r['corr_bias_mean']:+.2f}",
            f"{r['improvement_mean']:+.2f}"
        ])
    
    table = axes[1,1].table(
        cellText=table_data,
        colLabels=['Modelo', 'M√©todo', 'Sesgo Orig.', 'Sesgo Corr.', 'Mejora'],
        cellLoc='center',
        loc='center',
        bbox=[0, 0, 1, 1]
    )
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1, 1.5)
    axes[1,1].set_title('Resumen Estad√≠stico')
    
    plt.tight_layout()
    
    # Guardar figura
    plot_file = OUTPUT_PATH / "logs" / "bias_correction_validation_plot.png"
    plt.savefig(plot_file, dpi=300, bbox_inches='tight')
    print(f"‚úì Gr√°fico guardado en: {plot_file}")
    plt.show()

print("\n" + "="*60)
print("RESUMEN FINAL DEL PIPELINE DE BIAS CORRECTION")
print("="*60)

print(f"\nüìÅ ARCHIVOS GENERADOS:")
print(f"  Regridded: {OUTPUT_PATH}/regridded/{{MODEL}}/pr/historical/")
print(f"  Par√°metros: {OUTPUT_PATH}/bias_params/{{MODEL}}/pr/historical/")
print(f"  Corregidos: {OUTPUT_PATH}/corrected/{{MODEL}}/pr/historical/")
print(f"  Logs: {OUTPUT_PATH}/logs/")

print(f"\nüìä MODELOS PROCESADOS:")
for model_name in corrected_datasets.keys():
    corrected_file = corrected_datasets[model_name]['file']
    method = corrected_datasets[model_name]['method']
    print(f"  ‚úì {model_name.upper()}: {method} ‚Üí {corrected_file.name}")

if validation_results:
    print(f"\nüìà VALIDACI√ìN:")
    avg_improvement = np.mean([r['improvement_mean'] for r in validation_results])
    print(f"  Mejora promedio en sesgo: {avg_improvement:+.3f} mm/day")
    best_model = max(validation_results, key=lambda x: x['improvement_mean'])
    print(f"  Mejor correcci√≥n: {best_model['model'].upper()} ({best_model['improvement_mean']:+.3f} mm/day)")

print(f"\nüîÑ PR√ìXIMOS PASOS:")
print(f"  1. Repetir proceso para tmax y tmin")
print(f"  2. Aplicar par√°metros entrenados a escenarios SSP")
print(f"  3. Validar con m√©tricas de extremos clim√°ticos")

print(f"\n‚úÖ PIPELINE COMPLETADO EXITOSAMENTE")
print("="*60)