## Bias Correction Process

This script performs **bias correction** of precipitation data from CMIP6 models using CHIRPS observed precipitation data. The process involves:

### 1. Setting Up Directories
- Define paths for standardized **CMIP6 model data**, **CHIRPS observed data**, and **output files**.
- Ensure that all required datasets are stored in the correct directory structure.

### 2. Loading Calibration Data
- Load observed precipitation data (`CHIRPS`) and CMIP6 model data for the overlapping period (2015–2025).
- Standardize the time format and **align observed data to the model grid** for consistency.

### 3. Computing Quantiles
- Compute **quantiles** for both observed and model precipitation data to establish a correction relationship.

### 4. Applying Bias Correction (Quantile Mapping)
- Apply **quantile mapping** to adjust model precipitation data using the computed quantiles.
- Save **bias-corrected calibration data** for the overlapping period (2015-2025).

### 5. Processing Future Projections (2025–2100)
- Load **future CMIP6 model data** for the specified scenarios.
- Apply the **same quantile mapping** to adjust future precipitation projections.
- Save the **bias-corrected future data**.

### Instructions for Use
1. Ensure that **standardized CMIP6 data and CHIRPS observed data** exist in the specified directories.
2. Run the script, and it will:
   - Identify the available models and scenarios.
   - Perform **bias correction using quantile mapping**.
   - Save the corrected outputs in the designated output folder.

### Final Output
- Bias-corrected precipitation data for both:
  - **Calibration period (2015–2025)**.
  - **Future projections (2025–2100)**.
- All corrected data is stored in the **output directory**.


In [None]:
import xarray as xr
import numpy as np
import os
import glob

# === Define your base paths here ===
# base_dir is listed for one sub-folder per model; each model folder is
# expected to contain <scenario>/pr/ with the yearly model files.
base_dir = r"D:/CMIP6-BiasCorrection-SWAT/workingfolder/standardized_data/"
# Folder holding the yearly observed files named by `observed_pattern`.
observed_base_dir = r"D:/CMIP6-BiasCorrection-SWAT/workingfolder/observed/"
# Root under which <model>/<scenario>/ output folders are created.
output_dir = r"D:/CMIP6-BiasCorrection-SWAT/workingfolder/bias_corrected/"

# Create the output root up front; exist_ok keeps re-runs from failing.
os.makedirs(output_dir, exist_ok=True)

# Define calibration and future years
# NOTE: range() excludes its end value, so the calibration years are
# 2015..2024 and the future years are 2025..2100.
calibration_years = range(2015, 2025)
future_years = range(2025, 2101)

# === Observed data parameters ===
# If your observed data follows a different pattern, adjust here
# `{year}` is substituted with each calibration year when building the path.
observed_pattern = "clipped_chirps-v2.0.{year}.days_p25.nc"
# Name of the variable read from each observed dataset.
observed_variable = "precip"

# Function to standardize time
def standardize_time(ds):
    """Return ``ds`` with a ``np.datetime64`` time coordinate.

    If the ``time`` coordinate already has a datetime64 dtype, ``ds`` is
    returned untouched. Otherwise the coordinate is overwritten in place with
    consecutive daily timestamps that start at the first original time value
    and span the same number of steps as the original axis.
    """
    time_coord = ds["time"]

    # Nothing to do when the axis is already datetime64.
    if np.issubdtype(time_coord.dtype, np.datetime64):
        return ds

    first_stamp = str(time_coord.values[0])
    n_steps = len(time_coord)

    # Rebuild the axis as a daily sequence, then convert it to a pandas
    # DatetimeIndex so downstream code sees datetime64 values.
    daily_index = xr.cftime_range(start=first_stamp, periods=n_steps, freq="D")
    ds["time"] = daily_index.to_datetimeindex()
    return ds

# Function to standardize observed data dimensions
def standardize_observed(observed_ds, model_ds):
    """Put the observed dataset on the model's lat/lon grid.

    The observed ``y``/``x`` dimensions are renamed to ``lat``/``lon`` and the
    result is linearly interpolated onto the ``lat`` and ``lon`` coordinates
    taken from ``model_ds``. The interpolated dataset is returned.
    """
    dim_aliases = {"y": "lat", "x": "lon"}
    renamed = observed_ds.rename(dim_aliases)

    target_lat = model_ds["lat"]
    target_lon = model_ds["lon"]
    return renamed.interp(lat=target_lat, lon=target_lon, method="linear")

def _find_model_file(scenario_path, model, scenario, year):
    """Return the model file for ``year`` in ``scenario_path``, or ``None``.

    Files are matched with the dynamic pattern
    ``pr_day_{model}_{scenario}_{pattern}_{year}_{version}_clipped.nc``.
    Matches are sorted so that the choice is deterministic when several
    files match (glob itself returns results in arbitrary order).
    """
    matches = sorted(
        glob.glob(
            os.path.join(scenario_path, f"pr_day_{model}_{scenario}_*_{year}_*_clipped.nc")
        )
    )
    return matches[0] if matches else None


def _quantile_map(data, model_quantiles_flat, observed_quantiles_flat):
    """Apply empirical quantile mapping to ``data`` cell by cell.

    ``data`` must have ``time``, ``lat`` and ``lon`` dimensions. The two
    quantile tables must already be stacked over ``z=("lat", "lon")`` and
    carry a ``quantile`` dimension. For every grid cell the model values are
    mapped onto the observed distribution with ``np.interp``; values outside
    the calibration range are therefore clamped to the lowest/highest
    observed quantile. Returns an array with ``lat``/``lon`` unstacked again.
    """
    data_flat = data.stack(z=("lat", "lon"))
    corrected_flat = xr.apply_ufunc(
        np.interp,
        data_flat,
        model_quantiles_flat,
        observed_quantiles_flat,
        input_core_dims=[["time"], ["quantile"], ["quantile"]],
        output_core_dims=[["time"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[data.dtype],
    )
    return corrected_flat.unstack("z")


# Loop through models and scenarios in the base directory
for model in os.listdir(base_dir):
    model_path = os.path.join(base_dir, model)
    if not os.path.isdir(model_path) or model == "observed":
        # Skip non-directories and the 'observed' folder itself
        continue

    for scenario in ["ssp245", "ssp585"]:
        scenario_path = os.path.join(model_path, scenario, "pr")
        if not os.path.exists(scenario_path):
            continue  # Skip if scenario folder doesn't exist

        print(f"Processing Model: {model}, Scenario: {scenario}")

        # Create an output directory for each scenario
        scenario_output_dir = os.path.join(output_dir, model, scenario)
        os.makedirs(scenario_output_dir, exist_ok=True)

        # Collect observed and model data for calibration, together with the
        # years that were actually loaded so outputs are labelled correctly
        # even when some years are missing.
        observed_list = []
        model_list = []
        used_calibration_years = []

        # === Load calibration data ===
        for year in calibration_years:
            obs_file = os.path.join(observed_base_dir, observed_pattern.format(year=year))
            mod_file = _find_model_file(scenario_path, model, scenario, year)

            if not os.path.exists(obs_file) or mod_file is None:
                print(f"Missing data for year {year} (observed or model), skipping...")
                continue

            # Open both files in a context manager so the handles are released;
            # the arrays are loaded into memory before the files are closed.
            with xr.open_dataset(obs_file) as obs_raw, xr.open_dataset(mod_file) as mod_raw:
                obs_ds = standardize_time(obs_raw)
                mod_ds = standardize_time(mod_raw)

                # Interpolate observed onto model grid
                obs_on_grid = standardize_observed(obs_ds, mod_ds)

                observed_list.append(obs_on_grid[observed_variable].load())
                model_list.append(mod_ds["pr"].load())

            used_calibration_years.append(year)

        # If no calibration data found, skip to the next scenario
        if not used_calibration_years:
            print("No valid calibration data found. Skipping this scenario...")
            continue

        # Concatenate observed and model data over the calibration period
        observed_data = xr.concat(observed_list, dim="time")
        model_data = xr.concat(model_list, dim="time")

        # Compute quantiles
        print("Calculating quantiles...")
        quantiles = np.linspace(0, 1, 1001)
        observed_quantiles = observed_data.quantile(quantiles, dim="time")
        model_quantiles = model_data.quantile(quantiles, dim="time")

        # The stacked quantile tables do not change per year, so build them
        # once per scenario instead of inside every yearly iteration.
        model_quantiles_flat = model_quantiles.stack(z=("lat", "lon"))
        observed_quantiles_flat = observed_quantiles.stack(z=("lat", "lon"))

        # === Save bias-corrected calibration data ===
        print("Saving bias-corrected calibration data...")
        for year, mod_data in zip(used_calibration_years, model_list):
            corrected_calibration = _quantile_map(
                mod_data, model_quantiles_flat, observed_quantiles_flat
            )
            calibration_ds = xr.Dataset({"pr": corrected_calibration})

            output_file_calibration = os.path.join(
                scenario_output_dir, f"bias_corrected_{model}_{scenario}_{year}_calibration.nc"
            )
            calibration_ds.to_netcdf(output_file_calibration)
            print(f"Bias-corrected calibration data saved for {year}: {output_file_calibration}")

        # === Process future data ===
        print("Processing future data...")
        for year in future_years:
            future_file = _find_model_file(scenario_path, model, scenario, year)
            if future_file is None:
                print(f"No future model file for {year}, skipping...")
                continue

            output_file_future = os.path.join(
                scenario_output_dir, f"bias_corrected_{model}_{scenario}_{year}.nc"
            )

            # Keep the source file open only while it is being corrected and
            # written, so remaining variables can still be read lazily.
            with xr.open_dataset(future_file) as future_raw:
                future_ds = standardize_time(future_raw)
                future_ds["pr"] = _quantile_map(
                    future_ds["pr"], model_quantiles_flat, observed_quantiles_flat
                )
                future_ds.to_netcdf(output_file_future)

            print(f"Bias-corrected future data saved for {year}: {output_file_future}")

print("Processing complete!")
