# Climate Data Extraction for SWAT Input

## Overview
This script extracts bias-corrected climate data and converts it into SWAT-compatible text files. It processes different climate parameters (precipitation, temperature, humidity, wind, and radiation) for multiple models and scenarios.

## Steps Performed:
1. **Define Directories**  
   - Paths to bias-corrected precipitation data, standardized other climate data, stations shapefile, and output folder.

2. **Parameter Configuration**  
   - Defines climate parameters and their corresponding SWAT metadata filenames.

3. **Set Scenarios and Time Range**  
   - Specifies climate scenarios (`ssp245`, `ssp585`) and forecast years (2025–2100).

4. **Load Grid Points**  
   - Reads centroid locations from the shapefile to extract point-based climate data.

5. **Data Extraction Process**
   - Loops over parameters, then over models and scenarios.
   - Extracts data for each grid point in the shapefile.
   - Selects the nearest latitude/longitude grid cell in each NetCDF file.
   - Handles precipitation separately from other parameters.
   - Saves extracted data into SWAT input files.

6. **Save Metadata for SWAT**  
   - Generates SWAT-compatible metadata files such as `pcp.txt`, `tmp.txt`, etc.

## Instructions for Use:
- Ensure the climate datasets and shapefile are available in the specified directories.
- Update paths if necessary.
- Run the script to generate SWAT input files.

### Output:
- SWAT-ready text files containing daily climate data.
- Metadata files (`pcp.txt`, `tmp.txt`, `rh.txt`, `solar.txt`, `wind.txt`) listing each point's ID, name, latitude, longitude, and elevation.

**Final Message:**
_"SWAT data extraction completed."_  


In [None]:
import os
import glob
import xarray as xr
import pandas as pd
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Define your directories
# -------------------------------------------------------------------
# Every input and output lives under one working folder, so only this
# path needs editing when the project is moved.
working_folder = r"D:\Hesham\WhiteNile\CMIP6-BiasCorrection-SWAT\workingfolder"

# Bias-corrected NetCDF files (read for the "pr" parameter).
bias_corrected_root = rf"{working_folder}\bias_corrected"
# Standardized NetCDF files; its sub-folders are also used as the model list.
standardized_root = rf"{working_folder}\converted_data"
# Point shapefile with the locations to extract (add your stations shapefile).
grid_shapefile_path = rf"{working_folder}\stations_shapefile\Centroid.shp"
# Destination for the generated SWAT text files and metadata files.
output_root_folder = rf"{working_folder}\SWAT_INPUT"

# Create the output folder up front so later writes cannot fail on it.
os.makedirs(output_root_folder, exist_ok=True)


# -------------------------------------------------------------------
# 2. Define parameters and their SWAT metadata file names
# -------------------------------------------------------------------
# One row per SWAT input: (parameter key, NetCDF variable name(s), metadata file).
# Temperature is the only entry that carries two variables (max and min).
_PARAMETER_TABLE = (
    ("tasmax_tasmin", ("tasmax", "tasmin"), "tmp.txt"),
    ("hurs",          "hurs",               "rh.txt"),
    ("rsds",          "rsds",               "solar.txt"),
    ("sfcWind",       "sfcWind",            "wind.txt"),
    ("pr",            "pr",                 "pcp.txt"),  # Precipitation
)

# Keyed by parameter; insertion order is the processing order.
parameters = {
    key: {"var_name": var_name, "metadata_name": metadata_name}
    for key, var_name, metadata_name in _PARAMETER_TABLE
}

# -------------------------------------------------------------------
# 3. Time range and scenarios
# -------------------------------------------------------------------
# Both bounds are inclusive: one iteration per calendar year, 2025 through 2100.
first_year, last_year = 2025, 2100
years = range(first_year, last_year + 1)

# Scenario names as they appear in the input folder and file names.
scenarios = ["ssp245", "ssp585"]

# -------------------------------------------------------------------
# 4. Load grid points from the shapefile
# -------------------------------------------------------------------
centroids_gdf = gpd.read_file(grid_shapefile_path)

# The extraction step matches each point's x/y against the NetCDF "lon"/"lat"
# coordinate values, so the points must be expressed in degrees (assumes the
# NetCDF grids use geographic lon/lat -- confirm for your data). A shapefile
# stored in a projected CRS would otherwise silently snap every point to an
# edge cell of the grid, so reproject it to WGS84 first. Layers without any
# CRS information are left untouched.
if centroids_gdf.crs is not None and not centroids_gdf.crs.is_geographic:
    print(f"Reprojecting grid points from {centroids_gdf.crs.to_string()} to EPSG:4326")
    centroids_gdf = centroids_gdf.to_crs(epsg=4326)

# -------------------------------------------------------------------
# 5. Main loop over each parameter
# -------------------------------------------------------------------
def _first_match(pattern):
    """Return the alphabetically first path matching *pattern*, or None.

    Sorting makes the "pick the first if multiple" choice deterministic.
    """
    matches = sorted(glob.glob(pattern))
    return matches[0] if matches else None


def _standardized_file(variable, model, scenario, year):
    """Locate the standardized NetCDF file of one variable for one year.

    The dynamic parts of the file name are handled with wildcards, e.g.:
    hurs_day_{model}_{scenario}_*_{year}_*_clipped.nc
    Returns the file path, or None when nothing matches.
    """
    return _first_match(os.path.join(
        standardized_root,
        model,
        scenario,
        variable,
        f"{variable}_day_{model}_{scenario}_*_{year}_*_clipped.nc",
    ))


def _read_point_series(nc_path, var_name, lon, lat):
    """Return (time_index, values) of *var_name* at the cell nearest (lon, lat).

    The dataset is opened in a ``with`` block so the file handle is released
    as soon as the values are in memory; the script opens one file per
    point and year, so leaked handles add up quickly.
    Assumes the variable is dimensioned (time, lat, lon) with 1-D "lat" and
    "lon" coordinates -- the positional indexing below relies on it.
    """
    with xr.open_dataset(nc_path) as ds:
        lon_idx = abs(ds["lon"].values - lon).argmin()
        lat_idx = abs(ds["lat"].values - lat).argmin()
        # ".values" loads the data eagerly, so it stays valid after closing.
        return ds.indexes["time"], ds[var_name][:, lat_idx, lon_idx].values


def _format_start_date(first_time):
    """Format the first time stamp of a series as YYYYMMDD.

    Time stamps that provide their own ``strftime`` are formatted directly,
    which also covers non-pandas datetime objects (presumably cftime objects
    for non-standard calendars -- confirm); anything else goes through
    ``pd.to_datetime`` as before.
    """
    if hasattr(first_time, "strftime"):
        return first_time.strftime("%Y%m%d")
    return pd.to_datetime(first_time).strftime("%Y%m%d")


# Either specify your models explicitly or list them from the standardized folder.
# The listing does not depend on the parameter, so it is done once.
models = sorted(
    entry for entry in os.listdir(standardized_root)
    if os.path.isdir(os.path.join(standardized_root, entry))
)

# (point_id, lon, lat) for every point in the shapefile; IDs are 1-based.
grid_points = [
    (index + 1, centroid.geometry.x, centroid.geometry.y)
    for index, centroid in centroids_gdf.iterrows()
]
elevation = 0  # Placeholder if you don't have real elevation data

for param, param_config in parameters.items():
    # Metadata rows [ID, NAME, LAT, LONG, ELEVATION]: exactly one per point.
    # They are built up front rather than inside the model/scenario loops,
    # which would repeat every point once per model and scenario and would
    # drop points whose data file already exists from a previous run.
    param_metadata = [
        [point_id, f"{param[:3]}{str(point_id).zfill(3)}", lat, lon, elevation]
        for point_id, lon, lat in grid_points
    ]

    for model in models:
        print(f"Processing model: {model}")

        for scenario in scenarios:
            print(f"  Processing scenario: {scenario}")

            # Prepare scenario-specific output folder
            scenario_output_folder = os.path.join(output_root_folder, f"{model}_{scenario}")
            param_output_folder = os.path.join(scenario_output_folder, param)
            os.makedirs(param_output_folder, exist_ok=True)  # creates both levels

            # -------------------------------------------------------------------
            # Loop over each point in the shapefile
            # -------------------------------------------------------------------
            for point_id, lon, lat in grid_points:
                point_name = f"{param[:3]}{str(point_id).zfill(3)}"
                txt_file_path = os.path.join(param_output_folder, f"{point_name}.txt")

                # Skip if the file already exists
                if os.path.exists(txt_file_path):
                    print(f"File already exists, skipping: {txt_file_path}")
                    continue

                # We will accumulate daily values across all years into this list
                daily_values = []
                start_date = None

                # -------------------------------------------------------------------
                # Loop over each year from 2025 to 2100
                # NOTE: a year without input files is skipped, which leaves a gap
                # in the otherwise consecutive daily series written below.
                # -------------------------------------------------------------------
                for year in years:
                    # -------------------------------------------------------------------
                    # 5.1 Locate and read the NetCDF data (precipitation vs. others)
                    # -------------------------------------------------------------------
                    if param == "pr":
                        # Precipitation from bias-corrected directory
                        # e.g.: bias_corrected_{model}_{scenario}_{year}.nc
                        nc_file_path = os.path.join(
                            bias_corrected_root,
                            model,
                            scenario,
                            f"bias_corrected_{model}_{scenario}_{year}.nc",
                        )
                        if not os.path.exists(nc_file_path):
                            print(f"    File not found: {nc_file_path} => skipping precipitation.")
                            continue

                        time_values, data_values = _read_point_series(
                            nc_file_path, param_config["var_name"], lon, lat
                        )
                        year_lines = [f"{val:.2f}" for val in data_values]

                    elif param == "tasmax_tasmin":
                        # Temperature needs two files per year: tasmax and tasmin
                        tasmax_file = _standardized_file("tasmax", model, scenario, year)
                        tasmin_file = _standardized_file("tasmin", model, scenario, year)

                        if not (tasmax_file and tasmin_file):
                            print(f"    Missing tasmax or tasmin files for year={year}, skipping.")
                            continue

                        tasmax_var, tasmin_var = param_config["var_name"]
                        # Each file is indexed on its own grid; the time axis of
                        # the tasmax file supplies the start date.
                        time_values, tasmax_values = _read_point_series(
                            tasmax_file, tasmax_var, lon, lat
                        )
                        _, tasmin_values = _read_point_series(
                            tasmin_file, tasmin_var, lon, lat
                        )
                        year_lines = [
                            f"{tx:.2f},{tn:.2f}"
                            for tx, tn in zip(tasmax_values, tasmin_values)
                        ]

                    else:
                        # For the other parameters (hurs, rsds, sfcWind),
                        # we use the standardized folder with a wildcard
                        nc_file_path = _standardized_file(param, model, scenario, year)
                        if not nc_file_path:
                            print(f"    No file match for {param}, year={year} => skipping.")
                            continue

                        time_values, data_values = _read_point_series(
                            nc_file_path, param_config["var_name"], lon, lat
                        )
                        year_lines = [f"{val:.2f}" for val in data_values]

                    # -------------------------------------------------------------------
                    # 5.2 Store daily data
                    # -------------------------------------------------------------------
                    if start_date is None and len(time_values) > 0:
                        # The first time stamp read becomes the file's header line
                        start_date = _format_start_date(time_values[0])

                    daily_values.extend(year_lines)

                # -------------------------------------------------------------------
                # 5.3 Write daily data to the SWAT text file
                # -------------------------------------------------------------------
                if daily_values:
                    with open(txt_file_path, "w") as txt_file:
                        txt_file.write(f"{start_date}\n")
                        txt_file.write("\n".join(daily_values))
                    print(f"      Data written for point {point_name} => {txt_file_path}")
                else:
                    print(f"      No data to write for point {point_name} ({param}).")

    # -------------------------------------------------------------------
    # 6. Save parameter-level metadata (e.g., tmp.txt, pcp.txt, etc.)
    # -------------------------------------------------------------------
    if param_metadata:
        metadata_df = pd.DataFrame(param_metadata, columns=["ID", "NAME", "LAT", "LONG", "ELEVATION"])
        metadata_file_path = os.path.join(output_root_folder, param_config["metadata_name"])
        metadata_df.to_csv(metadata_file_path, index=False, sep=",")
        print(f"Metadata saved for {param}: {metadata_file_path}")

print("SWAT data extraction completed.")