In [6]:
"""
CMIP6 Climate Data Extraction 
---------------------------------
Author: Aurela Sakaj
Date: 2025-04-09

This script extracts and processes climate model data from the CMIP6 archive for specified cities.
It handles temperature (converted from K to °C) and precipitation (converted from kg/m²/s to mm/day),
combines the results from multiple models and scenarios, and saves one CSV file per city.
"""

import intake
import xarray as xr
import pandas as pd
import numpy as np
import os
import time

# Configuration

# Target cities, each mapped to its (latitude, longitude) pair.
# Insertion order is the order in which the cities are processed.
LOCATIONS = dict(
    Cairo=(30.0444, 31.2357),
    Helsinki=(60.1695, 24.9354),
    New_Delhi=(28.6139, 77.2090),
)

# Values passed to the catalog search as source_id, experiment_id and
# variable_id respectively.
MODELS = [
    "MPI-ESM1-2-LR",
    "MIROC6",
    "EC-Earth3",
]
SCENARIOS = [
    "historical",
    "ssp245",
    "ssp585",
]
VARIABLES = [
    "tas",  # temperature
    "pr",   # precipitation
]

# Output folder for the per-city CSV files; created up front if missing.
SAVE_DIR = "CMIP6_extracts"
os.makedirs(SAVE_DIR, exist_ok=True)

# Utility Functions

def _wrap_longitude(lon, grid_lon_min, grid_lon_max):
    """Return ``lon`` expressed in the grid's longitude convention (0..360 or -180..180)."""
    if lon < 0 and grid_lon_min >= 0:
        # Grid runs 0..360 but a western longitude was given as negative.
        return lon % 360
    if lon > 180 and grid_lon_max <= 180:
        # Grid runs -180..180 but the longitude was given in 0..360.
        return (lon + 180) % 360 - 180
    return lon


def extract_and_convert(ds, lat, lon):
    """Extract the time series at the grid point nearest to (lat, lon).

    Args:
        ds: Dataset holding the 'tas' and 'pr' variables, indexed by 'lat',
            'lon' and 'time'.
        lat: Latitude of the target location, in degrees.
        lon: Longitude of the target location, in degrees. Either the
            -180..180 or the 0..360 convention is accepted; the value is
            wrapped to whichever one the dataset's 'lon' coordinate uses.

    Returns:
        A DataFrame with the columns 'time', 'temperature_C' and
        'precipitation_mm_per_day'.
    """
    # A nearest-neighbour lookup does not wrap around: without this step a
    # negative longitude on a 0..360 grid would silently snap to the grid edge.
    grid_lon = ds['lon']
    lon = _wrap_longitude(lon, float(grid_lon.min()), float(grid_lon.max()))

    tas = ds['tas'].sel(lat=lat, lon=lon, method='nearest') - 273.15  # K to °C
    pr = ds['pr'].sel(lat=lat, lon=lon, method='nearest') * 86400    # kg/m²/s to mm/day
    df = xr.Dataset({'temperature_C': tas, 'precipitation_mm_per_day': pr}).to_dataframe().reset_index()
    df['time'] = pd.to_datetime(df['time'])
    return df[['time', 'temperature_C', 'precipitation_mm_per_day']]

# Load catalog
print("Loading catalog...")
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

start_time = time.time()

# Datasets already opened, keyed by model name (None means the catalog search
# returned nothing). The search and the opening depend only on the model,
# never on the city, so they are done once per model and reused for every
# city instead of being repeated inside the city loop.
model_datasets = {}

# Main Processing Loop

for city, coords in LOCATIONS.items():
    print(f"\nProcessing city: {city}")
    city_data = []

    for model in MODELS:
        print(f"  Processing model: {model}")

        if model not in model_datasets:
            # Single search per model for all scenarios and variables
            cat = col.search(
                source_id=model,
                experiment_id=SCENARIOS,
                variable_id=VARIABLES,
                table_id='Amon',
                member_id='r1i1p1f1'
            )

            if cat.df.empty:
                model_datasets[model] = None
            else:
                try:
                    model_datasets[model] = cat.to_dataset_dict(
                        zarr_kwargs={'consolidated': True, 'use_cftime': False},
                        storage_options={'token': 'anon'},
                        progressbar=False
                    )
                except Exception as e:
                    # Not cached on failure, so the load is retried for the next city.
                    print(f"    Error loading data: {e}")
                    continue

        dsets = model_datasets[model]
        if dsets is None:
            print(f"    No data found for {model}, skipping.")
            continue

        model_frames = []

        for key, ds in dsets.items():
            # Keep only datasets for a requested scenario that carry every variable.
            scenario = ds.attrs.get("experiment_id", "").lower()
            if scenario not in SCENARIOS:
                continue
            if not all(var in ds for var in VARIABLES):
                continue

            try:
                df = extract_and_convert(ds, *coords)
                df['scenario'] = scenario
                df['model'] = model
                model_frames.append(df)
            except Exception as e:
                # Best effort: one failing dataset must not abort the whole city.
                print(f"    Failed extraction {key}: {e}")
                continue

        if model_frames:
            city_data.append(pd.concat(model_frames))

    # Combine and save city's data
    if city_data:
        out_path = f"{SAVE_DIR}/{city}_data.csv"
        combined_city_df = pd.concat(city_data).sort_values(by=['model', 'scenario', 'time'])
        combined_city_df.to_csv(out_path, index=False)
        print(f"  Saved: {out_path}")
    else:
        print(f"  No data extracted for {city}.")

elapsed = time.time() - start_time
print(f"\nAll done in {elapsed:.2f} seconds.")


Loading catalog...

Processing city: Cairo
  Processing model: MPI-ESM1-2-LR
  Processing model: MIROC6
  Processing model: EC-Earth3
  Saved: CMIP6_extracts/Cairo_data.csv

Processing city: Helsinki
  Processing model: MPI-ESM1-2-LR
  Processing model: MIROC6
  Processing model: EC-Earth3
  Saved: CMIP6_extracts/Helsinki_data.csv

Processing city: New_Delhi
  Processing model: MPI-ESM1-2-LR
  Processing model: MIROC6
  Processing model: EC-Earth3
  Saved: CMIP6_extracts/New_Delhi_data.csv

All done in 1653.85 seconds.
