In [1]:
import os
import shutil

import pandas as pd
import xarray as xr

from ESGF import Domain, Variable, Experiment, DrivingModel, TimeFrequency, Institute,save_bash_scritp

# Download

## Download SPEI from COPERNICUS project

See this [dataset](https://cds.climate.copernicus.eu/datasets/multi-origin-c3s-atlas?tab=overview) for more information; look for SPEI (Standardised Precipitation Evapotranspiration Index).

In [None]:
import cdsapi

# Identifier of the dataset to retrieve through the CDS API.
dataset = "multi-origin-c3s-atlas"

# Selection sent with the request: the 6-month-cumulation monthly SPEI,
# CMIP6 origin, SSP5-8.5 experiment, global domain, period 2015-2100.
request = dict(
    origin="cmip6",
    experiment="ssp5_8_5",
    domain="global",
    period="2015-2100",
    variable="monthly_standardised_precipitation_evapotranspiration_index_for_6_months_cumulation_period",
)

# NOTE(review): cdsapi.Client() is expected to pick up CDS credentials from
# the local cdsapi configuration, and download() is called without a target
# path — confirm where the file ends up.
client = cdsapi.Client()
client.retrieve(dataset, request).download()


## Download raw variables

In [3]:
# Domains for which wget scripts are generated; commented-out entries are
# currently disabled.
domains_of_interest = [
    # Domain.SAM22,
    # Domain.NAM22,
    Domain.EUR11,
    # Domain.AFR44,
    # Domain.EAS22,
    # Domain.WAS22,
]
# Experiments and variables combined with every domain above.
experiments_of_interest = [Experiment.historical, Experiment.rcp26, Experiment.rcp85]
variables_of_interest = [Variable.pr, Variable.tas]
# Driving models passed as a whole list to save_bash_scritp.
# NOTE(review): `hadGEM2_ES` is the only member starting with a lowercase
# letter, and the HadGEM2_* / MOHC_HadGEM2_* entries look like they may refer
# to the same models — confirm against the DrivingModel definition in ESGF.
models_of_interest = [
    DrivingModel.hadGEM2_ES,
    DrivingModel.HadGEM2_CC,
    DrivingModel.NCC_NorESM1_M,
    DrivingModel.NCC_NorESM1_ME,
    DrivingModel.MOHC_HadGEM2_CC,
    DrivingModel.MOHC_HadGEM2_ES,
    DrivingModel.MPI_ESM_LR,
    DrivingModel.MPI_ESM_MR
]

In [None]:
# TODO: create a dataframe showing how many models there are for each domain/experiment/variable

In [4]:
## Create the folder structure and save the wget scripts
# One folder per (domain, experiment, variable) combination, in the same
# order the nested loops would visit them.
combinations = [
    (domain, experiment, variable)
    for domain in domains_of_interest
    for experiment in experiments_of_interest
    for variable in variables_of_interest
]

for domain, experiment, variable in combinations:
    path = f'wget_scripts/{domain}/{experiment}/{variable}'
    os.makedirs(path, exist_ok=True)
    # The full list of driving models is handed over in a single call;
    # presumably the helper writes the resulting wget script(s) into `path`
    # — confirm in ESGF.save_bash_scritp.
    save_bash_scritp(
        domain=domain,
        experiment=experiment,
        time_frequency=TimeFrequency.month,
        variable=variable,
        driving_model=models_of_interest,
        path=path,
    )

### Download pr

In [None]:
%%bash 
# Run the wget scripts for the historical `pr` files from their own folder.
# Abort if the folder is missing so the scripts never run in another directory.
cd ./data/historical/pr/wget_scripts/ || exit 1
# NOTE(review): `-s` is presumably the wget-script option that skips the
# login/security step — confirm with `bash <script> -h`.
bash wget-20241228215607.sh -s
bash wget-20241228225357.sh -s
bash wget-20241228225701.sh -s

### Download tas

In [None]:
%%bash 
# Run the wget scripts for the historical `tas` files from their own folder.
# Abort if the folder is missing so the scripts never run in another directory.
cd ./data/historical/tas/wget_scripts/ || exit 1
# NOTE(review): `-s` is presumably the wget-script option that skips the
# login/security step — confirm with `bash <script> -h`.
bash wget-20241228220807.sh -s
bash wget-20241228230852.sh -s
bash wget-20241228230930.sh -s

# Organizing files

In [14]:
# Move every downloaded NetCDF file out of its wget-script folder into
# ./data/<domain>/<experiment>/<variable>/<model>/.
for domain in domains_of_interest:
    for experiment in experiments_of_interest:
        for variable in variables_of_interest:
            origin_path = f"wget_scripts/{domain}/{experiment}/{variable}"

            # Group files by the exact third "_"-separated field of the file
            # name (presumably the driving-model id — confirm against the
            # file naming convention). Exact matching matters: a substring
            # test would also hand files of one model to any other model
            # whose id is contained in it (e.g. an id that is a prefix of
            # another).
            files_by_model = {}
            for file_name in sorted(os.listdir(origin_path)):
                if not file_name.endswith(".nc"):
                    continue
                fields = file_name.split("_")
                if len(fields) < 3:
                    print(f"Skipping {origin_path}/{file_name}: unexpected file name")
                    continue
                files_by_model.setdefault(fields[2], []).append(file_name)

            for model, model_files in files_by_model.items():
                destiny_path = f"./data/{domain}/{experiment}/{variable}/{model}"
                os.makedirs(destiny_path, exist_ok=True)
                for file_name in model_files:
                    # shutil.move raises on failure and needs no shell
                    # quoting, unlike a string-built `mv` command.
                    shutil.move(
                        os.path.join(origin_path, file_name),
                        os.path.join(destiny_path, file_name),
                    )


# Checking File Integrity and Dimensions

In [None]:
# TODO: create a script to check file integrity

In [18]:
# TODO: Update scripts to check dataset dimensions
# Print the dimension sizes of the combined dataset of each model folder.
variable = 'pr'
experiment = 'historical'

data_dir = f'./data/{experiment}/{variable}/'
# Keep only model folders: skip the wget_scripts folder and any stray files,
# which would otherwise make the *.nc glob below match nothing.
models = sorted(
    folder_name
    for folder_name in os.listdir(data_dir)
    if 'wget_scripts' not in folder_name
    and os.path.isdir(os.path.join(data_dir, folder_name))
)

for model in models:
    # The context manager closes the opened files once the sizes are printed.
    with xr.open_mfdataset(f'./data/{experiment}/{variable}/{model}/*.nc') as ds:
        print(ds.sizes)
        

Frozen({'time': 540, 'bnds': 2, 'lat': 256, 'lon': 512})
Frozen({'time': 540, 'bnds': 2, 'lat': 324, 'lon': 432})
Frozen({'time': 540, 'bnds': 2, 'lat': 192, 'lon': 288})


In [19]:
# Checking dataset dimensions: print the dimension sizes of the combined
# dataset of each model folder.
# `experiment` is not set in this cell; it is the value assigned by the
# previous dimension-check cell.
variable = 'tas'

data_dir = f'./data/{experiment}/{variable}/'
# Keep only model folders: skip the wget_scripts folder and any stray files,
# which would otherwise make the *.nc glob below match nothing.
models = sorted(
    folder_name
    for folder_name in os.listdir(data_dir)
    if 'wget_scripts' not in folder_name
    and os.path.isdir(os.path.join(data_dir, folder_name))
)

for model in models:
    # The context manager closes the opened files once the sizes are printed.
    with xr.open_mfdataset(f'./data/{experiment}/{variable}/{model}/*.nc') as ds:
        print(ds.sizes)

Frozen({'time': 540, 'bnds': 2, 'lat': 256, 'lon': 512})
Frozen({'time': 540, 'bnds': 2, 'lat': 324, 'lon': 432})
Frozen({'time': 540, 'bnds': 2, 'lat': 192, 'lon': 288})
