In [20]:
import pandas as pd
from pandas.errors import OutOfBoundsDatetime
import xarray as xr
from tqdm import tqdm
from cmip6_downscaling.data.cmip import get_gcm
import intake
import json
from cmip6_downscaling.methods.common.containers import RunParameters
from cmip6_downscaling import config

# Open the CMIP6 ESM catalog whose JSON location is read from the project
# config key "data_catalog.cmip.json".
# NOTE(review): the `az_` prefix suggests this is the Azure-hosted copy -- confirm.
az_col = intake.open_esm_datastore(config.get("data_catalog.cmip.json"))

# Open the Pangeo CMIP6 ESM catalog served from Google Cloud Storage.
gcs_col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [None]:
# Variables and experiments that every retained (model, member) pair must provide.
variable_id_list = ['tasmax', 'tasmin', 'pr']
experiment_id_list = ['historical', 'ssp245', 'ssp370', 'ssp585']

# Search criteria: daily data on the native grid for the lists above.
query = {
    'table_id': 'day',
    'grid_label': 'gn',
    'variable_id': variable_id_list,
    'experiment_id': experiment_id_list,
}
col_subset = az_col.search(require_all_on=["variable_id", "experiment_id"], **query)
col_subset_df = col_subset.df

# Count the distinct experiments and variables available per (model, member).
group_result = (
    col_subset_df
    .groupby(["source_id", "member_id"])[["experiment_id", "variable_id"]]
    .nunique()
)

# Keep only the pairs that cover every requested experiment and variable.
has_all_experiments = group_result['experiment_id'] >= len(experiment_id_list)
has_all_variables = group_result['variable_id'] >= len(variable_id_list)
filtered_group_result = group_result[has_all_experiments & has_all_variables].reset_index()

# Join back onto the catalog rows. The count columns collide with the catalog's
# own `experiment_id` / `variable_id`, so pandas suffixes them `_x` (counts)
# and `_y` (catalog values).
catalog_filtered = filtered_group_result.merge(
    col_subset_df,
    on=['source_id', 'member_id'],
    how='inner',
)

NameError: name 'catalog_filtered' is not defined

In [3]:
# Stores that could not be opened, presumably recorded from an earlier run.
# Kept under its own name as a reference only: the scan below always rebuilds
# `invalid_val_list` from scratch (the original code assigned this list to
# `invalid_val_list` and then immediately overwrote it with an empty list).
known_invalid_stores = [
    'az://cmip6/ScenarioMIP/NCAR/CESM2/ssp585/r11i1p1f1/day/pr/gn/v20200528/',
    'az://cmip6/CMIP/CCCma/CanESM5/historical/r18i1p2f1/day/pr/gn/v20190429/',
    'az://cmip6/ScenarioMIP/CCCma/CanESM5/ssp585/r4i1p1f1/day/tasmax/gn/v20190429/',
    'az://cmip6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r2i1p1f1/day/tasmax/gn/v20190710/',
    'az://cmip6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r2i1p1f1/day/pr/gn/v20210830/',
]

# Try to open every zarr store in the filtered catalog and collect the ones
# that fail.
invalid_val_list = []
for val in tqdm(catalog_filtered['zstore'].to_list()):
    try:
        # Opening the store is the whole check; close it straight away so
        # handles do not accumulate over the scan.
        xr.open_zarr(val).close()
    except Exception:
        # `Exception` rather than a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still abort the scan instead of
        # being recorded as an invalid store.
        invalid_val_list.append(val)

# removes invalid stores from filtered catalog
catalog_filtered_clean = catalog_filtered[~catalog_filtered['zstore'].isin(invalid_val_list)]
# NOTE(review): the next cell reads 'valid_catalog.csv', not this path -- confirm
# which file is the intended hand-off.
catalog_filtered_clean.to_csv('az://static/valid_gcm_members.csv', index=False)

In [None]:
# Load the catalog of usable GCM stores from a local CSV.
# NOTE(review): the validation cell writes 'az://static/valid_gcm_members.csv'
# with index=False, while this file carries an 'Unnamed: 0' column (see the
# `valid_df.head(1)` output) -- it looks like a different export; confirm.
valid_df = pd.read_csv('valid_catalog.csv')

In [12]:
# Prediction windows (start year, end year) for historical and scenario runs.
historical_predict_period = ["1950", "2014"]
scenario_predict_period = ["2015", "2099"]

# Skeleton of one parameter set. The empty-string fields are placeholders to
# be filled in per method / model / member / scenario / variable.
template = dict(
    method="",
    obs="ERA5",
    model="",
    member="",
    grid_label="gn",
    table_id="day",
    scenario="",
    variable="",
    train_period=["1981", "2010"],
    predict_period="",
    latmin="-90",
    latmax="90",
    lonmin="-180",
    lonmax="180",
)

In [10]:
# Peek at the first catalog row to check the column names used when filling
# the template (e.g. `experiment_id_y`, `variable_id_y`).
valid_df.head(1)

Unnamed: 0,source_id,member_id,experiment_id_x,variable_id_x,activity_id,institution_id,experiment_id_y,table_id,variable_id_y,grid_label,zstore,dcpp_init_year,version
0,ACCESS-CM2,r1i1p1f1,4,3,CMIP,CSIRO-ARCCSS,historical,day,pr,gn,az://cmip6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/histor...,,20191108


In [28]:
# Build one parameter file per (method, catalog row): fill the template,
# rename the period keys to the names passed to RunParameters, and write the
# result to '<run_id>.json' in the working directory.
for method in ['bcsd', 'gard', 'maca']:
    for index, row in valid_df.iterrows():
        # Start from a fresh copy for every row. The period keys are popped
        # below, so reusing one dict across rows would raise KeyError on
        # 'train_period' from the second row onwards.
        filled_template = template.copy()
        filled_template["method"] = method
        filled_template["member"] = row['member_id']
        filled_template["model"] = row["source_id"]
        filled_template["variable"] = row["variable_id_y"]
        filled_template["scenario"] = row["experiment_id_y"]

        # Scenario (ssp*) runs predict the future window, everything else the
        # historical one. Copy the lists so the written dicts never alias the
        # notebook-level constants.
        if 'ssp' in filled_template["scenario"]:
            filled_template["predict_period"] = list(scenario_predict_period)
        else:
            filled_template["predict_period"] = list(historical_predict_period)
        print(row['member_id'])

        print(filled_template)
        # RunParameters is called with `train_dates` / `predict_dates`
        # keywords, so rename the template's `*_period` keys.
        filled_template['train_dates'] = filled_template.pop('train_period')
        filled_template['predict_dates'] = filled_template.pop('predict_period')

        run_parameters = RunParameters(**filled_template)

        with open(run_parameters.run_id + '.json', 'w') as f:
            json.dump(filled_template, f)
        # NOTE(review): this `break` and the one below stop after the first
        # row of the first method, so only one file is written. They look like
        # debugging leftovers -- remove both to generate the full set.
        break
    break

r1i1p1f1
{'method': 'bcsd', 'obs': 'ERA5', 'model': 'ACCESS-CM2', 'member': 'r1i1p1f1', 'grid_label': 'gn', 'table_id': 'day', 'scenario': 'historical', 'variable': 'pr', 'train_period': ['1981', '2010'], 'predict_period': ['1950', '2014'], 'latmin': '-90', 'latmax': '90', 'lonmin': '-180', 'lonmax': '180'}


In [23]:
# Display the most recently filled template for inspection.
filled_template

{'method': 'bcsd',
 'obs': 'ERA5',
 'model': 'ACCESS-CM2',
 'member': 'r1i1p1f1',
 'grid_label': 'gn',
 'table_id': 'day',
 'scenario': 'historical',
 'variable': 'pr',
 'train_period': ['1981', '2010'],
 'predict_period': ['1950', '2014'],
 'latmin': '-90',
 'latmax': '90',
 'lonmin': '-180',
 'lonmax': '180'}

'bcsd'

In [None]:
# Example of a fully filled-in parameter set, shown as a bare dict literal
# (not assigned to anything). It uses the template's `train_period` /
# `predict_period` key names, i.e. the form before the generation loop
# renames them to `train_dates` / `predict_dates`.
{
    "method": "bcsd",
    "obs": "ERA5",
    "model": "MIROC6",
    "member": "r1i1p1f1",
    "grid_label": "gn",
    "table_id": "day",
    "scenario": "ssp245",
    "variable": "tasmin",
    "train_period": ["1981", "2010"],
    "predict_period": ["2015", "2099"],
    "latmin": "-90",
    "latmax": "90",
    "lonmin": "-180",
    "lonmax": "180",
}

r1i1p1f1


Unnamed: 0,source_id,member_id,experiment_id_x,variable_id_x,activity_id,institution_id,experiment_id_y,table_id,variable_id_y,grid_label,zstore,dcpp_init_year,version
0,ACCESS-CM2,r1i1p1f1,4,3,CMIP,CSIRO-ARCCSS,historical,day,pr,gn,az://cmip6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/histor...,,20191108
1,ACCESS-CM2,r1i1p1f1,4,3,ScenarioMIP,CSIRO-ARCCSS,ssp245,day,pr,gn,az://cmip6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2...,,20191108
2,ACCESS-CM2,r1i1p1f1,4,3,ScenarioMIP,CSIRO-ARCCSS,ssp370,day,pr,gn,az://cmip6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2...,,20191108
3,ACCESS-CM2,r1i1p1f1,4,3,ScenarioMIP,CSIRO-ARCCSS,ssp585,day,pr,gn,az://cmip6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2...,,20210317
4,ACCESS-CM2,r1i1p1f1,4,3,CMIP,CSIRO-ARCCSS,historical,day,tasmax,gn,az://cmip6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/histor...,,20191108
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1027,NorESM2-MM,r1i1p1f1,4,3,ScenarioMIP,NCC,ssp585,day,tasmax,gn,az://cmip6/ScenarioMIP/NCC/NorESM2-MM/ssp585/r...,,20191108
1028,NorESM2-MM,r1i1p1f1,4,3,CMIP,NCC,historical,day,tasmin,gn,az://cmip6/CMIP/NCC/NorESM2-MM/historical/r1i1...,,20191108
1029,NorESM2-MM,r1i1p1f1,4,3,ScenarioMIP,NCC,ssp245,day,tasmin,gn,az://cmip6/ScenarioMIP/NCC/NorESM2-MM/ssp245/r...,,20191108
1030,NorESM2-MM,r1i1p1f1,4,3,ScenarioMIP,NCC,ssp370,day,tasmin,gn,az://cmip6/ScenarioMIP/NCC/NorESM2-MM/ssp370/r...,,20191108
