In [None]:
import pandas as pd
from pandas.errors import OutOfBoundsDatetime
import xarray as xr
from tqdm import tqdm
from cmip6_downscaling.data import cat
from cmip6_downscaling.data.cmip import get_gcm
import intake

# Public Pangeo CMIP6 intake-esm catalog description hosted on Google Cloud Storage.
GCS_CATALOG_URL = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"

# Project catalog entry for CMIP6 (presumably the az:// mirror, judging by the
# zstore paths handled later in this notebook -- confirm against `cat`).
az_col = cat.cmip6()

# The same kind of catalog, opened directly from the public JSON description.
gcs_col = intake.open_esm_datastore(GCS_CATALOG_URL)

In [2]:
# Variables and experiments that every retained (source_id, member_id) pair must cover.
variable_id_list = ['tasmax', 'tasmin', 'pr']
experiment_id_list = ['historical', 'ssp245', 'ssp370', 'ssp585']

# Search criteria: daily table on the native grid, restricted to the lists above.
query = {
    'table_id': 'day',
    'grid_label': 'gn',
    'variable_id': variable_id_list,
    'experiment_id': experiment_id_list,
}
col_subset = az_col.search(require_all_on=["variable_id", "experiment_id"], **query)
col_subset_df = col_subset.df

# Count how many distinct experiments and variables each model/member pair offers.
member_keys = ["source_id", "member_id"]
group_result = col_subset_df.groupby(member_keys)[["experiment_id", "variable_id"]].nunique()

# Keep only the pairs whose counts reach the number of requested experiments and variables.
has_all_experiments = group_result['experiment_id'] >= len(experiment_id_list)
has_all_variables = group_result['variable_id'] >= len(variable_id_list)
filtered_group_result = group_result[has_all_experiments & has_all_variables].reset_index()

# Inner-join back onto the full subset so only rows of complete members survive.
# Both frames carry `experiment_id` / `variable_id`, so pandas suffixes the
# count columns with `_x` and the catalog columns with `_y`.
catalog_filtered = pd.merge(
    filtered_group_result,
    col_subset_df,
    on=member_keys,
    how='inner',
)

In [3]:
# Stores that failed to open in an earlier run (kept for reference only; the
# list is rebuilt from scratch below):
#  'az://cmip6/ScenarioMIP/NCAR/CESM2/ssp585/r11i1p1f1/day/pr/gn/v20200528/',
#  'az://cmip6/CMIP/CCCma/CanESM5/historical/r18i1p2f1/day/pr/gn/v20190429/',
#  'az://cmip6/ScenarioMIP/CCCma/CanESM5/ssp585/r4i1p1f1/day/tasmax/gn/v20190429/',
#  'az://cmip6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r2i1p1f1/day/tasmax/gn/v20190710/',
#  'az://cmip6/ScenarioMIP/MRI/MRI-ESM2-0/ssp245/r2i1p1f1/day/pr/gn/v20210830/'

# Try to open every zarr store in the filtered catalog and collect the ones that fail.
invalid_val_list = []
for val in tqdm(catalog_filtered['zstore'].to_list()):
    try:
        ds = xr.open_zarr(val)
    except Exception:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop this long-running loop
        # instead of being recorded as an "invalid store".
        invalid_val_list.append(val)
    else:
        # Opening successfully is the whole check; release the handle right away.
        ds.close()

# removes invalid stores from filtered catalog
catalog_filtered_clean = catalog_filtered[~catalog_filtered['zstore'].isin(invalid_val_list)]
catalog_filtered_clean.to_csv('az://static/valid_gcm_members.csv', index=False)