# Import data libraries and load the catalog

In [1]:
import intake
import xarray as xr
import os
import pandas as pd
import matplotlib.pyplot as plt
from packaging.version import Version
import numpy as np
import gcsfs
import fsspec
import xesmf as xe
import json
import sys
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from xmip.utils import google_cmip_col
from xmip.preprocessing import combined_preprocessing

# Load the CMIP6 catalog
catalog_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
# col = intake.open_esm_datastore(catalog_url)
col = google_cmip_col()
print(col.df)
print(col.df.columns)


       activity_id       institution_id      source_id       experiment_id  \
0       HighResMIP                 CMCC   CMCC-CM2-HR4  highresSST-present   
1       HighResMIP                 CMCC   CMCC-CM2-HR4  highresSST-present   
2       HighResMIP                 CMCC   CMCC-CM2-HR4  highresSST-present   
3       HighResMIP                 CMCC   CMCC-CM2-HR4  highresSST-present   
4       HighResMIP                 CMCC   CMCC-CM2-HR4  highresSST-present   
...            ...                  ...            ...                 ...   
514813        CMIP  EC-Earth-Consortium  EC-Earth3-Veg          historical   
514814        CMIP  EC-Earth-Consortium  EC-Earth3-Veg          historical   
514815        CMIP  EC-Earth-Consortium  EC-Earth3-Veg          historical   
514816        CMIP  EC-Earth-Consortium  EC-Earth3-Veg          historical   
514817        CMIP  EC-Earth-Consortium  EC-Earth3-Veg          historical   

       member_id table_id variable_id grid_label  \
0       r1i

# Filter for historical runs of models we already have projeciton information for

In [2]:
# Path to the folder containing your existing CMIP6 models
# model_folder_path = "/Users/aallyn/Library/CloudStorage/Box-Box/RES_Data/CMIP6/SSP5_85/BiasCorrected/IndividualModels/surf_temp"  # replace with your folder path

# # Get list of model names from folder
# strings_to_remove = ["surf_temp_stGrid_tos_", ".grd"]  # replace with actual strings to remove

# ssp585_models = []
# for name in os.listdir(model_folder_path):
#     # Only include specific file types if needed (e.g., .nc files)
#     if name.endswith("historical.grd"):  # Optional: Filter by file extension
#         # Remove each unwanted string from the file name
#         clean_name = name
#         for string in strings_to_remove:
#             clean_name = clean_name.replace(string, "")
#         ssp585_models.append(clean_name)
# print("List of model names:", ssp585_models)
# print(col.df['source_id'].unique())

# # Filter for historical data of the specified models
# # Extract the first and second parts
# # Split each model name by '_'
# split_names = [name.split('_') for name in ssp585_models]

# Extract just the IDs (the last element in the split parts)
# source_ids = [parts[0] for parts in split_names]
# Get info from the input json file, which was largely taken from NOAA CMIP6 data portal
# Path to your JSON file
file_path = "/Users/aallyn/GitHub/lobSDM/Code/cmip6_input.json"

# Open and load the JSON file
with open(file_path, "r") as file:
    input = json.load(file)

data_set = input['dataset']
source_ids = {item["source_id"] for item in data_set.values()}
member_ids = {member for item in data_set.values() for member in item["ens_members"]}
variable_ids = ["thetao", "tos"]
table_ids = ["Omon"]

cat = col.search(
    experiment_id = 'historical',
    source_id = source_ids,
    member_id = member_ids,
    variable_id = variable_ids,
    table_id = table_ids) # Filter for the specific models and variants

# kwargs for combined pre_processing
kwargs = {
    'zarr_kwargs':{
        'consolidated':True,
        'use_cftime':True
    },
    'aggregate':False,
    'preprocess':combined_preprocessing
}

ddict = cat.to_dataset_dict(
    zarr_kwargs={"consolidated": True, "use_cftime": True},
    aggregate=False,
    preprocess=combined_preprocessing,
)
list(ddict.keys())


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


    incompatible units for variable 'lev': cannot convert a non-quantity using 'm' as unit
    incompatible units for variable 'lev': cannot convert a non-quantity using 'm' as unit


['CMIP.BCC.BCC-CSM2-MR.historical.r1i1p1f1.Omon.tos.gn.gs://cmip6/CMIP6/CMIP/BCC/BCC-CSM2-MR/historical/r1i1p1f1/Omon/tos/gn/v20181126/.20181126',
 'CMIP.MIROC.MIROC6.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/Omon/thetao/gn/v20190311/.20190311',
 'CMIP.CSIRO-ARCCSS.ACCESS-CM2.historical.r1i1p1f1.Omon.tos.gn.gs://cmip6/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/historical/r1i1p1f1/Omon/tos/gn/v20191108/.20191108',
 'CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-Earth3-Veg-LR/historical/r1i1p1f1/Omon/thetao/gn/v20200919/.20200919',
 'CMIP.NUIST.NESM3.historical.r1i1p1f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/NUIST/NESM3/historical/r1i1p1f1/Omon/thetao/gn/v20190703/.20190703',
 'CMIP.MIROC.MIROC6.historical.r1i1p1f1.Omon.tos.gn.gs://cmip6/CMIP6/CMIP/MIROC/MIROC6/historical/r1i1p1f1/Omon/tos/gn/v20181212/.20181212',
 'CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.r1i1p1f

# Regrid models to common 1 x 1 grid

It looks like there are a variety of different ways to do this. For simplicity, going to try to use one of the gr1 grids and keep going with the Pangeo CMIP6 tutorial workflow rather than departing and doing custom `xesmf` regridding. I think we can use 'CMIP.INM.INM-CM4-8.historical.r1i1p1f1.Omon.thetao.gr1.gs://cmip6/CMIP6/CMIP/INM/INM-CM4-8/historical/r1i1p1f1/Omon/thetao/gr1/v20190530/.20190530' as a template.

In [3]:
from xmip.postprocessing import (
    interpolate_grid_label
)

combined_grids_dict = interpolate_grid_label(ddict, target_grid_label='gr1')
list(combined_grids_dict)



['BCC-CSM2-MR.historical.Omon.r1i1p1f1',
 'MIROC6.historical.Omon.r1i1p1f1',
 'ACCESS-CM2.historical.Omon.r1i1p1f1',
 'EC-Earth3-Veg-LR.historical.Omon.r1i1p1f1',
 'NESM3.historical.Omon.r1i1p1f1',
 'EC-Earth3.historical.Omon.r1i1p1f1',
 'CMCC-ESM2.historical.Omon.r1i1p1f1',
 'INM-CM4-8.historical.Omon.r1i1p1f1',
 'CIESM.historical.Omon.r1i1p1f1',
 'FIO-ESM-2-0.historical.Omon.r1i1p1f1',
 'CanESM5.historical.Omon.r1i1p1f1',
 'MPI-ESM1-2-HR.historical.Omon.r1i1p1f1',
 'MCM-UA-1-0.historical.Omon.r1i1p1f1',
 'INM-CM5-0.historical.Omon.r1i1p1f1',
 'FGOALS-g3.historical.Omon.r1i1p1f1',
 'FGOALS-f3-L.historical.Omon.r1i1p1f1',
 'ACCESS-ESM1-5.historical.Omon.r1i1p1f1',
 'CESM2.historical.Omon.r1i1p1f1',
 'IITM-ESM.historical.Omon.r1i1p1f1',
 'GISS-E2-1-G.historical.Omon.r1i1p1f1',
 'CMCC-CM2-SR5.historical.Omon.r1i1p1f1',
 'NorESM2-LM.historical.Omon.r1i1p1f1',
 'MPI-ESM1-2-LR.historical.Omon.r1i1p1f1',
 'CAMS-CSM1-0.historical.Omon.r1i1p1f1',
 'IPSL-CM6A-LR.historical.Omon.r1i1p1f1',
 'KAC

In [4]:
interpolate_grid_label?

[0;31mSignature:[0m
[0minterpolate_grid_label[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mds_dict[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtarget_grid_label[0m[0;34m=[0m[0;34m'gn'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmethod[0m[0;34m=[0m[0;34m'bilinear'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mxesmf_kwargs[0m[0;34m=[0m[0;34m{[0m[0;34m'ignore_degenerate'[0m[0;34m:[0m [0;32mTrue[0m[0;34m,[0m [0;34m'periodic'[0m[0;34m:[0m [0;32mTrue[0m[0;34m}[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmerge_kwargs[0m[0;34m=[0m[0;34m{[0m[0;34m'combine_attrs'[0m[0;34m:[0m [0;34m'drop_conflicts'[0m[0;34m}[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Combines different grid labels via interpolation with xesmf

Parameters
----------
ds_dict : dict
    dictonary of input datasets
target_grid_label : str, optional

# Download and save the historical data for region of interest

In [None]:
# Define your region of interest (extent)
# Load the NetCDF file using xarray
ds = xr.open_dataset("/Users/aallyn/Library/CloudStorage/Box-Box/RES_Data/CMIP6/SSP1_26/RawTmpFiles/so_CanESM5_r10i1p1f1_ssp126.nc")

# Get the bounds (assuming the latitudes and longitudes are called 'lat' and 'lon')
lat_bounds = slice(ds['latitude'].values.min(), ds['latitude'].values.max())
print(lat_bounds)
lon_bounds = slice(ds['longitude'].values.min(), ds['longitude'].values.max())
print(lon_bounds)

# Download
box_root = "/Users/aallyn/Library/CloudStorage/Box-Box/RES_Data/CMIP6/SSP5_85/RawTmpFiles/"
 
# # I couldn't get this to work!       
# for key, ds in dset_dict.items():
#     # print(f"Model: {key}, Type of dataset: {type(ds)}")
#     file_name = f"{box_root}{key}.nc"
#     try:
#         print(f"Processing {key}...")

        # # Dynamically find longitude and latitude coordinates
        # lon_name = None
        # lat_name = None
        # for name in ds.coords:
        #     if 'lon' in name.lower() or 'longitude' in name.lower():
        #         lon_name = name
        #     if 'lat' in name.lower() or 'latitude' in name.lower():
        #         lat_name = name

        # # Check if lon/lat coordinates were found
        # if lon_name is None or lat_name is None:
        #     # If not, check for j/i indexing
        #     if 'j' in ds.coords and 'i' in ds.coords:
        #         print("Using j/i indexing instead of lat/lon.")
        #         lon_name = 'i'  # Use 'i' for longitude
        #         lat_name = 'j'  # Use 'j' for latitude
        #     else:
        #         raise ValueError("No longitude or latitude coordinates found in the dataset.")

        # # Subset region based on identified coordinates
        # if lon_name in ['i', 'j']:  # If using i/j indexing instead of lon/lat
        #     subset_ds = ds.isel(
        #         **{
        #             lon_name: slice(lon_bounds[0], lon_bounds[1]),
        #             lat_name: slice(lat_bounds[0], lat_bounds[1]),
        #         }
        #     )
        # else:
        #     subset_ds = ds.sel(
        #         **{
        #             lon_name: slice(lon_bounds[0], lon_bounds[1]),
        #             lat_name: slice(lat_bounds[0], lat_bounds[1]),
        #         }
        #     )
        
        # # Fix encoding for time coordinate
        # # Extract the time encoding and units
        # if 'time' in subset_ds.coords:
        #     original_time_encoding = ds['time'].encoding
        #     units = original_time_encoding.get('units', 'days since 1850-01-01')
        #     dtype = original_time_encoding.get('dtype', 'float64')  # Ensure dtype is set

        #     # If dtype is None, set it explicitly to 'float64'
        #     if dtype is None:
        #         dtype = 'float64'

        #     # Set time encoding to match units and dtype
        #     subset_ds['time'].encoding.update({
        #         'units': units,
        #         'dtype': dtype,
        #         '_FillValue': None,  # Avoid conflicts with fill values
        #     })
            
        #     # If time bounds exist, set units for time_bnds as well
        #     if 'time_bnds' in subset_ds.coords:
        #         subset_ds['time_bnds'].encoding.update({
        #             'units': units,  # Match the units of time
        #         })
                
        #     # Remove chunking to prevent issues with datetime encoding
        #     subset_ds['time'].encoding['chunks'] = None
                
        # # Save the subset file
        # subset_ds.to_netcdf(file_name, encoding={var: {} for var in subset_ds.variables})
        # print(f"Saved subset file for {key} to {file_name}")

    # except Exception as e:
    #     print(f"Error processing {key}: {e}")
    

slice(20.54499053955078, 82.37899780273438, None)
slice(254.0767822265625, 323.5, None)


# New option from pangeo example

In [1]:
import intake
import xarray as xr
import os
import pandas as pd
import matplotlib.pyplot as plt
from packaging.version import Version
import numpy as np
import gcsfs
import fsspec
# conda install -c conda-forge esmf esmpy
import xesmf as xe
import json
import sys
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# A function to flag curvilinear grids as these have their own issues
def is_curvilinear(ds):
    lat_name = 'lat' if 'lat' in ds else 'latitude'
    lon_name = 'lon' if 'lon' in ds else 'longitude'
    
    if lat_name in ds and lon_name in ds:
        lat_dims = ds[lat_name].dims
        lon_dims = ds[lon_name].dims
        # Check if lat/lon have 2D dimensions
        return len(lat_dims) == 2 and len(lon_dims) == 2
    return False

# A function to get lat/latitude or lon/longitude 
def get_lat_lon_names(ds):
    """
    Dynamically detect the latitude and longitude variable names.
    """
    lat_candidates = ["lat", "latitude", "y", "j"]
    lon_candidates = ["lon", "longitude", "x", "i"]
    
    lat_name = next((name for name in lat_candidates if name in ds.dims), None)
    lon_name = next((name for name in lon_candidates if name in ds.dims), None)
    
    if lat_name is None or lon_name is None:
        raise KeyError("Could not find latitude and/or longitude variables in the dataset.")
    
    return lat_name, lon_name

# Function to plot data
def plot_data(ds, variable, title="Quick Map", cmap="viridis", projection=ccrs.PlateCarree()):
    if variable not in ds:
        raise ValueError(f"Variable '{variable}' not found in the dataset.")
    # Select a single time step if time is a dimension
    if "time" in ds.dims:
        data = ds[variable].isel(time=0)  # Plot the first time step
    else:
        data = ds[variable]
    # Set up the map
    fig, ax = plt.subplots(
        subplot_kw={"projection": projection},
        figsize=(10, 6)
    )
    ax.set_global()  # Set global extent (you can modify as needed)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.BORDERS, linestyle=":")
    ax.add_feature(cfeature.LAND, edgecolor="black", facecolor="lightgray", alpha=0.5)
    # Plot the data
    im = ax.pcolormesh(
        ds[lon_name], ds[lat_name], data,
        transform=ccrs.PlateCarree(),
        cmap=cmap
    )
    # Add colorbar
    cb = fig.colorbar(im, ax=ax, orientation="horizontal", pad=0.05)
    cb.set_label(variable)
    # Title and gridlines
    ax.set_title(title, fontsize=14)
    ax.gridlines(draw_labels=True, linewidth=0.5, color="gray", linestyle="--", alpha=0.7)
    # Show the plot
    plt.show()


# Path to your JSON file
file_path = "/Users/aallyn/GitHub/lobSDM/Code/cmip6_input.json"

# Open and load the JSON file
with open(file_path, "r") as file:
    input = json.load(file)

# load metadata of the data in pangeo
if len(input['metadata_csv'].strip()) > 0:
    meta_data = pd.read_csv(input['metadata_csv'])
    print("using local metadta from: ", input['metadata_csv'])
else:
    print("using metadta from https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv")
    meta_data = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

# prepare the output directory
out_dir = input['out_dir']
os.makedirs(out_dir, exist_ok=True)
data_set=input['dataset']

# check if the download all models option is invoked. If True, all unique models and ensemble members are obtained from metadata
if input['download_all_models']:
    models = meta_data['source_id'].unique()
    all_models = True
else:
    models = data_set.keys()
    all_models = False

# check if the download all ensemble members option is invoked. If True, all unique models and ensemble members are obtained from metadata
if input['download_all_members']:
    ens_members = meta_data['member_id'].unique()
    full_ensemble = True
else:
    ens_members = None
    full_ensemble = False

# check if regrid option is activated and create a lat lon array for target spatial grids
if len(input['target_grid']) == 2:
    regrid_data = True
    tar_lat_int = input['target_grid'][0]
    tar_lon_int = input['target_grid'][1]
    n_lat = int(180./tar_lat_int)
    n_lon = int(360./tar_lon_int)
    lat_min = -90 + tar_lat_int/2.
    lat_max = 90 - tar_lat_int/2.
    lon_max = 360 - tar_lon_int/2.
    lon_min = tar_lon_int/2.
    new_lat = np.linspace(lat_min, lat_max, n_lat, endpoint=True)
    new_lon = np.linspace(lon_min, lon_max, n_lon, endpoint=True)
else:
    regrid_data = False

# loop through the experiments and dataset
for experiment, info in input['experiments'].items():
    start_year = info[0]
    end_year = info[1]
    # loop through the unique models
    for model in models:
        # get models if download_all_models/all_models is False
        if not all_models:
            print(model)
            print(data_set[model])
            src = data_set[model]['source_id']
        else:
            src = model

        # get ensemble members if download_all_members/full_ensemble is False
        if not full_ensemble:
            ens_members = data_set[model]['ens_members']
        # loop through the variables
        for variable, table in input['variables'].items():
            # loop through the ensemble
            for variant in ens_members:
                qry = "table_id == '" + table +"' & variable_id == '" + variable + "' & experiment_id == '" + experiment + "' & source_id == '" + src + "' & member_id == '" + variant + "'"
                meta_data_sel = meta_data.query(qry)
                
                # Continue
                if not meta_data_sel.empty:
                    print("Trying to download: ")
                    print(qry)
                
                    zstore = meta_data_sel.zstore.values[-1]
                    # create a mutable-mapping-style interface to the store
                    mapper = fsspec.get_mapper(zstore)
                    # open it using xarray and zarr
                    ds = xr.open_zarr(mapper, consolidated=True)
                                        
                    # create dataset with target grid
                    print("Regridding irregular grid")
                    ds_out_target = xr.Dataset({'lat': (['lat'], new_lat), 'lon': (['lon'], new_lon),})
                    regridder = xe.Regridder(ds, ds_out_target, input['regrid_method'])
                    ds_regrid = regridder(ds)
                    print(ds_regrid)
                    
                    # try:
                    #     lat_name, lon_name = get_lat_lon_names(ds)
                    # except KeyError as e:
                    #     print(f"Error: {e}")
                    #     sys.exit(1)
                    
                    lat_name = "lat"
                    lon_name = "lon"
                    
                    # Perform subsetting
                    ds_subset = ds_regrid.isel(
                            lon=slice(-85, -50),
                            lat=slice(255, 320)
                    )
                    
                    ds_out = ds_subset.sel(time=slice(str(start_year), str(end_year)))
                    print("Done subsetting regular grid")
                    # plot_data(ds_out_temp, variable, title="Quick Map", cmap="viridis", projection=ccrs.PlateCarree())
                    
                    # OLD STUFF       
                    # curvilinear_flag = is_curvilinear(ds)
                    # if curvilinear_flag:
                    #     print("Detected curvilinear grid: Subsetting with lat/lon masks.")
                    #     # Subset using lat/lon bounds with 2D latitude/longitude variables
                    #     if "j" in ds.dims and "i" in ds.dims:
                    #         print("Trying j and i")
                    #         print(ds)
                            
                    #         # Checking as sometimes longitude seems weird
                    #         min_check = ds.min(dim = "i")
                            
                    #         if (min_check < 0).any():
                    #              # Manually subsetting
                    #             ds_subset = ds.isel(
                    #                 j=slice(255, 320),
                    #                 i=slice(-85, -50)
                    #             )
                            
                    #         else:
                    #             # Manually subsetting
                    #             ds_subset = ds.isel(
                    #                 j=slice(255, 320),
                    #                 i=slice(30, 60)
                    #             )
                        
                    #     if "x" in ds.dims and "y" in ds.dims:
                    #          ds_subset = ds.isel(
                    #             x=slice(255, 320),
                    #             y=slice(30, 60)
                    #         )
                             
                    #     ds_out_temp = ds_subset.sel(time=slice(str(start_year), str(end_year)))
                    #     print("Done subsetting irregular grid")
                    #     try:
                    #         lat_name, lon_name = get_lat_lon_names(ds_out_temp)
                    #     except KeyError as e:
                    #         print(f"Error: {e}")
                    #         sys.exit(1)
                    #     # print(lat_name, lon_name)
                    #     # plot_data(ds_out_temp, variable, title="Quick Map", cmap="viridis", projection=ccrs.PlateCarree())

                    #     if regrid_data:
                    #         print("Regridding irregular grid")
                    #         # create dataset with target grid
                    #         ds_out_target = xr.Dataset({'lat': (['lat'], new_lat), 'lon': (['lon'], new_lon),})
                            
                    #         print("Dimensions:", ds_out_temp.dims)
                    #         print("Coordinates:", ds_out_temp.coords)
                            
                    #         plt.plot(
                    #             [ds_out_temp.j.min(), ds_out_temp.j.max(), ds_out_temp.j.max(), ds_out_temp.j.min(), ds_out_temp.j.min()],
                    #             [ds_out_temp.i.min(), ds_out_temp.i.min(), ds_out_temp.i.max(), ds_out_temp.i.max(), ds_out_temp.i.min()],
                    #             label="Source Grid", color="blue"
                    #         )

                    #         # Plot target grid bounds
                    #         plt.plot(
                    #             [ds_out_target.lon.min(), ds_out_target.lon.max(), ds_out_target.lon.max(), ds_out_target.lon.min(), ds_out_target.lon.min()],
                    #             [ds_out_target.lat.min(), ds_out_target.lat.min(), ds_out_target.lat.max(), ds_out_target.lat.max(), ds_out_target.lat.min()],
                    #             label="Target Grid", color="red"
                    #         )

                    #         plt.legend()
                    #         plt.xlabel("Longitude")
                    #         plt.ylabel("Latitude")
                    #         plt.title("Spatial Extent Alignment")
                    #         plt.show()

                    #         regridder = xe.Regridder(ds_out_temp, ds_out_target, input['regrid_method'])
                    #         ds_out = regridder(ds_out_temp)
                    #         print(ds_out)

                    # else:
                    #     print("Detected regular grid or indices: Subsetting with lat/lon or latitude/longitude.")
                    #     # Identify the correct coordinate names
                    #     # lon_name = 'lon' if 'lon' in ds.coords and 'lon' in ds.variables else 'longitude'
                    #     # lat_name = 'lat' if 'lat' in ds.coords and 'lat' in ds.variables else 'latitude'
                    #     # Detect latitude and longitude variable names
                    #     try:
                    #         lat_name, lon_name = get_lat_lon_names(ds)
                    #     except KeyError as e:
                    #         print(f"Error: {e}")
                    #         sys.exit(1)
                                                
                    #     # Perform subsetting
                    #     ds_subset = ds.sel(
                    #         **{
                    #             lon_name: slice(lon_bounds[0], lon_bounds[1]),
                    #             lat_name: slice(lat_bounds[0], lat_bounds[1]),
                    #         }
                    #     )
                    #     ds_out_temp = ds_subset.sel(time=slice(str(start_year), str(end_year)))
                    #     print("Done subsetting regular grid")
                        
                    #     if regrid_data:
                    #         # regrid if the option is invoked
                    #         print("Regridding regular grid")
                    #         # create dataset with target grid
                    #         ds_out_target = xr.Dataset({'lat': (['lat'], new_lat), 'lon': (['lon'], new_lon),})
                    #         # Regrid
                    #         regridder = xe.Regridder(ds_out_temp, ds_out_target, input['regrid_method'])
                    #         ds_out = regridder(ds_out_temp)
                        
                    # create and save the output
                    file_name_nc_out = '{var}_{exp}_{tab}_{sroc}_{vrnt}_{syr}_{eyr}_{t_lat}x{t_lon}.nc'.format(var=variable, exp=experiment, tab=table, sroc=src, vrnt=variant, syr=str(start_year), eyr=end_year, t_lat=str(tar_lat_int), t_lon=str(tar_lon_int))
                    out_file_out = os.path.join(out_dir, file_name_nc_out)
                    ds_out.to_netcdf(out_file_out)
                    
                    # close dataset
                    ds_out.close()
                    # ds_out_temp.close()
                    print('saved regridded data: ', out_file_out)
                    
                    # close dataset
                    ds.close()
                else:
                    print(f'Data not found for: {variable}')
                    print(qry)
                print ('---------------------------------------')
            

using metadta from https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv
MIROC6
{'source_id': 'MIROC6', 'ens_members': ['r1i1p1f1']}
Trying to download: 
table_id == 'Omon' & variable_id == 'tos' & experiment_id == 'historical' & source_id == 'MIROC6' & member_id == 'r1i1p1f1'
Regridding irregular grid
<xarray.Dataset>
Dimensions:             (time: 1980, lat: 180, lon: 360, vertices: 4, bnds: 2)
Coordinates:
  * time                (time) datetime64[ns] 1850-01-16T12:00:00 ... 2014-12...
    time_bnds           (time, bnds) datetime64[ns] dask.array<chunksize=(1980, 2), meta=np.ndarray>
  * lat                 (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5
  * lon                 (lon) float64 0.5 1.5 2.5 3.5 ... 357.5 358.5 359.5
Dimensions without coordinates: vertices, bnds
Data variables:
    tos                 (time, lat, lon) float32 dask.array<chunksize=(312, 180, 360), meta=np.ndarray>
    vertices_latitude   (vertices, lat, lon) float32 dask.array

ValueError: Variable 'time_bnds' has conflicting _FillValue (nan) and missing_value (1.0000000200408773e+20). Cannot encode data.