### This notebook downloads and processes CMIP6 data to calculate heave and spice.

#### Data must be downloaded for both the historical and the future scenarios. The notebook downloads sea water potential temperature (thetao) and sea water salinity (so). Pressure (press) is computed from depth and latitude, and absolute salinity (sa) is then computed from so and press. The data are temporally averaged and regridded to a 1° × 1° latitude–longitude grid. Finally, a region mask is applied to restrict the data to the areas of interest.

In [None]:
!pip install geopandas regionmask openpyxl numpy==1.20.0

Collecting regionmask
  Downloading regionmask-0.12.1-py3-none-any.whl (73 kB)
[K     |████████████████████████████████| 73 kB 9.0 MB/s eta 0:00:011
[?25hCollecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[K     |████████████████████████████████| 250 kB 16.3 MB/s eta 0:00:01
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Collecting regionmask
  Downloading regionmask-0.11.0-py3-none-any.whl (71 kB)
[K     |████████████████████████████████| 71 kB 61.6 MB/s eta 0:00:01
[?25h  Downloading regionmask-0.10.0-py3-none-any.whl (69 kB)
[K     |████████████████████████████████| 69 kB 59.8 MB/s eta 0:00:01
Installing collected packages: et-xmlfile, regionmask, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5 regionmask-0.10.0


In [None]:
#Importing libraries
import gcsfs
import intake
import xarray as xr
import pandas as pd
import cftime
import geopandas as gpd
import regionmask
import re
import numpy as np
import fiona
import xesmf as xe
import gsw
# Register the KML driver in fiona's supported-driver table with read/write ('rw') mode.
# NOTE(review): presumably needed so geopandas can read the region polygons from a
# KML file — confirm against the cell that loads the region GeoDataFrame.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

In [None]:
def rename_coords(ds):
    """Standardise coordinate names to 'lat', 'lon', 'lev' and 'lev_bnds'.

    Only the coordinates of ``ds`` are inspected. Every coordinate whose name is
    a known alias (e.g. 'latitude', 'nav_lon', 'olevel') is renamed to its
    standard name in a single ``rename`` call.

    An alias is left untouched when its standard name is already used by another
    coordinate (for instance a dataset carrying both 'latitude' and 'lat'),
    because renaming it would raise a name-conflict error.

    Parameters
    ----------
    ds : xarray.Dataset (or any object exposing ``coords``, ``rename`` and ``copy``)
        Dataset whose coordinates should be standardised.

    Returns
    -------
    A copy of ``ds`` with the coordinates renamed; the input is not modified.
    """
    # Alias -> standard name. Names that are already standard need no entry.
    coord_names = {
        'latitude': 'lat', 'nav_lat': 'lat',
        'longitude': 'lon', 'nav_lon': 'lon',
        'olevel': 'lev',
        'olevel_bounds': 'lev_bnds'
    }

    present = set(ds.coords)
    renames = {}
    for old_name in ds.coords:
        new_name = coord_names.get(old_name)
        if new_name is None or new_name == old_name:
            continue
        # Skip targets that already exist or were claimed by an earlier alias:
        # renaming onto an existing name would fail.
        if new_name in present or new_name in renames.values():
            continue
        renames[old_name] = new_name

    if renames:
        ds = ds.rename(renames)
    return ds.copy()

# Function to fix the time variable! Some available CMIP6 formats for the time variable make its manipulation difficult.
def to_360day_monthly(da):
    '''Re-stamp each time step as day 16 of its month on a 360-day calendar.

    Function created by Claire Carouge on the CLEX CMS Blog.

    The year and month of every entry of ``da.time`` are read through
    ``timetuple()`` and used to build a ``cftime.Datetime360Day`` dated the
    16th. The rebuilt coordinate is attached to a copy of ``da``, which is
    returned.

    NOTE(review): the new dates are written element by element through
    ``.values`` of a copy of the time coordinate — confirm that this copy does
    not share memory with the time index of the input.
    '''
    result = da.copy()
    new_time = da.time.copy()
    n_steps = result.sizes['time']
    for step in range(n_steps):
        # The first two fields of the time tuple are the year and the month.
        year, month = result.time.values[step].timetuple()[:2]
        new_time.values[step] = cftime.Datetime360Day(year, month, 16)
    return result.assign_coords({'time': new_time})

# Function that converts depth from centimeters to meters.
def depth_m(ds):
    """Convert the 'lev' depth coordinate from centimetres to metres.

    Nothing happens unless ``ds`` contains 'lev' and its 'units' attribute names
    centimetres ('cm', 'centimeter(s)' or 'centimetre(s)', case-insensitive,
    surrounding whitespace ignored).

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset that may carry a 'lev' coordinate. It is modified in place.

    Returns
    -------
    The same ``ds`` object, with 'lev' divided by 100 and its units set to 'm'
    when a conversion was needed.
    """
    if "lev" not in ds:
        return ds

    lev_attrs = dict(ds["lev"].attrs)
    if "units" not in lev_attrs:
        return ds

    cm_units = {"cm", "centimeter", "centimeters", "centimetre", "centimetres"}
    if str(lev_attrs["units"]).strip().lower() in cm_units:
        ds["lev"] = ds["lev"] / 100
        # Arithmetic can drop the attributes of the result, so restore the
        # original metadata before correcting the units.
        ds["lev"].attrs.update(lev_attrs)
        ds["lev"].attrs["units"] = "m"
    return ds

In [None]:
# Accesses the CMIP6 metadata catalog hosted on Google Cloud.
# (This description was previously a bare line without '#', which made the cell a SyntaxError.)
cmip6 = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [None]:
# Load the model table from the 'SSP370' sheet of the workbook.
# The path is relative: the workbook is expected in the same folder as this notebook.
# Column headers are stripped of leading/trailing whitespace.
df = (
    pd.read_excel("Tabela_Modelos.xlsx", sheet_name='SSP370')
    .rename(columns=lambda name: name.strip())
)

In [None]:
# Build one catalog search (a dict of query facets) per row of the model table.
pesquisas = []
for _, row in df.iterrows():
    # 'variable_id' holds a comma-separated list. Split on the comma alone and
    # strip each name so that "thetao, so", "thetao,so" and "thetao , so" all
    # yield ['thetao', 'so']; empty fragments (e.g. a trailing comma) are dropped.
    variable_ids = [name.strip() for name in row['variable_id'].split(',') if name.strip()]
    pesquisa = {'source_id': row['source_id'],
                'table_id': row['table_id'],
                'variable_id': variable_ids,
                'experiment_id': row['experiment_id'],
                'member_id': row['member_id']}
    pesquisas.append(pesquisa)

In [None]:
# Keep only the first search, so the processing cell below handles a single model.
# Skip this cell to process every row of the model table.
pesquisas = pesquisas[:1]

In [None]:
# Download, average, regrid, mask and save the data for every search in `pesquisas`.
#
# Requires, from earlier cells: `cmip6` (the catalog), `pesquisas` (list of query
# dicts), `rename_coords`, `depth_m`, and `gdf`, a GeoDataFrame with the region polygons.
# Writes one NetCDF file per search, named after the selected dataset key.

# `gdf` is not created by any cell shown in this notebook. Check for it up front so
# the cell fails immediately instead of after the download and regridding.
if "gdf" not in globals():
    raise NameError(
        "`gdf` is not defined: load the region polygons into a GeoDataFrame "
        "(e.g. with gpd.read_file) before running this cell."
    )

for pesquisa in pesquisas:
    catalog_subset = cmip6.search(require_all_on='source_id', **pesquisa)
    cat = catalog_subset.to_dataset_dict(aggregate=True,
                                         storage_options={'token': 'anon'},
                                         zarr_kwargs={'consolidated': True,
                                                      'decode_times': True,
                                                      'use_cftime': True})

    # Pick the first dataset whose key ends with "gn".
    key = next((name for name in cat if name.endswith("gn")), None)
    if key is None:
        raise ValueError(
            "No dataset with a key ending in 'gn' found for {}; available keys: {}".format(
                pesquisa, list(cat.keys())
            )
        )
    ds = cat[key]

    # Keeps only potential temperature and salinity and drops length-1 dimensions
    ds = ds[["thetao", "so"]].squeeze()

    # Selects the analysis time period
    ds = ds.sel(time=slice('2080', '2100'))

    # Standardises the coordinate names (lat, lon, lev)
    ds = rename_coords(ds)

    # Converts the depth coordinate from centimeters to meters when needed
    ds = depth_m(ds)

    # Calculates the temporal mean
    ds_mean = ds.mean(dim='time')

    # Pressure from depth (gsw expects height, i.e. negative depth)
    ds_mean["press"] = gsw.p_from_z(-ds_mean.lev, ds_mean.lat)

    # Converts preformed salinity to absolute salinity
    ds_mean["sa"] = gsw.SA_from_Sstar(ds_mean.so, ds_mean.press, ds_mean.lon, ds_mean.lat)

    # Target lat x lon grid (1° x 1°) for the regridding
    target_grid = xr.Dataset(
        {
            "lat": (["lat"], np.arange(-90, 90, 1.0)),
            "lon": (["lon"], np.arange(-180, 180, 1.0)),
        }
    )

    regridder = xe.Regridder(ds_mean, target_grid, "bilinear", ignore_degenerate=True)
    ds_out = regridder(ds_mean, keep_attrs=True)

    # Rasterises the GeoDataFrame polygons onto the regridded lon/lat points
    mask = regionmask.mask_geopandas(gdf, ds_out['lon'], ds_out['lat'])

    # Keeps only the points that fall inside a region (the mask is NaN elsewhere)
    ds_masked = ds_out.where(mask.notnull(), drop=True)

    # Saves the file, named after the selected dataset key
    ds_masked.to_netcdf("{}.nc".format(key))


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
