In [None]:
! pip install --upgrade xarray zarr gcsfs cftime nc-time-axis

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Mount Google Drive under /content/drive; a later cell reads the
# pre-processed NetCDF file from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs


In [None]:
# CSV catalog of the CMIP6 zarr stores; each row carries the store path in
# its `zstore` column together with the identifiers used to query it below.
catalog_url = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
df = pd.read_csv(catalog_url)
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [None]:
# Open the previously pre-processed dataset stored on Drive. Its `model`
# entry is what the loading loop iterates over and what is later attached
# as the `model` labels of the combined result.
preprocessed_path = '/content/drive/MyDrive/CMIP6_pre-processed_data.nc'
ds_processed = xr.open_dataset(preprocessed_path)
source_list = ds_processed.model


In [None]:
# Google Cloud Storage filesystem handle opened with the 'anon' token (no credentials);
# load_zarr_dset() below reads this module-level `gcs` to build store mappers.
gcs = gcsfs.GCSFileSystem(token='anon')

In [None]:
def load_zarr_dset(df):
  """Open the zarr store referenced by the last row of a catalog selection.

  Parameters
  ----------
  df : pandas.DataFrame
      Catalog rows exposing a ``zstore`` column of store paths. When several
      rows are present, the LAST one is used.

  Returns
  -------
  xarray.Dataset
      Dataset opened from that store via the module-level ``gcs`` filesystem,
      using consolidated metadata.

  Raises
  ------
  IndexError
      If ``df`` contains no rows, i.e. there is no store to open.
  """
  stores = df.zstore.values
  if len(stores) == 0:
    # Same exception type as the bare indexing would raise, but with a message
    # that points at the real cause (a query that matched nothing).
    raise IndexError("load_zarr_dset: the catalog selection is empty, so there is no zarr store to open")

  # path of the last matching store in the selection
  # NOTE(review): presumably the last row is the most recent version - confirm catalog ordering
  zstore = stores[-1]

  # create a mutable-mapping-style interface to the store
  mapper = gcs.get_mapper(zstore)

  # open it using xarray and zarr
  ds = xr.open_zarr(mapper, consolidated=True)
  return ds

In [None]:
# Common monthly time index that every model's time axis is replaced with,
# covering January 1851 through December 2099 (one month-end stamp per month).
n_years = 2099 - 1851 + 1  # 249 years -> 2988 monthly steps
# Use the MonthEnd offset object instead of the 'M' string alias: the alias is
# deprecated in recent pandas releases, while the offset works in all versions.
new_time_index = pd.date_range(start='1851-01-01', freq=pd.offsets.MonthEnd(), periods=12 * n_years)
new_time_index

DatetimeIndex(['1851-01-31', '1851-02-28', '1851-03-31', '1851-04-30',
               '1851-05-31', '1851-06-30', '1851-07-31', '1851-08-31',
               '1851-09-30', '1851-10-31',
               ...
               '2099-03-31', '2099-04-30', '2099-05-31', '2099-06-30',
               '2099-07-31', '2099-08-31', '2099-09-30', '2099-10-31',
               '2099-11-30', '2099-12-31'],
              dtype='datetime64[ns]', length=2988, freq='M')

In [None]:
# Build one global-mean `tas` time series per model (historical + ssp585)
# and stack them along a new `model` dimension.
ds_list = list()

# Iterate over the plain model-name values (not 0-d DataArrays) so that the
# `source_id==@source` comparison inside df.query is an ordinary string match.
for source in source_list.values:
    # catalog rows for this model: monthly (Amon) tas, member r1i1p1f1
    df_hist_tas=df.query  ("activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'& member_id=='r1i1p1f1' & source_id==@source")
    df_ssp585_tas=df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'    & member_id=='r1i1p1f1' & source_id==@source")

    # fail early, naming the model, instead of a bare IndexError from the loader
    if df_hist_tas.empty or df_ssp585_tas.empty:
        raise ValueError(f"model {source!r}: no Amon/tas/r1i1p1f1 store found for historical and/or ssp585 in the catalog")

    # load
    ds_hist_tas = load_zarr_dset(df_hist_tas)
    ds_ssp585_tas = load_zarr_dset(df_ssp585_tas)

    # concat the scenario run onto the end of the historical run
    ds_all = xr.concat([ds_hist_tas, ds_ssp585_tas], dim='time', coords='all', compat='override')

    # global mean, weighting each grid cell by cos(latitude)
    weights = np.cos(np.deg2rad(ds_all.tas.lat))
    ds_all = ds_all.weighted(weights).mean(dim=('lon', 'lat'))

    # slice to our specific period
    ds_all = ds_all.sel(time=slice('1851', '2099'))

    # The model's own time axis is about to be overwritten, so make sure it
    # has exactly one step per entry of the common index; otherwise values
    # would be silently mislabelled or the assignment would fail obscurely.
    n_time = ds_all.sizes['time']
    if n_time != len(new_time_index):
        raise ValueError(f"model {source!r}: {n_time} time steps in 1851-2099, expected {len(new_time_index)}")

    # replace the model-specific time coordinate with the common index created above
    ds_all = ds_all.assign_coords(time=new_time_index)

    # add to list
    ds_list.append(ds_all)

# stack all models along a new `model` dimension (labels are attached in the next cell)
ob_all = xr.concat(ds_list, dim = 'model',coords='minimal',compat='override')

ob_all

Unnamed: 0,Array,Chunk
Bytes,630.28 kiB,0.98 kiB
Shape,"(27, 2988)","(1, 126)"
Count,14250 Tasks,3321 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 630.28 kiB 0.98 kiB Shape (27, 2988) (1, 126) Count 14250 Tasks 3321 Chunks Type float64 numpy.ndarray",2988  27,

Unnamed: 0,Array,Chunk
Bytes,630.28 kiB,0.98 kiB
Shape,"(27, 2988)","(1, 126)"
Count,14250 Tasks,3321 Chunks
Type,float64,numpy.ndarray


In [None]:
# Label the `model` dimension with the model names the loop iterated over.
# `assign_coords` states the intent explicitly: `model` is a coordinate,
# not a new data variable.
ob_all = ob_all.assign_coords(model=source_list)
ob_all

Unnamed: 0,Array,Chunk
Bytes,630.28 kiB,0.98 kiB
Shape,"(27, 2988)","(1, 126)"
Count,14250 Tasks,3321 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 630.28 kiB 0.98 kiB Shape (27, 2988) (1, 126) Count 14250 Tasks 3321 Chunks Type float64 numpy.ndarray",2988  27,

Unnamed: 0,Array,Chunk
Bytes,630.28 kiB,0.98 kiB
Shape,"(27, 2988)","(1, 126)"
Count,14250 Tasks,3321 Chunks
Type,float64,numpy.ndarray


In [None]:
# Write the combined per-model series to a NetCDF4 file. The path is relative,
# so the file lands in the current working directory; mode 'w' replaces any
# existing file of that name.
ob_all.to_netcdf(
    "global_data_processed.nc",
    mode='w',
    format="NETCDF4",
    engine='netcdf4',
)