In [None]:
! pip install --upgrade xarray zarr gcsfs cftime nc-time-axis

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xarray
  Downloading xarray-2023.1.0-py3-none-any.whl (973 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.1/973.1 KB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting zarr
  Downloading zarr-2.14.2-py3-none-any.whl (203 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.3/203.3 KB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gcsfs
  Downloading gcsfs-2023.3.0-py2.py3-none-any.whl (26 kB)
Collecting nc-time-axis
  Downloading nc_time_axis-1.4.1-py3-none-any.whl (17 kB)
Collecting numcodecs>=0.10.0
  Downloading numcodecs-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fasteners
  Downloading fasteners-0.18-py3-none-any.whl (18 kB)
Collecting asciitree
 

In [None]:
# Make Google Drive visible to this Colab runtime; a file under this mount
# point is read further down in the notebook.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs


In [None]:
# Read the CSV catalog of CMIP6 zarr stores; each row carries the identifying
# facets (activity, source, experiment, member, table, variable, ...) and the
# `zstore` path of one store.
cmip6_catalog_url = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
df = pd.read_csv(cmip6_catalog_url)

# Preview the first rows.
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [None]:
# Open the pre-processed file from Drive; only its `model` coordinate is used
# here, as the list of models to process.
ds_processed = xr.open_dataset('/content/drive/MyDrive/CMIP6_pre-processed_data.nc')

# Exclude IPSL-CM6A-LR from the list. `drop_sel` is the label-based replacement
# for the deprecated `DataArray.drop(label, dim=...)` form.
source_list = ds_processed.model.drop_sel(model="IPSL-CM6A-LR")

source_list

In [None]:
# File-system handle for Google Cloud Storage using the anonymous token
# (no credentials supplied); used by `load_zarr_dset` to map store paths.
gcs = gcsfs.GCSFileSystem(token='anon')

In [None]:
def load_zarr_dset(df):
    """Open the zarr store referenced by the last row of a catalog selection.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows selected from the catalog; must expose a ``zstore`` column
        holding store paths.

    Returns
    -------
    The object returned by ``xr.open_zarr`` for that store, opened with
    consolidated metadata.

    Raises
    ------
    IndexError
        If ``df`` has no rows, i.e. the selection matched nothing.
    """
    # Fail with an explicit message instead of numpy's generic
    # "index -1 is out of bounds" when the selection is empty.
    if len(df) == 0:
        raise IndexError(
            "empty catalog selection: no zstore path available to open"
        )

    # Use the LAST matching row of the selection.
    # NOTE(review): presumably chosen to pick the most recent entry when a
    # selection matches several rows - confirm against the catalog ordering.
    zstore = df.zstore.values[-1]

    # Mutable-mapping-style interface to the store on the module-level `gcs`.
    mapper = gcs.get_mapper(zstore)

    # Open it using xarray and zarr.
    return xr.open_zarr(mapper, consolidated=True)

In [None]:
# Common monthly time axis (month-end stamps, January 1851 to December 2099)
# that every model's time coordinate is replaced with.
# The range is given by explicit start/end dates instead of a hard-coded
# period count, and the frequency is passed as an offset object because the
# 'M' string alias is deprecated in newer pandas releases.
new_time_index = pd.date_range(
    start='1851-01-01',
    end='2099-12-31',
    freq=pd.offsets.MonthEnd(),
)
new_time_index

DatetimeIndex(['1851-01-31', '1851-02-28', '1851-03-31', '1851-04-30',
               '1851-05-31', '1851-06-30', '1851-07-31', '1851-08-31',
               '1851-09-30', '1851-10-31',
               ...
               '2099-03-31', '2099-04-30', '2099-05-31', '2099-06-30',
               '2099-07-31', '2099-08-31', '2099-09-30', '2099-10-31',
               '2099-11-30', '2099-12-31'],
              dtype='datetime64[ns]', length=2988, freq='M')

In [None]:
# For every model: open its historical and ssp585 monthly precipitation,
# join them in time, reduce to a latitude-weighted global mean, restrict to
# 1851-2099, and relabel the time axis with the shared `new_time_index`.
# The per-model results are then stacked along a new 'model' dimension.
ds_list = list()

# Iterate over plain Python strings rather than 0-d DataArray elements, so the
# `@source` reference in the queries and the progress print see simple names.
for source in source_list.values.tolist():
    df_hist_pr = df.query(
        "activity_id=='CMIP' & table_id == 'Amon' & variable_id == 'pr'"
        " & experiment_id=='historical' & member_id=='r1i1p1f1' & source_id==@source"
    )
    df_ssp585_pr = df.query(
        "activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'pr'"
        " & experiment_id=='ssp585' & member_id=='r1i1p1f1' & source_id==@source"
    )

    print(source)

    # load
    ds_hist_pr = load_zarr_dset(df_hist_pr)
    ds_ssp585_pr = load_zarr_dset(df_ssp585_pr)

    # concat historical and scenario runs along time
    ds_all = xr.concat([ds_hist_pr, ds_ssp585_pr], dim='time', coords='all', compat='override')

    # global mean, weighted by cos(latitude)
    weights = np.cos(np.deg2rad(ds_all.pr.lat))
    ds_all = ds_all.weighted(weights).mean(dim=('lon', 'lat'))

    # slice to our specific period
    ds_all = ds_all.sel(time=slice('1851', '2099'))

    # The shared index can only replace the model's own time axis when both
    # have the same number of steps; report which model is off otherwise.
    n_steps = ds_all.sizes['time']
    if n_steps != len(new_time_index):
        raise ValueError(
            f"{source}: expected {len(new_time_index)} time steps for 1851-2099, found {n_steps}"
        )

    # reassign the new time index created above (a coordinate, hence assign_coords)
    ds_all = ds_all.assign_coords(time=new_time_index)

    # add to list
    ds_list.append(ds_all)

ob_all = xr.concat(ds_list, dim='model', coords='minimal', compat='override')

ob_all

<xarray.DataArray 'model' ()>
array('CanESM5', dtype='<U7')
Coordinates:
    model    <U7 'CanESM5'


In [None]:
# Label the 'model' dimension with the model names. These are coordinate
# labels, not a data variable, so use `assign_coords` rather than `assign`.
ob_all = ob_all.assign_coords(model=source_list)


In [None]:
# Load the data into memory (`load` operates in place and returns the same
# object), then display the result.
ob_all = ob_all.load()
ob_all

In [None]:
# Write the stacked per-model global means to a NetCDF4 file in the current
# working directory, overwriting any existing file of that name.
ob_all.to_netcdf(
    "global_data_processed_precip.nc",
    mode='w',
    format="NETCDF4",
    engine='netcdf4',
)