## Preprocessing Global TOA Data

We want the data needed to compute climate sensitivity, radiative forcing, and radiative feedbacks.

In [8]:
#imports
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D 
from scipy import stats
import cftime
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [9]:
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [10]:
gcs = gcsfs.GCSFileSystem(token='anon')

In [11]:
def load_zarr_dset(df):
  # get the path to a specific zarr store (the first one from the dataframe above)
  zstore = df.zstore.values[-1]

  # create a mutable-mapping-style interface to the store
  mapper = gcs.get_mapper(zstore)

  # open it using xarray and zarr
  ds = xr.open_zarr(mapper, consolidated=True)
  return ds

In [12]:
#time index to assign model times to
new_time_index=pd.date_range('1850-01-01', '2099-12-31', freq='ME')
new_time_index

DatetimeIndex(['1850-01-31', '1850-02-28', '1850-03-31', '1850-04-30',
               '1850-05-31', '1850-06-30', '1850-07-31', '1850-08-31',
               '1850-09-30', '1850-10-31',
               ...
               '2099-03-31', '2099-04-30', '2099-05-31', '2099-06-30',
               '2099-07-31', '2099-08-31', '2099-09-30', '2099-10-31',
               '2099-11-30', '2099-12-31'],
              dtype='datetime64[ns]', length=3000, freq='ME')

In [13]:
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#models
models_toa_hist_rlut  =set(df_hist_toa_rlut.source_id)
models_toa_hist_rsut  =set(df_hist_toa_rsut.source_id)
models_toa_hist_rsdt  =set(df_hist_toa_rsdt.source_id) # all models that have toa in the historical simulations
models_toa_ssp585_rlut=set(df_ssp585_toa_rlut.source_id) #all models that have toa in the ssp585 simulations
models_toa_ssp585_rsut=set(df_ssp585_toa_rsut.source_id)
models_toa_ssp585_rsdt=set(df_ssp585_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_toa_2 = set.intersection(models_toa_hist_rlut, models_toa_hist_rsut, models_toa_hist_rsdt, models_toa_ssp585_rlut, models_toa_ssp585_rsut, models_toa_ssp585_rsdt)
source_list_toa_2 = list(source_set_toa_2)
print(len(source_list_toa_2))

31


In [14]:
print(source_list_toa_2)

['INM-CM4-8', 'EC-Earth3', 'FGOALS-f3-L', 'NESM3', 'CanESM5', 'IITM-ESM', 'CESM2-WACCM', 'E3SM-1-1', 'GFDL-CM4', 'AWI-CM-1-1-MR', 'CMCC-CM2-SR5', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'KACE-1-0-G', 'TaiESM1', 'NorESM2-LM', 'NorESM2-MM', 'MIROC6', 'CIESM', 'IPSL-CM6A-LR', 'KIOST-ESM', 'ACCESS-CM2', 'MRI-ESM2-0', 'MPI-ESM1-2-LR', 'EC-Earth3-Veg', 'FIO-ESM-2-0', 'FGOALS-g3', 'EC-Earth3-Veg-LR', 'GFDL-ESM4']


### 1.1 Look at One Model

In [17]:
#query data
#historical data
df_toa_hist_canesm5_rlut = df.query(  "activity_id=='CMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_toa_hist_canesm5_rsut = df.query(  "activity_id=='CMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_toa_hist_canesm5_rsdt = df.query(  "activity_id=='CMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
ds_toa_hist_canesm5_rlut = load_zarr_dset(df_toa_hist_canesm5_rlut)
ds_toa_hist_canesm5_rsut = load_zarr_dset(df_toa_hist_canesm5_rsut)
ds_toa_hist_canesm5_rsdt = load_zarr_dset(df_toa_hist_canesm5_rsdt)
ds_toa_hist_canesm5 = xr.concat([ds_toa_hist_canesm5_rlut, ds_toa_hist_canesm5_rsut, ds_toa_hist_canesm5_rsdt], dim = ('time'), coords = 'all', compat = 'override')
#ssp585 data
df_toa_ssp585_canesm5_rlut = df.query("activity_id=='ScenarioMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
df_toa_ssp585_canesm5_rsut = df.query("activity_id=='ScenarioMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
df_toa_ssp585_canesm5_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == 'CanESM5' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
ds_toa_ssp585_canesm5_rlut = load_zarr_dset(df_toa_ssp585_canesm5_rlut)
ds_toa_ssp585_canesm5_rsut = load_zarr_dset(df_toa_ssp585_canesm5_rsut)
ds_toa_ssp585_canesm5_rsdt = load_zarr_dset(df_toa_ssp585_canesm5_rsdt)
ds_toa_ssp585_canesm5 = xr.concat([ds_toa_ssp585_canesm5_rlut, ds_toa_ssp585_canesm5_rsut, ds_toa_ssp585_canesm5_rsdt], dim = ('time'), coords = 'all', compat = 'override')

#load toa data
hist_toa_canesm5 = ds_toa_hist_canesm5.rsdt - ds_toa_hist_canesm5.rsut - ds_toa_hist_canesm5.rlut
ssp585_toa_canesm5 = ds_toa_ssp585_canesm5.rsdt - ds_toa_ssp585_canesm5.rsut - ds_toa_ssp585_canesm5.rlut
ds_all_canesm5 = xr.concat([hist_toa_canesm5, ssp585_toa_canesm5], dim = 'time', coords = 'all', compat = 'override'

Unnamed: 0,Array,Chunk
Bytes,5.80 MiB,1.93 MiB
Shape,"(5940, 64, 2)","(1980, 64, 2)"
Dask graph,3 chunks in 10 graph layers,3 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 5.80 MiB 1.93 MiB Shape (5940, 64, 2) (1980, 64, 2) Dask graph 3 chunks in 10 graph layers Data type float64 numpy.ndarray",2  64  5940,

Unnamed: 0,Array,Chunk
Bytes,5.80 MiB,1.93 MiB
Shape,"(5940, 64, 2)","(1980, 64, 2)"
Dask graph,3 chunks in 10 graph layers,3 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.60 MiB,3.87 MiB
Shape,"(5940, 128, 2)","(1980, 128, 2)"
Dask graph,3 chunks in 10 graph layers,3 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 11.60 MiB 3.87 MiB Shape (5940, 128, 2) (1980, 128, 2) Dask graph 3 chunks in 10 graph layers Data type float64 numpy.ndarray",2  128  5940,

Unnamed: 0,Array,Chunk
Bytes,11.60 MiB,3.87 MiB
Shape,"(5940, 128, 2)","(1980, 128, 2)"
Dask graph,3 chunks in 10 graph layers,3 chunks in 10 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,92.81 kiB,30.94 kiB
Shape,"(5940, 2)","(1980, 2)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 92.81 kiB 30.94 kiB Shape (5940, 2) (1980, 2) Dask graph 3 chunks in 7 graph layers Data type object numpy.ndarray",2  5940,

Unnamed: 0,Array,Chunk
Bytes,92.81 kiB,30.94 kiB
Shape,"(5940, 2)","(1980, 2)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,129.38 MiB
Shape,"(5940, 64, 128)","(4140, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 185.62 MiB 129.38 MiB Shape (5940, 64, 128) (4140, 64, 128) Dask graph 4 chunks in 13 graph layers Data type float32 numpy.ndarray",128  64  5940,

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,129.38 MiB
Shape,"(5940, 64, 128)","(4140, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,80.62 MiB
Shape,"(5940, 64, 128)","(2580, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 185.62 MiB 80.62 MiB Shape (5940, 64, 128) (2580, 64, 128) Dask graph 4 chunks in 13 graph layers Data type float32 numpy.ndarray",128  64  5940,

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,80.62 MiB
Shape,"(5940, 64, 128)","(2580, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,142.50 MiB
Shape,"(5940, 64, 128)","(4560, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 185.62 MiB 142.50 MiB Shape (5940, 64, 128) (4560, 64, 128) Dask graph 4 chunks in 13 graph layers Data type float32 numpy.ndarray",128  64  5940,

Unnamed: 0,Array,Chunk
Bytes,185.62 MiB,142.50 MiB
Shape,"(5940, 64, 128)","(4560, 64, 128)"
Dask graph,4 chunks in 13 graph layers,4 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
