## Query Models for TAS

This will be a temporary notebook that will be used to check for all the models that have tas data for the historical simulation and ssp585 simulation. Checking different experiment IDs. 

Afterwards, we can run the process again with all the different models. The models will be kept track with an Excel spreadsheet.

In [2]:
#imports
#imports
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D 
from scipy import stats
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [3]:
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [4]:
def load_zarr_dset(df):
  # get the path to a specific zarr store (the first one from the dataframe above)
  zstore = df.zstore.values[-1]

  # create a mutable-mapping-style interface to the store
  mapper = gcs.get_mapper(zstore)

  # open it using xarray and zarr
  ds = xr.open_zarr(mapper, consolidated=True)
  return ds

In [5]:
gcs = gcsfs.GCSFileSystem(token='anon')

### Historical and SSP585

In [22]:
df_hist_tas = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp585_tas = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'")
#  assemble a set of models (source_id) for each experiment + variable combination
models_tas_hist  =set(df_hist_tas.source_id)    # all models that have temperature in the historical simulations
models_tas_ssp585=set(df_ssp585_tas.source_id)  # all models that have temperature in the ssp585     simulation
# let's only keep the models that have all the runs (the intersection of the set)
source_set = set.intersection(models_tas_hist, models_tas_ssp585)
source_list=list(source_set)
len(source_list)

47

In [23]:
print(source_list)

['CNRM-CM6-1', 'EC-Earth3-Veg-LR', 'EC-Earth3-CC', 'GFDL-ESM4', 'NorESM2-MM', 'EC-Earth3', 'EC-Earth3-Veg', 'GISS-E2-1-H', 'NorESM2-LM', 'MRI-ESM2-0', 'MIROC-ES2L', 'FIO-ESM-2-0', 'IITM-ESM', 'E3SM-1-1', 'CAMS-CSM1-0', 'MCM-UA-1-0', 'CESM2-WACCM', 'CESM2', 'GFDL-CM4', 'BCC-CSM2-MR', 'KIOST-ESM', 'CanESM5-CanOE', 'CMCC-ESM2', 'CIESM', 'CAS-ESM2-0', 'FGOALS-g3', 'HadGEM3-GC31-MM', 'INM-CM5-0', 'ACCESS-ESM1-5', 'TaiESM1', 'HadGEM3-GC31-LL', 'INM-CM4-8', 'ACCESS-CM2', 'MPI-ESM1-2-LR', 'IPSL-CM6A-LR', 'CMCC-CM2-SR5', 'CNRM-ESM2-1', 'MIROC6', 'UKESM1-0-LL', 'NESM3', 'FGOALS-f3-L', 'MPI-ESM1-2-HR', 'GISS-E2-1-G', 'AWI-CM-1-1-MR', 'CNRM-CM6-1-HR', 'CanESM5', 'KACE-1-0-G']


In [24]:
df_tas_hist=df.query  ("activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'& member_id=='r1i1p1f1'")
df_tas_ssp585=df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'    & member_id=='r1i1p1f1'")

#  assemble a set of models (source_id) for each experiment + variable combination
models_tas_hist  =set(df_tas_hist.source_id)    # all models that have temperature in the historical simulations
models_tas_ssp585=set(df_tas_ssp585.source_id)  # all models that have temperature in the ssp585     simulations

# let's only keep the models that have all the runs (the intersection of the set)
#'MPI-ESM1-2-LR' should alr be removed bc they both have and we account for it in global/regional source_list
source_set_2 = set.intersection(models_tas_hist, models_tas_ssp585)
source_list_2 =list(source_set_2)
len(source_list_2) 

35

In [39]:
#checking r1i1p1f1 time periods
for source in source_list_2:
    #access models
    df_hist_tas = df.query(  "activity_id=='CMIP' & member_id=='r1i1p1f1' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical' & source_id == @source")
    df_ssp585_tas = df.query("activity_id=='ScenarioMIP' & member_id=='r1i1p1f1'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585' & source_id == @source")
    ds_hist_tas = load_zarr_dset(df_hist_tas)
    ds_ssp585_tas = load_zarr_dset(df_ssp585_tas)
    ds_all = xr.concat([ds_hist_tas, ds_ssp585_tas], dim = ('time'), coords='all', compat = 'override')
    print("\n"+source)
    print(ds_all.time[0])
    print(ds_all.time[-1])


EC-Earth3-Veg-LR
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

EC-Earth3-CC
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-ESM1-5
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

TaiESM1
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cfti

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-CM2
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

MPI-ESM1-2-LR
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    _ChunkSizes:    1
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('20

In [28]:
#remove any duplicate models in source_list and source_list_2
source_list_rest_models = source_list.copy()
for source in source_list_2:
    if source in source_list:
        source_list_rest_models.remove(source)
print(len(source_list_rest_models))
print(source_list_rest_models)

12
['CNRM-CM6-1', 'GISS-E2-1-H', 'MIROC-ES2L', 'MCM-UA-1-0', 'CESM2', 'CanESM5-CanOE', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-LL', 'CNRM-ESM2-1', 'UKESM1-0-LL', 'GISS-E2-1-G', 'CNRM-CM6-1-HR']


In [26]:
#pick member_id that matches historical + ssp585 in remaining 12 tas models that aren't r1i1p1f1
for source in source_list_rest_models:
    #access models
    df_hist_tas = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical' & source_id == @source")
    df_ssp585_tas = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585' & source_id == @source")
    print("\n"+source)
    print("\nHistorical ID: " + str(df_hist_tas['member_id']))
    print("\nSSP585 ID: " + str(df_ssp585_tas['member_id']))


CNRM-CM6-1

Historical ID: 37798      r1i1p1f2
43397      r2i1p1f2
50745      r7i1p1f2
51000      r9i1p1f2
51729      r6i1p1f2
52023      r5i1p1f2
52225      r4i1p1f2
52482      r8i1p1f2
52588     r10i1p1f2
52706      r3i1p1f2
298319    r26i1p1f2
298375    r14i1p1f2
298507    r18i1p1f2
298615    r21i1p1f2
298746    r23i1p1f2
298785    r22i1p1f2
299004    r16i1p1f2
299006    r17i1p1f2
299188    r15i1p1f2
299362    r13i1p1f2
299807    r30i1p1f2
300089    r27i1p1f2
300130    r19i1p1f2
300202    r20i1p1f2
300425    r25i1p1f2
300812    r12i1p1f2
300831    r28i1p1f2
300968    r11i1p1f2
301021    r24i1p1f2
447991    r29i1p1f2
Name: member_id, dtype: object

SSP585 ID: 54625    r1i1p1f2
71431    r4i1p1f2
74462    r6i1p1f2
75408    r5i1p1f2
75505    r2i1p1f2
75887    r3i1p1f2
Name: member_id, dtype: object

GISS-E2-1-H

Historical ID: 69515      r1i1p1f1
69529      r6i1p1f1
69634      r7i1p1f1
69832      r2i1p1f1
69888      r5i1p1f1
69898      r3i1p1f1
70043      r8i1p1f1
70069      r4i1p1f1
7

In [27]:
#individual query with model and member_id to see if the start time and end time periods are 1850-2100
df_hist_tas = df.query(  "source_id == 'CNRM-ESM2-1' & member_id == 'r2i1p1f2' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp585_tas = df.query("source_id == 'CNRM-ESM2-1' & member_id == 'r2i1p1f2' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'")
ds_hist_tas = load_zarr_dset(df_hist_tas)
ds_ssp585_tas = load_zarr_dset(df_ssp585_tas)
ds_all = xr.concat([ds_hist_tas, ds_ssp585_tas], dim = ('time'), coords='all', compat = 'override')
print(ds_all.time[0])
print(ds_all.time[-1])

<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00


In [66]:
df_hist_tas = df.query(  "source_id == 'MPI-ESM1-2-HR' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp585_tas = df.query("source_id == 'MPI-ESM1-2-HR' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'")
print("\nHistorical ID: " + str(df_hist_tas['member_id']))
print("\nSSP585 ID: " + str(df_ssp585_tas['member_id']))


Historical ID: 222674     r2i1p1f1
223289     r3i1p1f1
223309    r10i1p1f1
236917     r9i1p1f1
237230     r1i1p1f1
238391     r8i1p1f1
238515     r5i1p1f1
238958     r6i1p1f1
238998     r4i1p1f1
239039     r7i1p1f1
Name: member_id, dtype: object

SSP585 ID: 240693    r2i1p1f1
240714    r1i1p1f1
Name: member_id, dtype: object


In [67]:
#individual query for better mpi-esm1-2-hr model
df_hist_tas = df.query(  "source_id == 'MPI-ESM1-2-HR' & member_id == 'r2i1p1f1' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp585_tas = df.query("source_id == 'MPI-ESM1-2-HR' & member_id == 'r2i1p1f1' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'")
ds_hist_tas = load_zarr_dset(df_hist_tas)
ds_ssp585_tas = load_zarr_dset(df_ssp585_tas)
ds_all = xr.concat([ds_hist_tas, ds_ssp585_tas], dim = ('time'), coords='all', compat = 'override')
print(ds_all.time[0])
print(ds_all.time[-1])

<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1850-01-16T12:00:00
    height   float64 2.0
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2100-12-16T12:00:00
    height   float64 2.0
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time


In [68]:
df_hist_tas = df.query(  "source_id == 'AWI-CM-1-1-MR' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp585_tas = df.query("source_id == 'AWI-CM-1-1-MR' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp585'")
print("\nHistorical ID: " + str(df_hist_tas['member_id']))
print("\nSSP585 ID: " + str(df_ssp585_tas['member_id']))


Historical ID: 459906    r1i1p1f1
459908    r2i1p1f1
459910    r3i1p1f1
459913    r5i1p1f1
459920    r4i1p1f1
Name: member_id, dtype: object

SSP585 ID: 204046    r1i1p1f1
Name: member_id, dtype: object


### 1pctCO2

In [5]:
#query tas models that are for 1pctCO2
df_tas_1pctCO2 = df.query ("activity_id == 'CMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == '1pctCO2'")
models_tas_1pctCO2 = set(df_tas_1pctCO2['source_id'])
source_list_3 = list(models_tas_1pctCO2)
print(len(source_list_3))

59


In [6]:
df_tas_1pctCO2 = df.query ("activity_id == 'CMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == '1pctCO2' & member_id == 'r1i1p1f1'")
models_tas_1pctCO2 = set(df_tas_1pctCO2['source_id'])
source_list_4 = list(models_tas_1pctCO2)
print(len(source_list_4))

50


In [7]:
print(source_list_4)

['GISS-E2-1-G', 'INM-CM4-8', 'IPSL-CM5A2-INCA', 'ICON-ESM-LR', 'CIESM', 'CESM2-WACCM', 'SAM0-UNICON', 'GISS-E2-1-H', 'FGOALS-g3', 'EC-Earth3-CC', 'E3SM-1-0', 'TaiESM1', 'KIOST-ESM', 'MRI-ESM2-0', 'INM-CM5-0', 'CMCC-ESM2', 'NorESM2-MM', 'GISS-E2-2-H', 'FGOALS-f3-L', 'MCM-UA-1-0', 'MPI-ESM1-2-LR', 'GFDL-CM4', 'NorESM2-LM', 'FIO-ESM-2-0', 'MIROC6', 'CanESM5', 'AWI-ESM-1-1-LR', 'NorCPM1', 'CESM2-FV2', 'BCC-CSM2-MR', 'CAS-ESM2-0', 'GFDL-ESM4', 'EC-Earth3-Veg', 'ACCESS-CM2', 'CESM2-WACCM-FV2', 'EC-Earth3-AerChem', 'BCC-ESM1', 'CESM2', 'CMCC-CM2-HR4', 'CAMS-CSM1-0', 'NESM3', 'MPI-ESM1-2-HR', 'IITM-ESM', 'KACE-1-0-G', 'AWI-CM-1-1-MR', 'ACCESS-ESM1-5', 'MPI-ESM-1-2-HAM', 'GISS-E2-2-G', 'CMCC-CM2-SR5', 'IPSL-CM6A-LR']


In [8]:
#remove any duplicate models in source_list and source_list_2
source_list_rest_models_2 = source_list_3.copy()
for source in source_list_4:
    if source in source_list_3:
        source_list_rest_models_2.remove(source)
print(len(source_list_rest_models_2))
print(source_list_rest_models_2)

9
['CanESM5-CanOE', 'EC-Earth3', 'CNRM-CM6-1', 'MIROC-ES2L', 'CNRM-ESM2-1', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-LL', 'UKESM1-0-LL', 'CNRM-CM6-1-HR']


In [29]:
for source in source_list_rest_models_2:
    #access models
    df_1pct_CO2 = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='1pctCO2' & source_id == @source")
    print("\n"+source)
    print("\nMember ID: " + str(df_1pct_CO2['member_id']))


MIROC-ES2L

Member ID: 276727    r1i1p1f2
Name: member_id, dtype: object

CanESM5-CanOE

Member ID: 81745    r1i1p2f1
Name: member_id, dtype: object

CNRM-ESM2-1

Member ID: 40723      r1i1p1f2
41263      r2i1p1f2
41577      r3i1p1f2
69204      r4i1p1f2
446915     r7i1p1f2
447033     r9i1p1f2
447416     r8i1p1f2
447716     r6i1p1f2
447737     r5i1p1f2
448057    r10i1p1f2
Name: member_id, dtype: object

UKESM1-0-LL

Member ID: 70692     r1i1p1f2
204829    r2i1p1f2
205962    r3i1p1f2
205986    r4i1p1f2
Name: member_id, dtype: object

EC-Earth3

Member ID: 462694    r3i1p1f1
Name: member_id, dtype: object

CNRM-CM6-1-HR

Member ID: 376104    r1i1p1f2
Name: member_id, dtype: object

HadGEM3-GC31-LL

Member ID: 209341    r1i1p1f3
244310    r2i1p1f3
255404    r3i1p1f3
255623    r4i1p1f3
Name: member_id, dtype: object

HadGEM3-GC31-MM

Member ID: 402100    r1i1p1f3
Name: member_id, dtype: object

CNRM-CM6-1

Member ID: 701    r1i1p1f2
Name: member_id, dtype: object


In [30]:
#checking times for r1i1p1f1 models
for source in source_list_4:
    #access models
    df_1pct_CO2 = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1'& table_id == 'Amon' & variable_id == 'tas' & experiment_id=='1pctCO2' & source_id == @source")
    ds_1pct_CO2 = load_zarr_dset(df_1pct_CO2)
    print("\n"+source)
    print(ds_1pct_CO2.time[0])
    print(ds_1pct_CO2.time[-1])


MCM-UA-1-0
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1, 1, 17, 0, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0001-01-17 00:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    calendar_type:  noleap
    description:    for time-mean fields
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(500, 12, 17, 0, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0500-12-17 00:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    calendar_type:  noleap
    description:    for time-mean fields
    long_name:      time
    standard_name:  time

GISS-E2-2-G
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_b

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-CM2
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(950, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0950-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(1099, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 1099-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

CIESM
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0001-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.Data

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-ESM1-5
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(101, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0101-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(250, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0250-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

CanESM5
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarr

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)



MIROC6
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(3200, 1, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 3200-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(3349, 12, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 3349-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time


  array = array.get_duck_array()


In [104]:
#query individual model and see if we can find other member_id for model w 1850-2100
df_1pctCO2 = df.query(  "source_id == 'FGOALS-g3' & activity_id=='CMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='1pctCO2'")
print(df_1pctCO2['member_id'])

395504    r1i1p1f1
398232    r3i1p1f1
398243    r2i1p1f1
Name: member_id, dtype: object


In [105]:
#query individual model and see if we can find other member_id for model w 1850-2100
df_1pctCO2 = df.query(  "source_id == 'FGOALS-g3' & member_id == 'r1i1p1f1' & activity_id=='CMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='1pctCO2'")
ds_1pctCO2 = load_zarr_dset(df_1pctCO2)
print(ds_1pctCO2.time[0])
print(ds_1pctCO2.time[-1])

<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(370, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0370-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(526, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 ...
    time     object 0526-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time


### Query for piControl Models for 1pctCO2 Baseline

In [9]:
#prob want same member_id for them, find piControl datasets (of same models)
#need to add in the other 1pctCO2 models and reassign them time index because the initialization times don't actually matter
df_tas_piControl = df.query("activity_id == 'CMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == 'piControl'")
df_tas_piControl.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
9380,CMIP,NOAA-GFDL,GFDL-CM4,piControl,r1i1p1f1,Amon,tas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piCon...,,20180701
18371,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,Amon,tas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
31930,CMIP,CNRM-CERFACS,CNRM-CM6-1,piControl,r1i1p1f2,Amon,tas,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1/...,,20180814
32235,CMIP,NASA-GISS,GISS-E2-1-G,piControl,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/pi...,,20180824
40425,CMIP,BCC,BCC-CSM2-MR,piControl,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP6/CMIP/BCC/BCC-CSM2-MR/piContro...,,20181016


In [10]:
#individual query to see if piControl has the same member_ids for models
#time periods don't matter for piControl as well for 1pctCO2
df_tas_piControl = df.query("activity_id == 'CMIP' & source_id ==  'CNRM-ESM2-1' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == 'piControl'")
df_tas_piControl
#ds_tas_piControl = load_zarr_dset(df_tas_piControl)

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version


In [11]:
#if it doesn't exist, then find new member_id for it where both exists
df_tas_piControl = df.query("activity_id == 'CMIP' & source_id == 'AWI-ESM-1-1-LR' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == 'piControl'")
print("\nMember_ID: " + str(df_tas_piControl['member_id']))


Member_ID: 416433    r1i1p1f1
Name: member_id, dtype: object


In [50]:
#double check in 1pctCO2
df_tas_1pctCO2 = df.query("activity_id == 'CMIP' & source_id == 'HadGEM3-GC31-MM' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == '1pctCO2'")
df_tas_1pctCO2

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version


## Query Models for TOA

### Historical + SSP585

In [5]:
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
#models
models_toa_hist_rlut  =set(df_hist_toa_rlut.source_id)
models_toa_hist_rsut  =set(df_hist_toa_rsut.source_id)
models_toa_hist_rsdt  =set(df_hist_toa_rsdt.source_id) # all models that have toa in the historical simulations
models_toa_ssp585_rlut=set(df_ssp585_toa_rlut.source_id) #all models that have toa in the ssp585 simulations
models_toa_ssp585_rsut=set(df_ssp585_toa_rsut.source_id)
models_toa_ssp585_rsdt=set(df_ssp585_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_toa = set.intersection(models_toa_hist_rlut, models_toa_hist_rsut, models_toa_hist_rsdt, models_toa_ssp585_rlut, models_toa_ssp585_rsut, models_toa_ssp585_rsdt)
source_list_toa = list(source_set_toa)
print(len(source_list_toa))

41


In [6]:
print(source_list_toa)

['E3SM-1-1', 'INM-CM5-0', 'BCC-CSM2-MR', 'IPSL-CM6A-LR', 'EC-Earth3-Veg', 'CMCC-CM2-SR5', 'TaiESM1', 'EC-Earth3', 'MRI-ESM2-0', 'CNRM-CM6-1-HR', 'KACE-1-0-G', 'NorESM2-LM', 'HadGEM3-GC31-MM', 'EC-Earth3-Veg-LR', 'CAMS-CSM1-0', 'ACCESS-CM2', 'UKESM1-0-LL', 'HadGEM3-GC31-LL', 'CESM2-WACCM', 'KIOST-ESM', 'MIROC-ES2L', 'MPI-ESM1-2-HR', 'MIROC6', 'NESM3', 'FGOALS-g3', 'CanESM5-CanOE', 'INM-CM4-8', 'FIO-ESM-2-0', 'CESM2', 'AWI-CM-1-1-MR', 'MPI-ESM1-2-LR', 'GFDL-ESM4', 'GISS-E2-1-G', 'CIESM', 'IITM-ESM', 'CanESM5', 'FGOALS-f3-L', 'CNRM-ESM2-1', 'GFDL-CM4', 'NorESM2-MM', 'CNRM-CM6-1']


In [7]:
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical' & member_id == 'r1i1p1f1'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585' & member_id == 'r1i1p1f1'")
#models
models_toa_hist_rlut  =set(df_hist_toa_rlut.source_id)
models_toa_hist_rsut  =set(df_hist_toa_rsut.source_id)
models_toa_hist_rsdt  =set(df_hist_toa_rsdt.source_id) # all models that have toa in the historical simulations
models_toa_ssp585_rlut=set(df_ssp585_toa_rlut.source_id) #all models that have toa in the ssp585 simulations
models_toa_ssp585_rsut=set(df_ssp585_toa_rsut.source_id)
models_toa_ssp585_rsdt=set(df_ssp585_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_toa_2 = set.intersection(models_toa_hist_rlut, models_toa_hist_rsut, models_toa_hist_rsdt, models_toa_ssp585_rlut, models_toa_ssp585_rsut, models_toa_ssp585_rsdt)
source_list_toa_2 = list(source_set_toa_2)
print(len(source_list_toa_2))

31


In [8]:
print(source_list_toa_2)

['E3SM-1-1', 'INM-CM5-0', 'BCC-CSM2-MR', 'IPSL-CM6A-LR', 'EC-Earth3-Veg', 'CMCC-CM2-SR5', 'TaiESM1', 'EC-Earth3', 'MRI-ESM2-0', 'KACE-1-0-G', 'NorESM2-LM', 'EC-Earth3-Veg-LR', 'CAMS-CSM1-0', 'ACCESS-CM2', 'CESM2-WACCM', 'KIOST-ESM', 'MPI-ESM1-2-HR', 'MIROC6', 'NESM3', 'FGOALS-g3', 'INM-CM4-8', 'FIO-ESM-2-0', 'AWI-CM-1-1-MR', 'MPI-ESM1-2-LR', 'GFDL-ESM4', 'IITM-ESM', 'CIESM', 'CanESM5', 'FGOALS-f3-L', 'GFDL-CM4', 'NorESM2-MM']


In [48]:
source_list_rest_toa_models = source_list_toa.copy()
for source in source_list_toa_2:
    if source in source_list_toa:
        source_list_rest_toa_models.remove(source)
print(len(source_list_rest_toa_models))
print(source_list_rest_toa_models)

10
['CNRM-CM6-1', 'MIROC-ES2L', 'CESM2', 'CanESM5-CanOE', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-LL', 'CNRM-ESM2-1', 'UKESM1-0-LL', 'GISS-E2-1-G', 'CNRM-CM6-1-HR']


In [30]:
for source in source_list_rest_toa_models:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP' & source_id == @source        & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == @source  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP' & source_id == @source         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == @source   & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    print("\n"+source)
    print("\nHistorical ID: " + str(df_hist_toa_rlut['member_id']))
    print("\nSSP585 ID: " + str(df_ssp585_toa_rlut['member_id']))


CNRM-CM6-1-HR

Historical ID: 375886    r1i1p1f2
Name: member_id, dtype: object

SSP585 ID: 392368    r1i1p1f2
Name: member_id, dtype: object

MIROC-ES2L

Historical ID: 276954       r2i1p1f2
277065       r3i1p1f2
277182       r1i1p1f2
427210       r8i1p1f2
427297       r9i1p1f2
427748       r6i1p1f2
427771       r7i1p1f2
428217       r5i1p1f2
428507       r4i1p1f2
469457      r10i1p1f2
511322    r1i1000p1f2
515797      r15i1p1f2
515940      r23i1p1f2
516022      r22i1p1f2
516085      r13i1p1f2
516123      r17i1p1f2
516154      r14i1p1f2
516245      r16i1p1f2
516277      r12i1p1f2
516391      r11i1p1f2
516459      r20i1p1f2
516505      r18i1p1f2
516613      r29i1p1f2
516678      r30i1p1f2
516727      r19i1p1f2
516871      r26i1p1f2
516922      r21i1p1f2
516933      r28i1p1f2
517043      r25i1p1f2
Name: member_id, dtype: object

SSP585 ID: 277351     r1i1p1f2
502933     r6i1p1f2
503376     r4i1p1f2
504278     r8i1p1f2
504425     r7i1p1f2
504718    r10i1p1f2
504811     r9i1p1f2
505708  

In [64]:
for source in source_list_rest_toa_models:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP' & source_id == @source        & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == @source  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP' & source_id == @source         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == @source   & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    print("\n"+source)
    print("\nHistorical ID: " + str(df_hist_toa_rsut['member_id']))
    print("\nSSP585 ID: " + str(df_ssp585_toa_rsut['member_id']))


CNRM-CM6-1

Historical ID: 37627      r1i1p1f2
43346      r2i1p1f2
50761      r7i1p1f2
50930      r9i1p1f2
51725      r6i1p1f2
52019      r5i1p1f2
52200      r4i1p1f2
52450      r8i1p1f2
52611      r3i1p1f2
52620     r10i1p1f2
298312    r22i1p1f2
298390    r14i1p1f2
298511    r18i1p1f2
298589    r21i1p1f2
298735    r23i1p1f2
299001    r17i1p1f2
299041    r16i1p1f2
299184    r15i1p1f2
299383    r13i1p1f2
299643    r19i1p1f2
299675    r30i1p1f2
300222    r27i1p1f2
300299    r20i1p1f2
300429    r25i1p1f2
300835    r28i1p1f2
300840    r12i1p1f2
300911    r26i1p1f2
300972    r11i1p1f2
300994    r24i1p1f2
448232    r29i1p1f2
Name: member_id, dtype: object

SSP585 ID: 54535    r1i1p1f2
71396    r4i1p1f2
74458    r6i1p1f2
75376    r5i1p1f2
75543    r2i1p1f2
75961    r3i1p1f2
Name: member_id, dtype: object

MIROC-ES2L

Historical ID: 276649     r2i1p1f2
277073     r3i1p1f2
277211     r1i1p1f2
427214     r8i1p1f2
427318     r9i1p1f2
427752     r6i1p1f2
427775     r7i1p1f2
428243     r5i1p1f2
42

In [49]:
for source in source_list_rest_toa_models:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP' & source_id == @source        & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == @source  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP' & source_id == @source         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == @source   & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    print("\n"+source)
    print("\nHistorical ID: " + str(df_hist_toa_rsdt['member_id']))
    print("\nSSP585 ID: " + str(df_ssp585_toa_rsdt['member_id']))


CNRM-CM6-1

Historical ID: 37628      r1i1p1f2
43360      r2i1p1f2
50778      r7i1p1f2
50901      r9i1p1f2
51723      r6i1p1f2
52017      r5i1p1f2
52198      r4i1p1f2
52437      r8i1p1f2
52613      r3i1p1f2
52878     r10i1p1f2
298308    r22i1p1f2
298392    r14i1p1f2
298523    r18i1p1f2
298564    r21i1p1f2
299008    r17i1p1f2
299042    r16i1p1f2
299179    r15i1p1f2
299382    r13i1p1f2
299647    r30i1p1f2
299687    r19i1p1f2
299725    r27i1p1f2
300256    r20i1p1f2
300431    r25i1p1f2
300837    r28i1p1f2
300876    r12i1p1f2
300913    r26i1p1f2
300942    r11i1p1f2
300992    r24i1p1f2
448183    r29i1p1f2
Name: member_id, dtype: object

SSP585 ID: 54630    r1i1p1f2
71398    r4i1p1f2
74426    r6i1p1f2
75373    r5i1p1f2
75541    r2i1p1f2
76031    r3i1p1f2
Name: member_id, dtype: object

MIROC-ES2L

Historical ID: 276668     r2i1p1f2
277091     r3i1p1f2
277208     r1i1p1f2
427212     r8i1p1f2
427296     r9i1p1f2
427354    r10i1p1f2
427750     r6i1p1f2
427805     r7i1p1f2
428242     r5i1p1f2
42

In [62]:
#check for r1i1p1f1
for source in source_list_toa_2:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source        & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source   & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    print("\n"+source)
    print(ds_hist_toa_rsdt.time[0])
    print(ds_ssp585_toa_rsdt.time[-1])


EC-Earth3-Veg-LR
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

GFDL-ESM4
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     date

In [56]:
#check for r1i1p1f1
for source in source_list_toa_2:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source        & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source  & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source   & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & member_id == 'r1i1p1f1' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    print("\n"+source)
    print(ds_hist_toa_rsut.time[0])
    print(ds_ssp585_toa_rsut.time[-1])


EC-Earth3-Veg-LR
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

GFDL-ESM4
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     date

In [16]:
#check times for r1i1p1fi models
for source in source_list_toa_2:
    #access models
    #rlut
    df_hist_toa_rlut = df.query(  "activity_id=='CMIP'        & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
    df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
    ds_hist_toa_rlut = load_zarr_dset(df_hist_toa_rlut)
    ds_ssp585_toa_rlut = load_zarr_dset(df_ssp585_toa_rlut)
    #rsut
    df_hist_toa_rsut = df.query(  "activity_id=='CMIP'        & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
    df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
    ds_hist_toa_rsut = load_zarr_dset(df_hist_toa_rsut)
    ds_ssp585_toa_rsut = load_zarr_dset(df_ssp585_toa_rsut)
    #rsdt
    df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'        & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
    df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
    ds_hist_toa_rsdt = load_zarr_dset(df_hist_toa_rsdt)
    ds_ssp585_toa_rsdt = load_zarr_dset(df_ssp585_toa_rsdt)
    #check each time span of models
    #could it be that some variables vary in time periods compared to the others in the same model/source_id?
    #we can check each individ variable first and then if it's fine we can concat tg 
    #concat tg to check
    print("\n"+source)
    print(ds_hist_toa_rlut.time[0])
    print(ds_ssp585_toa_rlut.time[-1])


E3SM-1-1
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2100-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

INM-CM5-0
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 16, 12, 0, 0, 0, has_year_zero=True),


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



MRI-ESM2-0
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

KACE-1-0-G
<xarray.DataArray 'time' ()>
array(cftime.Datetime360Day(1850, 1, 16, 0, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 00:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.Datetime360Day(2100, 12, 16, 0, 0, 0, 0, has_year_zero=True),
      dtype

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-CM2
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

CESM2-WACCM
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2299, 12, 15, 12, 0, 0, 0, ha

In [47]:
#individual query with model and member_id to see if the start time and end time periods are 1850-2100
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
ds_hist_toa_rlut = load_zarr_dset(df_hist_toa_rlut)
ds_ssp585_toa_rlut = load_zarr_dset(df_ssp585_toa_rlut)
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
ds_hist_toa_rsut = load_zarr_dset(df_hist_toa_rsut)
ds_ssp585_toa_rsut = load_zarr_dset(df_ssp585_toa_rsut)
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r4i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
ds_hist_toa_rsdt = load_zarr_dset(df_hist_toa_rsdt)
ds_ssp585_toa_rsdt = load_zarr_dset(df_ssp585_toa_rsdt)
print("\n"+source)
print(ds_hist_toa_rlut.time[0])
print(ds_ssp585_toa_rlut.time[-1])


NorESM2-MM
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2100-12-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double


In [8]:
#individual query with model and member_id to see if the start time and end time periods are 1850-2100
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'        & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
ds_hist_toa_rlut = load_zarr_dset(df_hist_toa_rlut)
ds_ssp585_toa_rlut = load_zarr_dset(df_ssp585_toa_rlut)
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'        & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
ds_hist_toa_rsut = load_zarr_dset(df_hist_toa_rsut)
ds_ssp585_toa_rsut = load_zarr_dset(df_ssp585_toa_rsut)
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'        & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
ds_hist_toa_rsdt = load_zarr_dset(df_hist_toa_rsdt)
ds_ssp585_toa_rsdt = load_zarr_dset(df_ssp585_toa_rsdt)
print(ds_hist_toa_rsut.time[0])
print(ds_ssp585_toa_rsut.time[-1])

<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00


In [27]:
df['variable_id'].unique()

array(['ps', 'rsds', 'rlus', 'rlds', 'psl', 'prw', 'hurs', 'huss', 'hus',
       'hfss', 'rsus', 'evspsbl', 'rsdt', 'hfls', 'rsut', 'clt', 'zg',
       'ts', 'va', 'uas', 'vas', 'tauv', 'tauu', 'tas', 'ta', 'ua', 'pr',
       'prc', 'rsutcs', 'wtem', 'vtem', 'prsn', 'rlut', 'rlutcs',
       'tasmax', 'tasmin', 'emidust', 'emiss', 'mmrbc', 'mmrdust',
       'mmroa', 'mmrpm2p5', 'o3', 'mmrsoa', 'mmrss', 'od550lt1aer', 'oh',
       'emidms', 'mmrso4', 'cltc', 'ptp', 'airmass', 'ccb', 'cdnc', 'toz',
       'so2', 'rsutcsaf', 'wa', 'rlutcsaf', 'rlutaf', 'od870aer',
       'od550aer', 'abs550aer', 'rsutaf', 'snw', 'mrsos', 'mrso', 'mrro',
       'areacella', 'siconc', 'basin', 'mrros', 'mlotst', 'clivi', 'hur',
       'sfdsi', 'co2mass', 'rsntds', 'masso', 'soga', 'evspsblsoi', 'sos',
       'sosga', 'tauuo', 'sfcWind', 'clwvi', 'vo', 'vmo', 'uo', 'umo',
       'tosga', 'tauvo', 'tos', 'thetao', 'thetaoga', 'pbo', 'thkcello',
       'orog', 'volo', 'wfo', 'cllcalipso', 'evspsblpot', 'wap', '

In [192]:
#additional variables (rsutcs and rlutcs) for CRE
#rsutcs
df_hist_toa_rsutcs = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='historical'")
df_ssp585_toa_rsutcs = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='ssp585'")
ds_hist_toa_rsutcs = load_zarr_dset(df_hist_toa_rsutcs)
ds_ssp585_toa_rsutcs = load_zarr_dset(df_ssp585_toa_rsutcs)
#rlutcs
df_hist_toa_rlutcs = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='historical'")
df_ssp585_toa_rlutcs = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='ssp585'")
ds_hist_toa_rlutcs = load_zarr_dset(df_hist_toa_rlutcs)
ds_ssp585_toa_rlutcs = load_zarr_dset(df_ssp585_toa_rlutcs)

print(ds_hist_toa_rsutcs.time[0])
print(ds_ssp585_toa_rsutcs.time[-1])

<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2100-12-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double


In [194]:
#see if it exists CRE
#rlut
df_hist_toa_rlut = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='historical'")
df_ssp585_toa_rlut = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='ssp585'")
ds_hist_toa_rlut = load_zarr_dset(df_hist_toa_rlut)
ds_ssp585_toa_rlut = load_zarr_dset(df_ssp585_toa_rlut)
#rsut
df_hist_toa_rsut = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='historical'")
df_ssp585_toa_rsut = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='ssp585'")
ds_hist_toa_rsut = load_zarr_dset(df_hist_toa_rsut)
ds_ssp585_toa_rsut = load_zarr_dset(df_ssp585_toa_rsut)
#rsdt
df_hist_toa_rsdt = df.query(  "activity_id=='CMIP'        & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='historical'")
df_ssp585_toa_rsdt = df.query("activity_id=='ScenarioMIP' & source_id == 'CESM2' & member_id == 'r10i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='ssp585'")
ds_hist_toa_rsdt = load_zarr_dset(df_hist_toa_rsdt)
ds_ssp585_toa_rsdt = load_zarr_dset(df_ssp585_toa_rsdt)

print(ds_hist_toa_rsdt.time[0])
print(ds_ssp585_toa_rsdt.time[-1])

<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 15, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2100-12-15 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    standard_name:  time
    title:          time
    type:           double


### Abrupt4xCO2 + piControl

In [5]:
#query all models for toa radiation variables for abrupt-4xCO2
#rlut
df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
#rsut
df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
#rsdt
df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
#models
models_toa_abrupt4xCO2_rlut  = set(df_abrupt4xCO2_toa_rlut.source_id)
models_toa_abrupt4xCO2_rsut  = set(df_abrupt4xCO2_toa_rsut.source_id)
models_toa_abrupt4xCO2_rsdt  = set(df_abrupt4xCO2_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_abrupt4xCO2 = set.intersection(models_toa_abrupt4xCO2_rlut, models_toa_abrupt4xCO2_rsut, models_toa_abrupt4xCO2_rsdt)
source_list_abrupt4xCO2 = list(source_set_abrupt4xCO2)
print(len(source_set_abrupt4xCO2))

50


In [6]:
#query all models for toa radiation variables for abrupt-4xCO2 w/ r1i1p1f1
#rlut
df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
#rsut
df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
#rsdt
df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
#models
models_toa_abrupt4xCO2_rlut  = set(df_abrupt4xCO2_toa_rlut.source_id)
models_toa_abrupt4xCO2_rsut  = set(df_abrupt4xCO2_toa_rsut.source_id)
models_toa_abrupt4xCO2_rsdt  = set(df_abrupt4xCO2_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_abrupt4xCO2_2 = set.intersection(models_toa_abrupt4xCO2_rlut, models_toa_abrupt4xCO2_rsut, models_toa_abrupt4xCO2_rsdt)
source_list_abrupt4xCO2_2 = list(source_set_abrupt4xCO2_2)
print(len(source_list_abrupt4xCO2_2))

42


In [7]:
print(source_list_abrupt4xCO2_2)

['GISS-E2-1-G', 'E3SM-1-0', 'IITM-ESM', 'CAS-ESM2-0', 'INM-CM5-0', 'CMCC-ESM2', 'NorCPM1', 'FIO-ESM-2-0', 'CESM2', 'GISS-E2-2-G', 'MPI-ESM1-2-HR', 'EC-Earth3-Veg', 'CMCC-CM2-SR5', 'CIESM', 'BCC-CSM2-MR', 'ACCESS-CM2', 'KACE-1-0-G', 'INM-CM4-8', 'FGOALS-f3-L', 'CESM2-WACCM-FV2', 'NESM3', 'BCC-ESM1', 'CAMS-CSM1-0', 'CESM2-FV2', 'SAM0-UNICON', 'CanESM5', 'IPSL-CM6A-LR', 'TaiESM1', 'GFDL-CM4', 'MPI-ESM-1-2-HAM', 'MIROC6', 'GFDL-ESM4', 'ACCESS-ESM1-5', 'EC-Earth3-AerChem', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'CESM2-WACCM', 'AWI-CM-1-1-MR', 'KIOST-ESM', 'NorESM2-MM', 'FGOALS-g3', 'GISS-E2-1-H']


In [8]:
#find models that don't have r1i1p1f1
source_abrupt4xCO2_rest_models = source_list_abrupt4xCO2.copy()
for source in source_list_abrupt4xCO2_2:
    if source in source_list_abrupt4xCO2:
        source_abrupt4xCO2_rest_models.remove(source)
print(source_abrupt4xCO2_rest_models)

['CNRM-ESM2-1', 'CNRM-CM6-1', 'UKESM1-0-LL', 'HadGEM3-GC31-MM', 'EC-Earth3', 'MIROC-ES2L', 'CNRM-CM6-1-HR', 'HadGEM3-GC31-LL']


In [9]:
for source in source_abrupt4xCO2_rest_models:
    #rlut
    df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
    #rsut
    df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
    #rsdt
    df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id == @source & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
    print("\n"+source)
    print(str(df_abrupt4xCO2_toa_rsdt['member_id']))


HadGEM3-GC31-LL
209320    r1i1p1f3
Name: member_id, dtype: object

CNRM-ESM2-1
41086    r1i1p1f2
41445    r2i1p1f2
53110    r3i1p1f2
Name: member_id, dtype: object

MIROC-ES2L
277179    r1i1p1f2
Name: member_id, dtype: object

EC-Earth3
383118    r3i1p1f1
439980    r8i1p1f1
Name: member_id, dtype: object

CNRM-CM6-1-HR
375847    r1i1p1f2
Name: member_id, dtype: object

HadGEM3-GC31-MM
403165    r1i1p1f3
Name: member_id, dtype: object

UKESM1-0-LL
70766    r1i1p1f2
Name: member_id, dtype: object

CNRM-CM6-1
21545    r1i1p1f2
39473    r6i1p1f2
39676    r5i1p1f2
39721    r4i1p1f2
39843    r3i1p1f2
39912    r2i1p1f2
Name: member_id, dtype: object


In [10]:
#individual check member_id for other models
#rlut
df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP' & source_id ==  'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
ds_abrupt4xCO2_toa_rlut = load_zarr_dset(df_abrupt4xCO2_toa_rlut)
#rsut
df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP' & source_id ==  'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
ds_abrupt4xCO2_toa_rsut = load_zarr_dset(df_abrupt4xCO2_toa_rsut)
#rsdt
df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id ==  'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
ds_abrupt4xCO2_toa_rsdt = load_zarr_dset(df_abrupt4xCO2_toa_rsdt)
#check member_id
print("\n"+source)
print(ds_abrupt4xCO2_toa_rsdt.time[0])
print(ds_abrupt4xCO2_toa_rsdt.time[-1])


CNRM-CM6-1
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
<xarray.DataArray 'time' ()>
array('1999-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 1999-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00


In [11]:
#check r1i1p1f1 models
for source in source_list_abrupt4xCO2_2:
    #rlut
    df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
    ds_abrupt4xCO2_toa_rlut = load_zarr_dset(df_abrupt4xCO2_toa_rlut)
    #rsut
    df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
    ds_abrupt4xCO2_toa_rsut = load_zarr_dset(df_abrupt4xCO2_toa_rsut)
    #rsdt
    df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP' & source_id == @source & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
    ds_abrupt4xCO2_toa_rsdt = load_zarr_dset(df_abrupt4xCO2_toa_rsdt)
    #check member_id
    print("\n"+source)
    print(ds_abrupt4xCO2_toa_rlut.time[0])
    print(ds_abrupt4xCO2_toa_rlut.time[-1])


NorCPM1
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0001-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(150, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0150-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

GFDL-CM4
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0001-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    calendar_type:  noleap
    description:    Temporal mean
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoL

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-ESM1-5
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(101, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0101-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(250, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0250-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

GFDL-ESM4
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0001-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    calendar_type:  noleap
    description:    Temporal mean
    long_name:      time
    standard_name:  time
<xarray.DataArray 't

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-CM2
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(950, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 0950-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(1099, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1099-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

INM-CM4-8
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1999, 12, 16, 12, 0, 0,

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



IPSL-CM6A-LR
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(1850, 1, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    _ChunkSizes:    1
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(2749, 12, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 2749-12-16 12:00:00
Attributes:
    _ChunkSizes:    1
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time

KIOST-ESM
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 17, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-17 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



MIROC6
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(3200, 1, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 3200-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(3449, 12, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 3449-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

GISS-E2-2-G
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2000, 12, 16, 12, 0, 0, 0, has_year_zero

In [127]:
#check r1i1f1p1 models in piControl too with indivdiual query
#rlut
df_toa_piControl_rlut = df.query("activity_id == 'CMIP' & source_id == 'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id == 'piControl'")
ds_toa_piControl_rlut = load_zarr_dset(df_toa_piControl_rlut)
#rsut
df_toa_piControl_rsut = df.query("activity_id == 'CMIP' & source_id == 'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id == 'piControl'")
ds_toa_piControl_rsut = load_zarr_dset(df_toa_piControl_rsut)
#rsdt
df_toa_piControl_rsdt = df.query("activity_id == 'CMIP' & source_id == 'CNRM-CM6-1' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id == 'piControl'")
ds_toa_piControl_rsdt = load_zarr_dset(df_toa_piControl_rsdt)
#check current variable
print(ds_toa_piControl_rsdt.source_id)
print(ds_toa_piControl_rsdt.time[0])
print(ds_toa_piControl_rsdt.time[-1])

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)


CNRM-CM6-1
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(1850, 1, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
<xarray.DataArray 'time' ()>
array(cftime.DatetimeGregorian(2349, 12, 16, 12, 0, 0, 0, has_year_zero=False),
      dtype=object)
Coordinates:
    time     object 2349-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00


  array = array.get_duck_array()
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()


In [122]:
#check if it exists for piControl r1i1p1f1
#rlut
df_toa_piControl_rlut = df.query("activity_id == 'CMIP' & source_id == 'MIROC-ES2L' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id == 'piControl'")
#rsut
df_toa_piControl_rsut = df.query("activity_id == 'CMIP' & source_id == 'MIROC-ES2L' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id == 'piControl'")
#rsdt
df_toa_piControl_rsdt = df.query("activity_id == 'CMIP' & source_id == 'MIROC-ES2L' & member_id == 'r1i1p1f2' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id == 'piControl'")

df_toa_piControl_rlut

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
277099,CMIP,MIROC,MIROC-ES2L,piControl,r1i1p1f2,Amon,rlut,gn,gs://cmip6/CMIP6/CMIP/MIROC/MIROC-ES2L/piContr...,,20190823


In [75]:
#find other member_ids for the ones that didn't match for abrupt4xCO2
#rlut
df_toa_abrupt4xCO2_rlut = df.query("activity_id == 'CMIP' & source_id == 'UKESM1-0-LL' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id == 'abrupt-4xCO2'")
#rsut
df_toa_abrupt4xCO2_rsut = df.query("activity_id == 'CMIP' & source_id == 'UKESM1-0-LL' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id == 'abrupt-4xCO2'")
#rsdt
df_toa_abrupt4xCO2_rsdt = df.query("activity_id == 'CMIP' & source_id == 'UKESM1-0-LL' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id == 'abrupt-4xCO2'")
#check member_ids
print(df_toa_abrupt4xCO2_rsdt['source_id'])
print(df_toa_abrupt4xCO2_rsdt['member_id'])

70766    UKESM1-0-LL
Name: source_id, dtype: object
70766    r1i1p1f2
Name: member_id, dtype: object


In [70]:
#check for new member_id time for abrupt4xCO2
#rlut
df_toa_abrupt4xCO2_rlut = df.query("activity_id == 'CMIP' & source_id == 'CanESM5' & member_id == 'r1i1p2f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id == 'abrupt-4xCO2'")
ds_toa_abrupt4xCO2_rlut = load_zarr_dset(df_toa_abrupt4xCO2_rlut)
#rsut
df_toa_abrupt4xCO2_rsut = df.query("activity_id == 'CMIP' & source_id == 'CanESM5' & member_id == 'r1i1p2f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id == 'abrupt-4xCO2'")
ds_toa_abrupt4xCO2_rsut = load_zarr_dset(df_toa_abrupt4xCO2_rsut)
#rsdt
df_toa_abrupt4xCO2_rsdt = df.query("activity_id == 'CMIP' & source_id == 'CanESM5' & member_id == 'r1i1p2f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id == 'abrupt-4xCO2'")
ds_toa_abrupt4xCO2_rsdt = load_zarr_dset(df_toa_abrupt4xCO2_rsdt)
#check times
print(ds_toa_abrupt4xCO2_rsut.time[0])
print(ds_toa_abrupt4xCO2_rsut.time[-1])

<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2000, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    time     object 2000-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time


In [73]:
#find other member_ids for the ones that didn't match for piControl
#rlut
df_toa_piControl_rlut = df.query("activity_id == 'CMIP' & source_id == 'FGOALS-f3-L' & member_id == 'r3i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id == 'piControl'")
ds_toa_piControl_rlut = load_zarr_dset(df_toa_piControl_rlut)
#rsut
df_toa_piControl_rsut = df.query("activity_id == 'CMIP' & source_id == 'FGOALS-f3-L' & member_id == 'r3i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id == 'piControl'")
ds_toa_piControl_rsut = load_zarr_dset(df_toa_piControl_rsut)
#rsdt
df_toa_piControl_rsdt = df.query("activity_id == 'CMIP' & source_id == 'FGOALS-f3-L' & member_id == 'r3i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id == 'piControl'")
ds_toa_piControl_rsdt = load_zarr_dset(df_toa_piControl_rsdt)
#check times
print(ds_toa_piControl_rsdt.time[0])
print(ds_toa_piControl_rsdt.time[-1])

IndexError: index -1 is out of bounds for axis 0 with size 0

In [8]:
#check if cre variables are there
#additional variables (rsutcs and rlutcs) for CRE
#rsutcs
df_toa_abrupt4xCO2_rsutcs = df.query(  "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rsutcs = df.query(    "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='piControl'")
ds_toa_abrupt4xCO2_rsutcs = load_zarr_dset(df_toa_abrupt4xCO2_rsutcs)
ds_toa_piControl_rsutcs = load_zarr_dset(df_toa_piControl_rsutcs)
#rlutcs
df_toa_abrupt4xCO2_rlutcs = df.query(  "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rlutcs = df.query(    "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='piControl'")
ds_toa_abrupt4xCO2_rlutcs = load_zarr_dset(df_toa_abrupt4xCO2_rlutcs)
ds_toa_piControl_rlutcs = load_zarr_dset(df_toa_piControl_rlutcs)

print(ds_toa_abrupt4xCO2_rlutcs.time[0])
print(ds_toa_abrupt4xCO2_rlutcs.time[-1])

IndexError: index -1 is out of bounds for axis 0 with size 0

In [10]:
#check if cre variables are there
#additional variables (rsutcs and rlutcs) for CRE
#rsutcs
df_toa_abrupt4xCO2_rsutcs = df.query(  "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rsutcs = df.query(    "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='piControl'")
ds_toa_abrupt4xCO2_rsutcs = load_zarr_dset(df_toa_abrupt4xCO2_rsutcs)
ds_toa_piControl_rsutcs = load_zarr_dset(df_toa_piControl_rsutcs)
#rlutcs
df_toa_abrupt4xCO2_rlutcs = df.query(  "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rlutcs = df.query(    "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='piControl'")
ds_toa_abrupt4xCO2_rlutcs = load_zarr_dset(df_toa_abrupt4xCO2_rlutcs)
ds_toa_piControl_rlutcs = load_zarr_dset(df_toa_piControl_rlutcs)

print(ds_toa_piControl_rsutcs.time[0])
print(ds_toa_piControl_rsutcs.time[-1])

IndexError: index -1 is out of bounds for axis 0 with size 0

In [14]:
#do cre variables exist
#rsutcs
df_toa_abrupt4xCO2_rsutcs = df.query(  "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rsutcs = df.query(    "activity_id=='CMIP'  & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='piControl'")

#rlutcs
df_toa_abrupt4xCO2_rlutcs = df.query(  "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rlutcs = df.query(    "activity_id=='CMIP' & source_id == 'CAMS-CSM1-0' & member_id == 'r2i1p1f1' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='piControl'")

#df_toa_abrupt4xCO2_rlutcs
df_toa_piControl_rlutcs

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version


In [7]:
#check for other member_ids that exist for CRE variables that don't exist for previously chosen TOA member_ids
#rsutcs
df_toa_abrupt4xCO2_rsutcs = df.query(  "activity_id=='CMIP'  & source_id ==   'CAMS-CSM1-0' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rsutcs = df.query(    "activity_id=='CMIP'  & source_id ==   'CAMS-CSM1-0' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='piControl'")

#rlutcs
df_toa_abrupt4xCO2_rlutcs = df.query(  "activity_id=='CMIP' & source_id ==   'CAMS-CSM1-0' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rlutcs = df.query(    "activity_id=='CMIP' & source_id ==   'CAMS-CSM1-0'  & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='piControl'")

print(df_toa_abrupt4xCO2_rlutcs['source_id'])
print(df_toa_abrupt4xCO2_rlutcs['member_id'])

217615    CAMS-CSM1-0
245467    CAMS-CSM1-0
Name: source_id, dtype: object
217615    r1i1p1f1
245467    r2i1p1f1
Name: member_id, dtype: object


In [None]:
#check for other member_ids for piControl
#rsutcs
df_toa_abrupt4xCO2_rsutcs = df.query(  "activity_id=='CMIP'  & source_id == 'FGOALS-g3' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rsutcs = df.query(    "activity_id=='CMIP'  & source_id == 'FGOALS-g3' & table_id == 'Amon' & variable_id == 'rsutcs' & experiment_id=='piControl'")

#rlutcs
df_toa_abrupt4xCO2_rlutcs = df.query(  "activity_id=='CMIP' & source_id == 'FGOALS-g3' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='abrupt-4xCO2'")
df_toa_piControl_rlutcs = df.query(    "activity_id=='CMIP' & source_id == 'FGOALS-g3' & table_id == 'Amon' & variable_id == 'rlutcs' & experiment_id=='piControl'")

print(df_toa_piControl_rlutcs['source_id'])
print(df_toa_piControl_rlutcs['member_id'])

## EOF for Different RCP (TAS)

### Historical & SSP 126

In [14]:
scenario = df[df['activity_id'] == 'ScenarioMIP']
scenario.experiment_id.unique()

array(['ssp585', 'ssp245', 'ssp119', 'ssp370', 'ssp126', 'ssp460',
       'ssp434', 'ssp534-over', 'rcp26-cmip5', 'rcp45-cmip5',
       'rcp85-cmip5'], dtype=object)

In [12]:
df_hist_tas = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp126_tas = df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'")
models_tas_hist  =set(df_hist_tas.source_id)    # all models that have temperature in the historical simulations
models_tas_ssp126=set(df_ssp126_tas.source_id)  # all models that have temperature in the ssp126     simulation
# let's only keep the models that have all the runs (the intersection of the set)
source_set = set.intersection(models_tas_hist, models_tas_ssp126)
source_list=list(source_set)
len(source_list)

45

In [13]:
print(source_list)

['TaiESM1', 'NorESM2-LM', 'CanESM5', 'INM-CM4-8', 'NESM3', 'UKESM1-0-LL', 'CNRM-CM6-1-HR', 'ACCESS-ESM1-5', 'CAMS-CSM1-0', 'EC-Earth3', 'AWI-CM-1-1-MR', 'CMCC-CM2-SR5', 'MPI-ESM1-2-LR', 'CESM2-WACCM', 'CIESM', 'IITM-ESM', 'FGOALS-f3-L', 'GISS-E2-1-H', 'MPI-ESM1-2-HR', 'BCC-CSM2-MR', 'IPSL-CM6A-LR', 'GFDL-ESM4', 'INM-CM5-0', 'CNRM-CM6-1', 'FIO-ESM-2-0', 'GISS-E2-1-G', 'CESM2', 'FGOALS-g3', 'MIROC6', 'KACE-1-0-G', 'CanESM5-CanOE', 'EC-Earth3-Veg', 'MCM-UA-1-0', 'HadGEM3-GC31-MM', 'IPSL-CM5A2-INCA', 'HadGEM3-GC31-LL', 'EC-Earth3-Veg-LR', 'MIROC-ES2L', 'CMCC-ESM2', 'CAS-ESM2-0', 'KIOST-ESM', 'NorESM2-MM', 'MRI-ESM2-0', 'ACCESS-CM2', 'CNRM-ESM2-1']


In [14]:
df_tas_hist=df.query  ("activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'& member_id=='r1i1p1f1'")
df_tas_ssp126=df.query("activity_id=='ScenarioMIP'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'    & member_id=='r1i1p1f1'")

#  assemble a set of models (source_id) for each experiment + variable combination
models_tas_hist  =set(df_tas_hist.source_id)    # all models that have temperature in the historical simulations
models_tas_ssp126=set(df_tas_ssp126.source_id)  # all models that have temperature in the ssp126     simulations

source_set_2 = set.intersection(models_tas_hist, models_tas_ssp126)
source_list_2 =list(source_set_2)
len(source_list_2) 

33

In [15]:
source_list_2

['TaiESM1',
 'NorESM2-LM',
 'CanESM5',
 'INM-CM4-8',
 'NESM3',
 'ACCESS-ESM1-5',
 'CAMS-CSM1-0',
 'EC-Earth3',
 'AWI-CM-1-1-MR',
 'CMCC-CM2-SR5',
 'MPI-ESM1-2-LR',
 'CESM2-WACCM',
 'CIESM',
 'IITM-ESM',
 'FGOALS-f3-L',
 'MPI-ESM1-2-HR',
 'BCC-CSM2-MR',
 'IPSL-CM6A-LR',
 'GFDL-ESM4',
 'INM-CM5-0',
 'FIO-ESM-2-0',
 'FGOALS-g3',
 'MIROC6',
 'KACE-1-0-G',
 'EC-Earth3-Veg',
 'IPSL-CM5A2-INCA',
 'EC-Earth3-Veg-LR',
 'CMCC-ESM2',
 'CAS-ESM2-0',
 'KIOST-ESM',
 'NorESM2-MM',
 'MRI-ESM2-0',
 'ACCESS-CM2']

In [8]:
for source in source_list_2:
    #access models
    df_hist_tas = df.query(  "activity_id=='CMIP' & member_id=='r1i1p1f1' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical' & source_id == @source")
    df_ssp126_tas = df.query("activity_id=='ScenarioMIP' & member_id=='r1i1p1f1'  & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126' & source_id == @source")
    ds_hist_tas = load_zarr_dset(df_hist_tas)
    ds_ssp126_tas = load_zarr_dset(df_ssp126_tas)
    ds_all = xr.concat([ds_hist_tas, ds_ssp126_tas], dim = ('time'), coords='all', compat = 'override')
    print("\n"+source)
    print(ds_all.time[0])
    print(ds_all.time[-1])


TaiESM1
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(2100, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 2100-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

CMCC-CM2-SR5
<xarray.DataArray 'time' ()>
array(cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 1850-01-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-ESM1-5
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

EC-Earth3-Veg-LR
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:0

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  array = array.get_duck_array()



ACCESS-CM2
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array(cftime.DatetimeProlepticGregorian(2300, 12, 16, 12, 0, 0, 0, has_year_zero=True),
      dtype=object)
Coordinates:
    height   float64 2.0
    time     object 2300-12-16 12:00:00
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time

MPI-ESM1-2-LR
<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    _ChunkSizes:    1
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time
<xarray.DataArray 'time' ()>
array('20

In [9]:
df_hist_tas = df.query(  "source_id ==  'NorESM2-LM' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp126_tas = df.query("source_id ==  'NorESM2-LM' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'")
print("\nHistorical ID: " + str(df_hist_tas['member_id']))
print("\nSSP126 ID: " + str(df_ssp126_tas['member_id']))


Historical ID: 254492    r1i1p1f1
294562    r3i1p1f1
294626    r2i1p1f1
Name: member_id, dtype: object

SSP126 ID: 380392    r1i1p1f1
Name: member_id, dtype: object


In [10]:
df_hist_tas = df.query(  "source_id ==  'NorESM2-MM' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp126_tas = df.query("source_id ==  'NorESM2-MM' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'")
print("\nHistorical ID: " + str(df_hist_tas['member_id']))
print("\nSSP5126 ID: " + str(df_ssp126_tas['member_id']))


Historical ID: 380148    r1i1p1f1
417693    r2i1p1f1
458117    r3i1p1f1
Name: member_id, dtype: object

SSP5126 ID: 379207    r1i1p1f1
Name: member_id, dtype: object


In [16]:
#remove any duplicate models in source_list and source_list_2
source_list_rest_models = source_list.copy()
for source in source_list_2:
    if source in source_list:
        source_list_rest_models.remove(source)
print(len(source_list_rest_models))
print(source_list_rest_models)

12
['UKESM1-0-LL', 'CNRM-CM6-1-HR', 'GISS-E2-1-H', 'CNRM-CM6-1', 'GISS-E2-1-G', 'CESM2', 'CanESM5-CanOE', 'MCM-UA-1-0', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-LL', 'MIROC-ES2L', 'CNRM-ESM2-1']


In [17]:
source_list_rest_models

['UKESM1-0-LL',
 'CNRM-CM6-1-HR',
 'GISS-E2-1-H',
 'CNRM-CM6-1',
 'GISS-E2-1-G',
 'CESM2',
 'CanESM5-CanOE',
 'MCM-UA-1-0',
 'HadGEM3-GC31-MM',
 'HadGEM3-GC31-LL',
 'MIROC-ES2L',
 'CNRM-ESM2-1']

In [44]:
df_hist_tas = df.query(  "source_id ==   'CNRM-ESM2-1' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp126_tas = df.query("source_id ==   'CNRM-ESM2-1' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'")
print("\nHistorical ID: " + str(df_hist_tas['member_id']))
print("\nSSP5126 ID: " + str(df_ssp126_tas['member_id']))


Historical ID: 44180      r1i1p1f2
50646      r3i1p1f2
51290      r5i1p1f2
51360      r2i1p1f2
52351      r4i1p1f2
406557    r10i1p1f2
406858     r9i1p1f2
406970     r8i1p1f2
407181     r7i1p1f2
430469    r11i1p1f2
Name: member_id, dtype: object

SSP5126 ID: 68935    r1i1p1f2
72111    r2i1p1f2
73540    r5i1p1f2
76246    r4i1p1f2
76383    r3i1p1f2
Name: member_id, dtype: object


In [46]:
#individual query 
df_hist_tas = df.query(  "source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & activity_id=='CMIP'        & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='historical'")
df_ssp126_tas = df.query("source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id=='ssp126'")
ds_hist_tas = load_zarr_dset(df_hist_tas)
ds_ssp126_tas = load_zarr_dset(df_ssp126_tas)
ds_all = xr.concat([ds_hist_tas, ds_ssp126_tas], dim = ('time'), coords='all', compat = 'override')
print(ds_all.time[0])
print(ds_all.time[-1])

<xarray.DataArray 'time' ()>
array('1850-01-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 1850-01-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
<xarray.DataArray 'time' ()>
array('2100-12-16T12:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    height   float64 2.0
    time     datetime64[ns] 2100-12-16T12:00:00
Attributes:
    axis:           T
    bounds:         time_bounds
    long_name:      Time axis
    standard_name:  time
    time_origin:    1850-01-01 00:00:00
