## Preprocessing Global TOA Data for Abrupt4XCO2

 Preprocessing global TOA data for abrupt4xCO2 and piControl simulations.


In [1]:
#imports
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D 
from scipy import stats
import cftime
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [2]:
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [3]:
gcs = gcsfs.GCSFileSystem(token='anon')

In [4]:
def load_zarr_dset(df):
  # get the path to a specific zarr store (the first one from the dataframe above)
  zstore = df.zstore.values[-1]

  # create a mutable-mapping-style interface to the store
  mapper = gcs.get_mapper(zstore)

  # open it using xarray and zarr
  ds = xr.open_zarr(mapper, consolidated=True)
  return ds

In [5]:
#time index to assign abrupt4xCO2 model times to
new_time_index_abrupt =pd.date_range('1850-01-01', '1999-12-31', freq='ME')
new_time_index_abrupt 

DatetimeIndex(['1850-01-31', '1850-02-28', '1850-03-31', '1850-04-30',
               '1850-05-31', '1850-06-30', '1850-07-31', '1850-08-31',
               '1850-09-30', '1850-10-31',
               ...
               '1999-03-31', '1999-04-30', '1999-05-31', '1999-06-30',
               '1999-07-31', '1999-08-31', '1999-09-30', '1999-10-31',
               '1999-11-30', '1999-12-31'],
              dtype='datetime64[ns]', length=1800, freq='ME')

In [6]:
#query all models for toa radiation variables for abrupt-4xCO2
#rlut
df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
#rsut
df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
#rsdt
df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP'         & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
#models
models_toa_abrupt4xCO2_rlut  = set(df_abrupt4xCO2_toa_rlut.source_id)
models_toa_abrupt4xCO2_rsut  = set(df_abrupt4xCO2_toa_rsut.source_id)
models_toa_abrupt4xCO2_rsdt  = set(df_abrupt4xCO2_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_abrupt4xCO2 = set.intersection(models_toa_abrupt4xCO2_rlut, models_toa_abrupt4xCO2_rsut, models_toa_abrupt4xCO2_rsdt)
source_list_abrupt4xCO2 = list(source_set_abrupt4xCO2)
print(len(source_set_abrupt4xCO2))

50


In [7]:
#query all models for toa radiation variables for abrupt-4xCO2 w/ r1i1p1f1
#rlut
df_abrupt4xCO2_toa_rlut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rlut' & experiment_id=='abrupt-4xCO2'")
#rsut
df_abrupt4xCO2_toa_rsut = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsut' & experiment_id=='abrupt-4xCO2'")
#rsdt
df_abrupt4xCO2_toa_rsdt = df.query(  "activity_id=='CMIP' & member_id == 'r1i1p1f1' & table_id == 'Amon' & variable_id == 'rsdt' & experiment_id=='abrupt-4xCO2'")
#models
models_toa_abrupt4xCO2_rlut  = set(df_abrupt4xCO2_toa_rlut.source_id)
models_toa_abrupt4xCO2_rsut  = set(df_abrupt4xCO2_toa_rsut.source_id)
models_toa_abrupt4xCO2_rsdt  = set(df_abrupt4xCO2_toa_rsdt.source_id)

#intersection of all of the models that have all radiation types
source_set_abrupt4xCO2_2 = set.intersection(models_toa_abrupt4xCO2_rlut, models_toa_abrupt4xCO2_rsut, models_toa_abrupt4xCO2_rsdt)
source_list_abrupt4xCO2_2 = list(source_set_abrupt4xCO2_2)
print(len(source_list_abrupt4xCO2_2))

42


In [9]:
#remove models
model_list = [ 'CMCC-CM2-SR5',
               'MPI-ESM-1-2-HAM',
               'EC-Earth3-AerChem',
               'FGOALS-g3',
               'IITM-ESM',
               'CIESM',
               'CAS-ESM2-0']


In [10]:
for source in model_list:
    if source in source_list_abrupt4xCO2_2:
        source_list_abrupt4xCO2_2.remove(source)
print(len(source_list_abrupt4xCO2_2))
print(source_list_abrupt4xCO2_2)

35
['GISS-E2-1-H', 'FGOALS-f3-L', 'ACCESS-ESM1-5', 'GISS-E2-1-G', 'E3SM-1-0', 'GFDL-CM4', 'BCC-ESM1', 'NorESM2-MM', 'KIOST-ESM', 'NESM3', 'FIO-ESM-2-0', 'ACCESS-CM2', 'CESM2-WACCM', 'MIROC6', 'CESM2', 'CESM2-WACCM-FV2', 'CanESM5', 'INM-CM5-0', 'BCC-CSM2-MR', 'CMCC-ESM2', 'TaiESM1', 'MRI-ESM2-0', 'INM-CM4-8', 'MPI-ESM1-2-LR', 'KACE-1-0-G', 'SAM0-UNICON', 'GISS-E2-2-G', 'NorCPM1', 'MPI-ESM1-2-HR', 'IPSL-CM6A-LR', 'CAMS-CSM1-0', 'CESM2-FV2', 'EC-Earth3-Veg', 'AWI-CM-1-1-MR', 'GFDL-ESM4']


### 1.1 Look at One Model

In [11]:
#query data
