# Meta Data Maker

In [1]:
import xarray as xr
import numpy as np
import warnings
import cftime
from importlib import reload
import os, sys
from glob import glob
import constants
sys.path.append(constants.MODULE_DIR)
import json
# Custom plots for signal to noise functions.
import open_ds
import xarray_class_accessors as xca
warnings.filterwarnings('ignore')
import open_ds
import signal_to_noise as sn
import xarray_extender as xce

# Loading Data

In [5]:
# Note: Cannot use FILE_NAME_DICT forn this, as this was used in the creation of FILE_NAME_DICT
fnames_abrupt4x = list(map(os.path.basename, glob(os.path.join(constants.LONGRUNMIP_RETIMED_DIR, '*abrupt4x*'))))
fnames_abrupt4x

['tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc',
 'tas_mon_ECEARTH_abrupt4x_150_g025.nc',
 'tas_ann_HadGEM2_abrupt4x_1299_g025.nc',
 'tas_mon_GISSE2R_abrupt4x_5001_g025.nc',
 'tas_mon_MPIESM12_abrupt4x_999_g025.nc',
 'tas_mon_CNRMCM61_abrupt4x_1850_g025.nc',
 'tas_mon_CCSM3_abrupt4x_2120_g025.nc',
 'tas_mon_MIROC32_abrupt4x_150_g025.nc',
 'tas_ann_MPIESM11_abrupt4x_4459_g025.nc',
 'tas_mon_CESM104_abrupt4x_5900_g025.nc',
 'tas_ann_FAMOUS_abrupt4x_3000_g025.nc',
 'tas_mon_HadCM3L_abrupt4x_1000_g025.nc',
 'tas_mon_IPSLCM5A_abrupt4x_1000_g025.nc',
 'tas_ann_GFDLCM3_abrupt4x_150_g025.nc',
 'tas_ann_GFDLESM2M_abrupt4x_150_g025.nc']

In [6]:
len(fnames_abrupt4x)

15

In [7]:
models_abrupt4x = np.array([fname.split('_')[2] for fname in fnames_abrupt4x])
models_abrupt4x

array(['ECHAM5MPIOM', 'ECEARTH', 'HadGEM2', 'GISSE2R', 'MPIESM12',
       'CNRMCM61', 'CCSM3', 'MIROC32', 'MPIESM11', 'CESM104', 'FAMOUS',
       'HadCM3L', 'IPSLCM5A', 'GFDLCM3', 'GFDLESM2M'], dtype='<U11')

In [8]:
abrupt4x_ds = open_ds.read_and_merge_netcdfs(fnames_abrupt4x, constants.LONGRUNMIP_RETIMED_DIR, verbose=True)
abrupt4x_ds

tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc
<xarray.DataArray 'echam5mpiom' (time: 1001, lat: 72, lon: 144)>
[10378368 values with dtype=float32]
Coordinates:
  * time     (time) object 0000-12-31 00:00:00 ... 1000-12-31 00:00:00
  * lon      (lon) float64 1.25 3.75 6.25 8.75 11.25 ... 351.2 353.8 356.2 358.8
  * lat      (lat) float64 -88.75 -86.25 -83.75 -81.25 ... 83.75 86.25 88.75
-----------
tas_mon_ECEARTH_abrupt4x_150_g025.nc
<xarray.DataArray 'ecearth' (time: 150, lat: 72, lon: 144)>
[1555200 values with dtype=float32]
Coordinates:
  * time     (time) object 0000-12-31 00:00:00 ... 0149-12-31 00:00:00
  * lon      (lon) float64 1.25 3.75 6.25 8.75 11.25 ... 351.2 353.8 356.2 358.8
  * lat      (lat) float64 -88.75 -86.25 -83.75 -81.25 ... 83.75 86.25 88.75
-----------
tas_ann_HadGEM2_abrupt4x_1299_g025.nc
<xarray.DataArray 'hadgem2' (time: 1299, lat: 72, lon: 144)>
[13468032 values with dtype=float32]
Coordinates:
  * lon      (lon) float64 1.25 3.75 6.25 8.75 11.25 ... 351.2 353

In [12]:
fnames_control = list(map(os.path.basename, glob(os.path.join(constants.LONGRUNMIP_RETIMED_DIR, '*control*'))))
fnames_control 

['tas_ann_HadGEM2_control_239_g025.nc',
 'tas_mon_HadCM3L_control_1000_g025.nc',
 'tas_mon_GFDLCM3_control_5200_g025.nc',
 'tas_mon_CCSM3_control_1530_g025.nc',
 'tas_mon_GISSE2R_control_5225_g025.nc',
 'tas_ann_MPIESM11_control_2000_g025.nc',
 'tas_mon_IPSLCM5A_control_1000_g025.nc',
 'tas_mon_ECHAM5MPIOM_control_100_g025.nc',
 'tas_mon_GFDLESM2M_control_1340_g025.nc',
 'tas_mon_MPIESM12_control_1237_g025.nc',
 'tas_ann_GFDLESM2M_control_150_g025.nc',
 'tas_ann_FAMOUS_control_3000_g025.nc',
 'tas_mon_CESM104_control_1000_g025.nc',
 'tas_mon_ECEARTH_control_508_g025.nc',
 'tas_ann_GFDLCM3_control_150_g025.nc',
 'tas_mon_MIROC32_control_680_g025.nc',
 'tas_mon_CNRMCM61_control_2000_g025.nc']

In [15]:
control_ds = open_ds.read_and_merge_netcdfs(fnames_control, constants.LONGRUNMIP_RETIMED_DIR)
control_ds

tas_ann_HadGEM2_control_239_g025.nc
 - Dropping coords ['height']
tas_mon_HadCM3L_control_1000_g025.nc
tas_mon_GFDLCM3_control_5200_g025.nc
tas_mon_CCSM3_control_1530_g025.nc
tas_mon_GISSE2R_control_5225_g025.nc
tas_ann_MPIESM11_control_2000_g025.nc
tas_mon_IPSLCM5A_control_1000_g025.nc
 - Dropping coords ['height']
tas_mon_ECHAM5MPIOM_control_100_g025.nc
tas_mon_GFDLESM2M_control_1340_g025.nc
tas_mon_MPIESM12_control_1237_g025.nc
tas_ann_GFDLESM2M_control_150_g025.nc
tas_ann_FAMOUS_control_3000_g025.nc
 - Dropping coords ['height']
tas_mon_CESM104_control_1000_g025.nc
tas_mon_ECEARTH_control_508_g025.nc
tas_ann_GFDLCM3_control_150_g025.nc
tas_mon_MIROC32_control_680_g025.nc
tas_mon_CNRMCM61_control_2000_g025.nc
 - Dropping coords ['height']


In [16]:
abrupt_models = list(abrupt4x_ds.data_vars)
abrupt_models

['echam5mpiom',
 'ecearth',
 'hadgem2',
 'gisse2r',
 'mpiesm12',
 'cnrmcm61',
 'ccsm3',
 'miroc32',
 'mpiesm11',
 'cesm104',
 'famous',
 'hadcm3l',
 'ipslcm5a',
 'gfdlcm3',
 'gfdlesm2m']

In [17]:
len(abrupt_models)

15

In [18]:
control_models = list(control_ds.data_vars)
control_models

['hadgem2',
 'hadcm3l',
 'gfdlcm3',
 'ccsm3',
 'gisse2r',
 'mpiesm11',
 'ipslcm5a',
 'echam5mpiom',
 'gfdlesm2m',
 'mpiesm12',
 'famous',
 'cesm104',
 'ecearth',
 'miroc32',
 'cnrmcm61']

In [19]:
len(control_models)

15

In [20]:
# Need the intersection: just want the models that have abrupt 4x and a control.
models = np.intersect1d(list(control_ds.data_vars),  list(abrupt4x_ds.data_vars))
models

array(['ccsm3', 'cesm104', 'cnrmcm61', 'ecearth', 'echam5mpiom', 'famous',
       'gfdlcm3', 'gfdlesm2m', 'gisse2r', 'hadcm3l', 'hadgem2',
       'ipslcm5a', 'miroc32', 'mpiesm11', 'mpiesm12'], dtype='<U11')

In [21]:
control_ds = control_ds[models]
abrupt4x_ds = abrupt4x_ds[models]

In [22]:
len(models)

15

# Model Information

In [23]:
# Getting the lenght of each model
model_lengths = {}
for model in models:
    exp_len = abrupt4x_ds[model].attrs['length']
    control_len = control_ds[model].attrs['length']
    model_lengths[model] = {'control': control_len, '4xCO2': exp_len}

In [24]:
model_lengths

{'ccsm3': {'control': 1530, '4xCO2': 2120},
 'cesm104': {'control': 1000, '4xCO2': 5900},
 'cnrmcm61': {'control': 2000, '4xCO2': 1850},
 'ecearth': {'control': 508, '4xCO2': 150},
 'echam5mpiom': {'control': 100, '4xCO2': 1001},
 'famous': {'control': 3000, '4xCO2': 3000},
 'gfdlcm3': {'control': 5200, '4xCO2': 150},
 'gfdlesm2m': {'control': 1340, '4xCO2': 150},
 'gisse2r': {'control': 5225, '4xCO2': 5001},
 'hadcm3l': {'control': 1000, '4xCO2': 1000},
 'hadgem2': {'control': 239, '4xCO2': 1299},
 'ipslcm5a': {'control': 1000, '4xCO2': 1000},
 'miroc32': {'control': 681, '4xCO2': 150},
 'mpiesm11': {'control': 2000, '4xCO2': 4459},
 'mpiesm12': {'control': 1237, '4xCO2': 1000}}

In [25]:
MIN_ACCEPTABLE_LENGTH = 700

In [26]:
good_models = {model: len_obj for model, len_obj in model_lengths.items() 
               if len_obj['control'] > MIN_ACCEPTABLE_LENGTH and len_obj['4xCO2'] > MIN_ACCEPTABLE_LENGTH}

In [27]:
bad_models = {model: len_obj for model, len_obj in model_lengths.items() 
               if len_obj['control'] < MIN_ACCEPTABLE_LENGTH and len_obj['4xCO2'] < MIN_ACCEPTABLE_LENGTH}

In [28]:
bad_models

{'ecearth': {'control': 508, '4xCO2': 150},
 'miroc32': {'control': 681, '4xCO2': 150}}

In [29]:
good_models

{'ccsm3': {'control': 1530, '4xCO2': 2120},
 'cesm104': {'control': 1000, '4xCO2': 5900},
 'cnrmcm61': {'control': 2000, '4xCO2': 1850},
 'famous': {'control': 3000, '4xCO2': 3000},
 'gisse2r': {'control': 5225, '4xCO2': 5001},
 'hadcm3l': {'control': 1000, '4xCO2': 1000},
 'ipslcm5a': {'control': 1000, '4xCO2': 1000},
 'mpiesm11': {'control': 2000, '4xCO2': 4459},
 'mpiesm12': {'control': 1237, '4xCO2': 1000}}

In [30]:
len(good_models)

9

In [31]:
meta_data = {'from_file': '04_mutimodel_mean_top_level_(nb27)', 'data_type':'lonrunmip',
             'min_acceptable_length':MIN_ACCEPTABLE_LENGTH
            }

In [32]:
longrunmip_model_lengths = {'all_models': model_lengths, 'good_models': good_models,
                            'bad_models': bad_models, 'metadata': meta_data, }


In [33]:
with open('data/longrunmip_model_lengths.json', 'w') as fp:
    json.dump(longrunmip_model_lengths, fp)

In [34]:
good_model_list = list(good_models)
good_model_list

['ccsm3',
 'cesm104',
 'cnrmcm61',
 'famous',
 'gisse2r',
 'hadcm3l',
 'ipslcm5a',
 'mpiesm11',
 'mpiesm12']