# Time Analysis

* Time analysis of lungrunmip data
* Number of models for each experiment.
This notebook contains an exploration of the timesteps in the longrunmip data.
The notebooks also contains a time override. The models in my study all need to have the same timestamps - year since initialisation. The easiest way to go about this is overriding all time stamps and adding back in new time depending if annual or monthly (ann or monn in file name). 

In [137]:
import os, sys, warnings, cftime
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot 
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
import constants
sys.path.append(constants.MODULE_DIR)
from importlib import reload
from time import perf_counter
from typing import List
import re
import xarray_class_accessors as xca
import utils
import open_ds

# Files

In [146]:
def refactor_longronmip_netcdf(ds: xr.Dataset) -> xr.Dataset:
    ds = open_ds.refactor_dims(ds)

    freq = 'M' if re.search('\w+mon\w+', fname) else 'Y'
    print(f'| {freq=} |', end='')
    ds['time'] = open_ds.make_new_time(ds, freq=freq, debug=False)
    
    
    if freq == 'M':
        
        print(f' resample=True, old_length = {len(ds.time.values)}, ', end='')
        ds = ds.resample(time='Y').mean()
    print(f' length = {len(ds.time.values)}')
        
    return ds
            

In [147]:
os.listdir(constants.LONGRUNMIP_DIR)

['tas', 'netTOA', 'pr', '.ipynb_checkpoints', 'psl', 'sic']

In [148]:
# The directory where all of the longrun mip data used in this notebook is stored.
variable = 'pr'
ROOT_DIR = os.path.join(constants.LONGRUNMIP_DIR, variable, 'regrid')

In [156]:
# All the files
files = os.listdir(ROOT_DIR)
utils.pprint_list(files)

lenght = 50
 0. pr_mon_ECEARTH_rcp85_1271_g025.nc
 1. pr_mon_MPIESM12_control_1237_g025.nc


In [157]:
OUTPUT_DIR = os.path.join(constants.LONGRUNMIP_DIR, variable, 'regrid_retimestamped')
OUTPUT_DIR

'/g/data/w40/ab2313/PhD/longrunmip/pr/regrid_retimestamped'

In [158]:
try:
    os.mkdir(OUTPUT_DIR)
except FileExistsError as e:
    pass

In [None]:
failed_files = []
for i, fname in enumerate(files[37:]):
    t1 = perf_counter()
    
    print(f'- {fname}', end='')
    fpath = os.path.join(ROOT_DIR, fname)
    try:
        ds = open_ds.open_dataset(fpath)
    except:
        print(f'{fname} has failed.')
        failed_files.append(fname)
        
    if ds:
        ds = refactor_longronmip_netcdf(ds)
        ds.to_netcdf(os.path.join(OUTPUT_DIR, fname))
            
        t2 = perf_counter()
        print(f'| complete ({t2-t1})')

- pr_mon_GFDLESM2M_control_1340_g025.nc| freq='M' | resample=True, old_length = 16080,  length = 1340
| complete (128.60581745859236)
- pr_mon_CESM104_abrupt4x_5900_g025.ncpr_mon_CESM104_abrupt4x_5900_g025.nc has failed.
| freq='M' | resample=True, old_length = 1340,  length = 112
| complete (6.739913293160498)
- pr_mon_CCSM3_control_1530_g025.nc| freq='M' | resample=True, old_length = 18360,  length = 1530
| complete (153.91942045465112)
- pr_mon_CCSM3_abrupt8x_1450_g025.nc| freq='M' | resample=True, old_length = 17400,  length = 1450
| complete (138.56768074817955)
- pr_ann_HadGEM2ES_abrupt4x_1299_g025.nc| freq='Y' | length = 1299
| complete (2.379066358320415)
- pr_mon_MPIESM12_abrupt4x_999_g025.nc| freq='M' | resample=True, old_length = 12000,  length = 1000
| complete (87.88444184698164)
- pr_mon_MPIESM12_abrupt2x_999_g025.nc| freq='M' | resample=True, old_length = 12000,  length = 1000
| complete (87.47770055849105)
- pr_mon_IPSLCM5A_abrupt4x_1000_g025.nc| freq='M' | resample=Tru

In [None]:
utils.pprint_list(failed_files)

In [166]:
file_check = os.listdir(OUTPUT_DIR)
utils.pprint_list(file_check)

lenght = 38
 0. pr_mon_ECEARTH_rcp85_1271_g025.nc
 1. pr_mon_MPIESM12_control_1237_g025.nc


In [None]:
# '/g/data/w40/ab2313/PhD/longrunmip/pr/regrid/pr_mon_CESM104_abrupt4x_5900_g025.nc'