# Time Analysis

* Time analysis of longrunmip data
* Number of models for each experiment.
This notebook contains an exploration of the timesteps in the longrunmip data.
The notebook also contains a time override. The models in my study all need to have the same timestamps - years since initialisation. The easiest way to do this is to override all the timestamps and add new ones back in, depending on whether the file is annual or monthly (ann or mon in the file name).

In [1]:
import os, sys, warnings, cftime
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot 
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
import constants
sys.path.append(constants.MODULE_DIR)
from importlib import reload
from time import perf_counter
from typing import List
import re
import xarray_class_accessors as xca
import utils
import open_ds

from pprint import pprint

# Files

In [35]:
def remove_other_variables(ds, variable):
    """Reduce ``ds`` to the single data variable called ``variable``.

    If ``variable`` is not present, the function tries to work out which
    existing data variable should be renamed to it:

    * when the dataset holds exactly one data variable, that one is used;
    * otherwise the names 'time_bnds', 'height' and 'TAREA' are ignored and
      exactly one remaining name must be left over.

    Parameters
    ----------
    ds : dataset-like
        Object supporting ``ds[[name]]``, ``ds.data_vars`` and ``ds.rename``.
    variable : str
        Name the returned dataset's only data variable must have.

    Returns
    -------
    The dataset restricted to ``variable``.

    Raises
    ------
    ValueError
        If the variable to rename cannot be identified unambiguously.
    """
    try:
        ds = ds[[variable]]
    except KeyError:
        # The requested name is missing, so look for a variable to rename.
        print('| Variable not found in dataset checing for other possible names')
        names_in_file = np.array(ds.data_vars)

        if len(names_in_file) == 1:
            # A lone data variable is taken to be the one we are after.
            source_name = names_in_file[0]
        else:
            # Discard the known auxiliary variables and see what is left.
            auxiliary_names = ['time_bnds', 'height', 'TAREA']
            is_auxiliary = np.isin(names_in_file, auxiliary_names)
            possible_name = names_in_file[~is_auxiliary]

            if len(possible_name) != 1:
                raise ValueError(f'Cant automate name change {possible_name=}')

            source_name = possible_name[0]

        # Rename, then select again now that the name exists.
        ds = ds.rename({source_name: variable})[[variable]]

    return ds

In [36]:
def refactor_longronmip_netcdf(ds: xr.Dataset, months: list = None, fname: str = None) -> xr.Dataset:
    """Standardise a LongRunMIP dataset's dims and time axis, returning yearly data.

    Steps, in order:

    1. ``open_ds.refactor_dims`` is applied to the dataset.
    2. The frequency is read from the file name: 'M' when the name matches
       ``\\w+mon\\w+``, otherwise 'Y'.
    3. The ``time`` coordinate is replaced by ``open_ds.make_new_time``
       (presumably years since initialisation - confirm in ``open_ds``).
    4. For monthly files, if ``months`` is given only those calendar months
       are kept.
    5. Monthly files are resampled to yearly means.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to process.
    months : list, optional
        Month numbers to keep before yearly averaging. Ignored for annual
        files and when falsy.
    fname : str, optional
        File name the dataset came from; only used to detect the frequency.
        When omitted, the notebook-level variable ``fname`` is used, which
        keeps existing calls that rely on the processing loop's variable
        working.

    Returns
    -------
    xr.Dataset
        The dataset with its new time axis, one time step per year for
        monthly input.

    Raises
    ------
    NameError
        If ``fname`` is neither passed in nor defined at notebook level.
    """
    if fname is None:
        # Backward compatibility: older call sites do not pass the file name
        # and depend on the loop variable of the same name.
        if 'fname' not in globals():
            raise NameError("name 'fname' is not defined; pass fname=... explicitly")
        fname = globals()['fname']

    print('\n| Recatoring dims |', end='')
    ds = open_ds.refactor_dims(ds)

    # Raw string: '\w' in a normal string literal is an invalid escape sequence.
    freq = 'M' if re.search(r'\w+mon\w+', fname) else 'Y'
    print(f' | {freq=}', end='')
    ds['time'] = open_ds.make_new_time(ds, freq=freq, debug=False)

    if months and freq == 'M':
        print(f' | subsetting for months {months}')
        print(f' length = {len(ds.time.values)}')
        # drop=True removes the time steps outside the requested months.
        ds = ds.where(ds.time.dt.month.isin(months), drop=True)
        print(f' length = {len(ds.time.values)}')

    if freq == 'M':
        print(f' | resample=True, old_length = {len(ds.time.values)}, ', end='')
        # One mean value per year (over the retained months only, if subset).
        ds = ds.resample(time='Y').mean()
    print(f' length = {len(ds.time.values)}')

    return ds

In [37]:
# Show what is available under the LongRunMIP root directory.
os.listdir(constants.LONGRUNMIP_DIR)

['tas',
 'surf',
 'netTOA',
 'landesea_masks',
 'tos',
 'pr',
 '.ipynb_checkpoints',
 'psl',
 'sic']

In [38]:
# Variable processed by this run of the notebook; change it and re-run for each variable.
variable = 'sic' # Completed: pr, tas, sic, surf, tos, psl, netTOA
# Input directory: the 'regrid' subfolder of this variable.
ROOT_DIR = os.path.join(constants.LONGRUNMIP_DIR, variable, 'regrid')

In [39]:
# Every file name in the input directory (all experiments).
files = os.listdir(ROOT_DIR)
utils.pprint_list(files)

lenght = 47
 0. sic_mon_GISSE2R_control_5225_g025.nc
 1. sic_mon_CNRMCM61_abrupt2x_750_g025.nc


In [63]:
# Key into MONTH_GROUP_MAPPING: which months to keep before yearly averaging.
# Use None to keep every month.
month_group = 'aso'

In [64]:
# Month numbers belonging to each named group; None maps to None (no month subsetting).
MONTH_GROUP_MAPPING = {'fma': [2,3,4], 'aso': [8, 9, 10], None: None}
months = MONTH_GROUP_MAPPING[month_group]
months

[8, 9, 10]

In [65]:
# Output goes to <LONGRUNMIP_DIR>/<variable>/regrid_retimestamped ...
OUTPUT_DIR = os.path.join(constants.LONGRUNMIP_DIR, variable, 'regrid_retimestamped')
utils.mkdir_no_error(OUTPUT_DIR)
# ... with one extra subfolder per month group when a month group is selected.
if month_group:
    OUTPUT_DIR = os.path.join(OUTPUT_DIR, month_group)
    utils.mkdir_no_error(OUTPUT_DIR)
OUTPUT_DIR

'/g/data/w40/ab2313/PhD/longrunmip/sic/regrid_retimestamped/aso'

In [66]:
# Show what is already in the output directory before anything is written.
utils.pprint_list(os.listdir(OUTPUT_DIR), num_start_items=0)

lenght = 0


In [67]:
# Keep only the abrupt4x and control experiments; nothing else is needed here.
files = [f for f in files if any(tag in f for tag in ('abrupt4x', 'control'))]
utils.pprint_list(files, num_start_items=len(files))

lenght = 26
 0. sic_mon_GISSE2R_control_5225_g025.nc
 1. sic_mon_CCSM3_abrupt4x_2120_g025.nc
 2. sic_mon_MIROC32_abrupt4x_150_g025.nc
 3. sic_mon_GFDLESM2M_control_1340_g025.nc
 4. sic_mon_MPIESM11_abrupt4x_4520_g025.nc
 5. sic_mon_HadCM3L_abrupt4x_1000_g025.nc
 6. sic_mon_MPIESM12_control_1500_g025.nc
 7. sic_ann_HadGEM2_abrupt4x_1295_g025.nc
 8. sic_mon_ECEARTH_control_508_g025.nc
 9. sic_ann_HadGEM2_control_239_g025.nc
 10. sic_mon_GISSE2R_abrupt4x_5001_g025.nc
 11. sic_mon_HadCM3L_control_1000_g025.nc
 12. sic_mon_IPSLCM5A_abrupt4x_1000_g025.nc
 13. sic_mon_CNRMCM61_control_2000_g025.nc
 14. sic_mon_CCSM3_control_1530_g025.nc
 15. sic_ann_FAMOUS_abrupt4x_3000_g025.nc
 16. sic_mon_MPIESM12_abrupt4x_1000_g025.nc
 17. sic_mon_ECHAM5MPIOM_control_100_g025.nc
 18. sic_ann_FAMOUS_control_3000_g025.nc
 19. sic_mon_CESM104_abrupt4x_5900_g025.nc
 20. sic_mon_MPIESM11_control_2000_g025.nc
 21. sic_mon_CESM104_control_1000_g025.nc
 22. sic_mon_CNRMCM61_abrupt4x_1850_g025.nc
 23. sic_mon_MIROC

In [68]:
# Special files that may have failed.
# These are checked in http://localhost:8888/notebooks/Documents/PhD/longrumip_00_07_data_opening_check.ipynb
# 'tas_mon_CNRMCM61_control_2000_g025.nc', 'tas_mon_IPSLCM5A_abrupt4x_1000_g025.nc' 
# 'surf_ann_CESM104_abrupt4x_5900_g025.nc', 'surf_ann_CESM104_abrupt4x_5900_g025.nc'
# files = ['sic_mon_CESM104_abrupt4x_5900_g02.nc']

In [69]:
# Collects one {file name: exception} dict for each file that fails in the processing loop.
error_log = []
# sic_mon_CESM104_abrupt4x_5900_g025 error but no log

In [70]:
# Re-timestamp every selected file and write the result to OUTPUT_DIR under
# the same file name. Failures are recorded in error_log and the loop moves
# on to the next file.
for i, fname in enumerate(files):

    t1 = perf_counter()
    print(f'{i+1}: - {fname}', end='')
    fpath = os.path.join(ROOT_DIR, fname)

    try:
        try:
            ds = xr.open_dataset(fpath, use_cftime=True)
            ds = remove_other_variables(ds, variable)
            ds = refactor_longronmip_netcdf(ds, months=months)
        except ValueError:
            # Retry without decoding the time axis. The retry sits inside the
            # outer try so that a failure here is logged as well, instead of
            # escaping and aborting the whole loop.
            ds = xr.open_dataset(fpath, decode_times=False)
            ds = remove_other_variables(ds, variable)
            ds = refactor_longronmip_netcdf(ds, months=months)
    except (ValueError, OSError, KeyError, FileNotFoundError) as e:
        print(f'\nERROR: {e}\n')
        error_log.append({fname: e})
        # Skip the write: `ds` would still be the previous file's dataset (or
        # undefined on the first file) and would be saved under this file's name.
        continue

    try:
        ds.to_netcdf(os.path.join(OUTPUT_DIR, fname))
        t2 = perf_counter()
        print(f'| complete ({t2-t1})')
    except PermissionError as e:
        print(f'\nERROR: {e}\n')
        error_log.append({fname: e})

1: - sic_mon_GISSE2R_control_5225_g025.nc
| Recatoring dims | | freq='M' | subsetting for months [8, 9, 10]
 length = 62700
 length = 15675
 | resample=True, old_length = 15675,  length = 5225
| complete (7596.684042098001)
2: - sic_mon_CCSM3_abrupt4x_2120_g025.nc
| Recatoring dims | | freq='M' | subsetting for months [8, 9, 10]
 length = 25440
 length = 6360
 | resample=True, old_length = 6360,  length = 2120
| complete (187.68991639837623)
3: - sic_mon_MIROC32_abrupt4x_150_g025.nc
| Recatoring dims | | freq='M' | subsetting for months [8, 9, 10]
 length = 1800
 length = 450
 | resample=True, old_length = 450,  length = 150
| complete (9.663656548596919)
4: - sic_mon_GFDLESM2M_control_1340_g025.nc
| Recatoring dims |Chaning TIME to time
 | freq='M' | subsetting for months [8, 9, 10]
 length = 16080
 length = 4020
 | resample=True, old_length = 4020,  length = 1340
| complete (125.61894918698817)
5: - sic_mon_MPIESM11_abrupt4x_4520_g025.nc
| Recatoring dims | | freq='M' | subsetting fo

In [71]:
# Files that failed during processing, with the exception recorded for each.
error_log

[{'sic_mon_CESM104_abrupt4x_5900_g025.nc': OSError(-101, 'NetCDF: HDF error')}]

In [None]:
# List the files now present in the output directory to check the run.
file_check = os.listdir(OUTPUT_DIR)
utils.pprint_list(file_check)

Failed files <br> 
<b> Pr </b>
* 'pr_mon_CESM104_abrupt4x_5900_g025.nc'

<b> tas </b>
* 'tas_mon_GISSE2R_control_5225_g025.nc'

<b> surf </b>

lenght = 1
 0. surf_ann_CESM104_control_1000_g025.nc
 
<b> tos </b>
<s> 
lenght = 1
0. tos_ann_CESM104_control_1000_g025.nc </s>

<b> netTOA </b>

lenght = 13
 0. netTOA_mon_MPIESM12_abrupt32x_103_g025.nc
 1. netTOA_mon_ECEARTH_rcp85_1271_g025.nc
 2. netTOA_mon_MIROC32_1pct2x_2003_g025.nc
 3. netTOA_mon_GFDLESM2M_1pct2x_4500_g025.nc
 4. netTOA_mon_GISSE2R_1pct4x_5001_g025.nc
 5. netTOA_mon_ECEARTH_historical_156_g025.nc
 6. netTOA_mon_CESM104_abrupt8x_4000_g025.nc
 7. netTOA_mon_MPIESM12_abrupt2x_1000_g025.nc
 8. netTOA_mon_CNRMCM61_abrupt2x_750_g025.nc
 9. netTOA_mon_CCSM3_abrupt2x_3000_g025.nc
 10. netTOA_mon_MPIESM12_abrupt16x_999_g025.nc
 11. netTOA_mon_MPIESM12_abrupt8x_1000_g025.nc
 12. netTOA_mon_MIROC32_1pct4x_2002_g025.nc


<b> sic </b>
None of these are 4xCO2. There is only one control file.
lenght = 22
 0. sic_mon_GISSE2R_control_5225_g025.nc
 1. sic_mon_CNRMCM61_abrupt2x_750_g025.nc
 2. sic_mon_MIROC32_1pct4x_2002_g025.nc
 3. sic_mon_HadCM3L_abrupt2x_1000_g025.nc
 4. sic_mon_GISSE2R_1pct4x_5001_g025.nc
 5. sic_mon_HadCM3L_abrupt6x_1000_g025.nc
 6. sic_mon_MPIESM12_abrupt8x_1000_g025.nc
 7. sic_ann_FAMOUS_abrupt2x_3000_g025.nc
 8. sic_mon_MPIESM12_abrupt16x_1000_g025.nc
 9. sic_mon_MPIESM12_abrupt2x_1000_g025.nc
 10. sic_mon_CESM104_abrupt8x_5100_g025.nc
 11. sic_mon_ECEARTH_rcp85_1271_g025.nc
 12. sic_mon_GFDLCM3_1pct2x_5000_g025.nc
 13. sic_mon_HadCM3L_abrupt8x_1000_g025.nc
 14. sic_mon_ECHAM5MPIOM_1pct4x_6080_g025.nc
 15. sic_mon_CESM104_abrupt4x_5900_g025.nc
 16. sic_mon_GFDLESM2M_1pct2x_4500_g025.nc
 17. sic_mon_ECEARTH_historical_156_g025.nc
 18. sic_mon_CCSM3_abrupt2x_3000_g025.nc
 19. sic_mon_CESM104_abrupt2x_2500_g025.nc
 20. sic_mon_MIROC32_1pct2x_2003_g025.nc
 21. sic_mon_MPIESM12_abrupt32x_103_g025.nc
