# Saving Loess Detrended Data
Saving all data with loess detrend.

<b> PHD-1 </b>
* Switched read_and_merge_netcdfs from longrunmip_loaded function to open_ds - all opening files are now done within the one py file
* longrunmip loader removed. Better just to load files in one by one
* Moved from init to top level
* Removed all extra code (see nb 23 in init for old code).


In [2]:
import xarray as xr
from importlib import reload
import numpy as np
import pandas as pd
import os
from glob import glob
from time import perf_counter

In [12]:
import sys
sys.path.append('modules')
import constants
import xarray_class_accessors as xca
import open_ds
import signal_to_noise as sn

In [21]:
chunks = {'lat':48/2,'lon':96/2,'time':-1}

In [14]:
# The directory where all of the longrun mip data used in this notebook is stored.
ROOT_DIR = os.path.join(constants.LONGRUNMIP_DIR,'regrid_retimestamped')
ROOT_DIR

'/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped'

In [15]:
os.listdir(ROOT_DIR)[:5]

['tas_ann_HadGEM2_control_239_g025.nc',
 'tas_mon_HadCM3L_control_1000_g025.nc',
 'tas_mon_GFDLCM3_control_5200_g025.nc',
 'tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc',
 'tas_mon_GFDLCM3_1pct2x_5000_g025.nc']

# Data

In [16]:
fnames_abrupt4x = list(map(os.path.basename, glob(os.path.join(ROOT_DIR, '*abrupt4x*'))))
fnames_abrupt4x[:3]

['tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc',
 'tas_mon_ECEARTH_abrupt4x_150_g025.nc',
 'tas_ann_HadGEM2_abrupt4x_1299_g025.nc']

In [17]:
fnames_control = list(map(os.path.basename, glob(os.path.join(ROOT_DIR, '*control*'))))
fnames_control[:3]

['tas_ann_HadGEM2_control_239_g025.nc',
 'tas_mon_HadCM3L_control_1000_g025.nc',
 'tas_mon_GFDLCM3_control_5200_g025.nc']

In [18]:
# List of all the models that have an experiment
models  = [f.split('_')[2] for f in fnames_abrupt4x]
models

['ECHAM5MPIOM',
 'ECEARTH',
 'HadGEM2',
 'GISSE2R',
 'MPIESM12',
 'CNRMCM61',
 'CCSM3',
 'MIROC32',
 'MPIESM11',
 'CESM104',
 'FAMOUS',
 'HadCM3L',
 'IPSLCM5A',
 'GFDLCM3',
 'GFDLESM2M']

# Single Model Sample

In [51]:
model = models[0]
print(model)

ECHAM5MPIOM


In [52]:
chunks = {'lat':100,'lon':100}

In [53]:
ROOT_DIR

'/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped'

In [59]:
fname_experiment = [f for f in fnames_abrupt4x if model in f][0]
fname_control = [f for f in fnames_control if model in f][0]

print(fname_experiment, fname_control)

tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc tas_mon_ECHAM5MPIOM_control_100_g025.nc


In [60]:
# Data for the model
single_experiment =  xr.open_dataset(os.path.join(ROOT_DIR,fname_experiment),
                             use_cftime = True, chunks = chunks).tas

# if model not in list(control_ds.data_vars):
#     continue
single_control = xr.open_dataset(os.path.join(ROOT_DIR,fname_control),
                             use_cftime = True, chunks = chunks).tas

In [8]:
# Getting the anomalies for the experiment
experiment_anom = single_experiment.clima.anomalies(start = single_experiment.time.dt.year.values[0], 
                                          end = single_experiment.time.dt.year.values[-1],
                            historical = single_experiment)

echam5mpiom
Using historical dataset


In [9]:
# Drop na from time dim: loess will try and do this and get a mismatch otherwise
single_control = single_control.dropna(dim='time')
experiment_anom = experiment_anom.dropna(dim='time')
experiment_anom

Unnamed: 0,Array,Chunk
Bytes,39.59 MiB,324.00 kiB
Shape,"(1001, 72, 144)","(8, 72, 144)"
Count,864 Tasks,126 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 39.59 MiB 324.00 kiB Shape (1001, 72, 144) (8, 72, 144) Count 864 Tasks 126 Chunks Type float32 numpy.ndarray",144  72  1001,

Unnamed: 0,Array,Chunk
Bytes,39.59 MiB,324.00 kiB
Shape,"(1001, 72, 144)","(8, 72, 144)"
Count,864 Tasks,126 Chunks
Type,float32,numpy.ndarray


In [10]:
# Loess filtering
loess_experiment_anom = experiment_anom.sn.loess_grid()

In [11]:
loess_control = single_control.sn.loess_grid()

In [13]:
# Get the file name for the control and experiment by searching for the file name
# containing the model in the list.

control_save_name = [f for f in fnames_control if model.upper() in f.upper()][0]
experiment_save_name = [f for f in fnames_abrupt4x if model.upper() in f.upper()][0]

print(control_save_name, experiment_save_name)

tas_mon_ECHAM5MPIOM_control_100_g025.nc tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc


In [29]:
control_save_name = control_save_name.replace('.nc', '_loess.nc')
experiment_save_name = experiment_save_name.replace('.nc', '_loess.nc')
print(control_save_name, experiment_save_name)

tas_mon_ECHAM5MPIOM_control_100_g025_loess_loess.nc tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025_loess_loess.nc


In [31]:

# Adding some attributes
now = pd.Timestamp.now().strftime(format = '%Y/%m/%d')
history = {'history': f'Modified by Alex Borowiak on {now}','stats': 'loess detrended data'}
loess_experiment_anom.attrs = history
loess_control.attrs = history

In [33]:
# loess_experiment_anom.to_netcdf(os.path.join(ROOT_DIR, 'loess', experiment_save_name))
# loess_control.to_netcdf(os.path.join(ROOT_DIR, 'loess', control_save_name))

print(os.path.join(ROOT_DIR, 'loess', experiment_save_name),\
    os.path.join(ROOT_DIR, 'loess', control_save_name), sep='\n')

/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025_loess_loess.nc
/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_ECHAM5MPIOM_control_100_g025_loess_loess.nc


# Looping Through All

In [19]:
models

['ECHAM5MPIOM',
 'ECEARTH',
 'HadGEM2',
 'GISSE2R',
 'MPIESM12',
 'CNRMCM61',
 'CCSM3',
 'MIROC32',
 'MPIESM11',
 'CESM104',
 'FAMOUS',
 'HadCM3L',
 'IPSLCM5A',
 'GFDLCM3',
 'GFDLESM2M']

In [22]:
%%time
for model in models:
    t1 = perf_counter()
    print(model)

    fname_experiment = [f for f in fnames_abrupt4x if model in f][0]
    fname_control = [f for f in fnames_control if model in f][0]
    
    print('- Files to open:')
    print(fname_experiment, fname_control)
    print(f'- Path to files {ROOT_DIR}')

    # Data for the model
    print('- Opening experiminet')
    single_experiment =  xr.open_dataset(os.path.join(ROOT_DIR,fname_experiment),
                                 use_cftime = True, chunks = chunks).tas

    # if model not in list(control_ds.data_vars):
    #     continue
    print('- Opening control. ', end='')
    single_control = xr.open_dataset(os.path.join(ROOT_DIR,fname_control),
                                 use_cftime = True, chunks = chunks).tas

    # Getting the anomalies for the experiment
    experiment_anom = single_experiment.clima.anomalies(start = single_experiment.time.dt.year.values[0], 
                                              end = single_experiment.time.dt.year.values[-1],
                                historical = single_experiment)

    # Drop na from time dim: loess will try and do this and get a mismatch otherwise
    print('- Removing nans')
    single_control = single_control.dropna(dim='time')
    experiment_anom = experiment_anom.dropna(dim='time')

    # Loess filtering
    print('- Applying loess filter to experiment')
    loess_experiment_anom = experiment_anom.sn.loess_grid()
    print('- Applying loess filter to control')
    loess_control = single_control.sn.loess_grid()

    # Get the file name for the control and experiment by searching for the file name
    # containing the model in the list.
    control_save_name = [f for f in fnames_control if model.upper() in f.upper()][0]
    experiment_save_name = [f for f in fnames_abrupt4x if model.upper() in f.upper()][0]

    print(control_save_name, experiment_save_name)

    control_save_name = control_save_name.replace('.nc', '_loess.nc')
    experiment_save_name = experiment_save_name.replace('.nc', '_loess.nc')
    print('- New file names:')
    print(control_save_name, experiment_save_name)


    # Adding some attributes
    now = pd.Timestamp.now().strftime(format = '%Y/%m/%d')
    history = {'history': f'Modified by Alex Borowiak on {now}','stats': 'loess detrended data'}
    print(f'- Adding metedata to files {history}')
    loess_experiment_anom.attrs = history
    loess_control.attrs = history
    
    
    # Saving files
    print('- Saving files as:')
    print(os.path.join(ROOT_DIR, 'loess', experiment_save_name),\
        os.path.join(ROOT_DIR, 'loess', control_save_name), sep='\n')
    
    
    
    loess_experiment_anom.to_netcdf(os.path.join(ROOT_DIR, 'loess', experiment_save_name))
    loess_control.to_netcdf(os.path.join(ROOT_DIR, 'loess', control_save_name))
    
    t2 = perf_counter()
    time_taken = (t2 - t1)/60
    print(f'COMPLETE - {time_taken} minutes \n\n')

ECHAM5MPIOM
- Files to open:
tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc tas_mon_ECHAM5MPIOM_control_100_g025.nc
- Path to files /g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped
- Opening experiminet
- Opening control. Using historical dataset
- Removing nans
- Applying loess filter to experiment
- Applying loess filter to control
tas_mon_ECHAM5MPIOM_control_100_g025.nc tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025.nc
- New file names:
tas_mon_ECHAM5MPIOM_control_100_g025_loess.nc tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025_loess.nc
- Adding metedata to files {'history': 'Modified by Alex Borowiak on 2022/05/11', 'stats': 'loess detrended data'}
- Saving files as:
/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_ECHAM5MPIOM_abrupt4x_1001_g025_loess.nc
/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_ECHAM5MPIOM_control_100_g025_loess.nc
COMPLETE - 33.00794105860017 minutes 


ECEARTH
- Files to open:
tas_mon_ECEARTH_abrupt4x_150_g025.nc tas_mon_ECEARTH_c

- Removing nans
- Applying loess filter to experiment
- Applying loess filter to control
tas_mon_CESM104_control_1000_g025.nc tas_mon_CESM104_abrupt4x_5900_g025.nc
- New file names:
tas_mon_CESM104_control_1000_g025_loess.nc tas_mon_CESM104_abrupt4x_5900_g025_loess.nc
- Adding metedata to files {'history': 'Modified by Alex Borowiak on 2022/05/12', 'stats': 'loess detrended data'}
- Saving files as:
/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_CESM104_abrupt4x_5900_g025_loess.nc
/g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped/loess/tas_mon_CESM104_control_1000_g025_loess.nc
COMPLETE - 190.5454388415868 minutes 


FAMOUS
- Files to open:
tas_ann_FAMOUS_abrupt4x_3000_g025.nc tas_ann_FAMOUS_control_3000_g025.nc
- Path to files /g/data/w40/ab2313/PhD/longrunmip/regrid_retimestamped
- Opening experiminet
- Opening control. Using historical dataset
- Removing nans
- Applying loess filter to experiment
- Applying loess filter to control
tas_ann_FAMOUS_control_30