In [1]:
import os
# Work from the project directory so the relative paths used later in this
# notebook ("ERA5_data", "nino34.long.csv") resolve against it.
# NOTE(review): hard-coded absolute path -- must be edited to run elsewhere.
os.chdir('/vortexfs1/home/anthony.meza/Atmospheric Rivers and Waves')

In [13]:
from help_funcs import * 
import cartopy.crs as ccrs
import cartopy.feature as cfeature

import gsw

import pandas as pd
import xarray as xr
import numpy as np
import netCDF4 as nc

import cmocean.cm as cm
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import seaborn as sns

from multiprocessing import Pool
import multiprocessing
from os.path import exists
from pathlib import Path
from natsort import natsorted
import importlib
import gc


In [3]:
# Apply seaborn's "notebook" plotting context to the figures drawn below.
sns.set_context("notebook")

In [4]:
%%time
GLORYS_dir = r"/vortexfs1/home/anthony.meza/GLORYS_data" 
results = [str(result) for result in list(Path(GLORYS_dir).rglob("*.[nN][cC]"))] #get all files
results = natsorted(results) #sort all files 

years = natsorted(list(set([result[41:45] for result in results])))
months = natsorted(['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'])

CPU times: user 169 ms, sys: 22.7 ms, total: 192 ms
Wall time: 266 ms


In [14]:
def remove_seasonal(ds):
    """Return `ds` with its mean seasonal cycle removed.

    The climatology is the mean over "time" of each calendar-month group
    ("time.month"); it is subtracted from the matching month of every time step.

    Parameters
    ----------
    ds : object with a "time" coordinate supporting ``groupby("time.month")``
        (an xarray Dataset/DataArray in this notebook).

    Returns
    -------
    The anomalies, with the auxiliary "month" coordinate added by the groupby
    arithmetic removed again.
    """
    climatology = ds.groupby("time.month").mean("time")
    anomalies = ds.groupby("time.month") - climatology
    # drop_vars is the current xarray spelling; plain .drop() is deprecated.
    return anomalies.drop_vars("month")

def timecat(fnames):
    """Join a sequence of xarray objects along "time" and sort them by time.

    Despite its name, `fnames` holds the already-opened objects handed to
    ``xr.concat`` (not file names). Concatenation uses ``data_vars="minimal"``
    and ``coords="minimal"``.
    """
    combined = xr.concat(
        fnames,
        dim="time",
        data_vars="minimal",
        coords="minimal",
    )
    return combined.sortby("time")

def read_subset(years, read_atm):
    """Read the ERA5 files belonging to `years`, in parallel where possible.

    Parameters
    ----------
    years : container of str
        Year labels; a file is kept when the four characters just before its
        three-character extension (``name[-7:-3]``) are ``in years``.
    read_atm : callable
        Called as ``read_atm(path)`` for every selected file path (str).

    Returns
    -------
    list
        The ``read_atm`` results, in natural-sort order of the file paths.
        Empty when no file matches.
    """
    file_path = "ERA5_data"
    # NOTE(review): the last file in natural-sort order is always skipped
    # ([:-1]); kept from the original -- confirm this is intentional.
    ERA5_results = natsorted(
        [str(result) for result in Path(file_path).rglob("*.[nN][cC]")]
    )[:-1]
    files = natsorted([f for f in ERA5_results if f[-7:-3] in years])
    if not files:
        return []

    # Leave five cores free, but never ask for fewer than one worker (Pool
    # raises on a non-positive size) or for more workers than files.
    n_cores = max(1, min(multiprocessing.cpu_count() - 5, len(files)))

    if __name__ == '__main__':
        with Pool(n_cores) as p:
            return p.map(read_atm, files)  # about 50 gigs of data!!

    # Outside the main module no pool is started; read serially instead of
    # failing on an unassigned result.
    return [read_atm(f) for f in files]

In [15]:
def plot_spatial_var(ax, data, LONS, LATS, bounds, levels):
    """Draw filled contours of `data` on `ax` with coastlines and edge labels.

    Parameters
    ----------
    ax : axes to draw on (must provide ``contourf``, ``coastlines`` and
        ``gridlines``, e.g. a cartopy GeoAxes).
    data, LONS, LATS : field and its coordinate grids, passed to ``contourf``.
    bounds : colour limits are set symmetrically to ``-bounds`` / ``+bounds``.
    levels : contour levels.

    Returns
    -------
    The contour set returned by ``ax.contourf`` (usable for a colorbar).
    """
    contour_options = dict(
        vmin=-bounds,
        vmax=bounds,
        levels=levels,
        cmap=cm.curl,
        extend="both",
    )
    filled = ax.contourf(LONS, LATS, data, **contour_options)
    ax.coastlines()

    # Fully transparent gridlines: only the tick labels are wanted, and only
    # on the left and bottom edges.
    grid = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.0)
    grid.top_labels = False
    grid.right_labels = False
    return filled


### Get NINO 3.4 Index

In [16]:
# Table with one row per year (index) and one column per month.
df = pd.read_csv("nino34.long.csv", index_col=0)

# Give every (year, month) cell a timestamp on the 15th of that month.
# The dates are built directly rather than by casting the index values to a
# datetime dtype and overwriting them cell by cell, a cast that newer pandas
# releases may reject for day resolution.
n_months = len(df.columns)
_mid_month = np.array(
    [pd.Timestamp(int(year), month, 15).to_datetime64()
     for year in df.index
     for month in range(1, n_months + 1)],
    dtype="datetime64[ns]",
).reshape(df.shape)
dates_df = pd.DataFrame(_mid_month, index=df.index, columns=df.columns)

# Flatten row by row so values and timestamps stay aligned in time order.
NINO34_ds = xr.DataArray(
            data=df.values.flatten(),
            dims=["time"],
            coords=dict(time=dates_df.values.flatten()))
# Keep 1993 onwards only.
NINO34 = NINO34_ds.sel(time = slice("1993", None))

### Make some anomaly movies 

In [45]:
def read_ds_atm(fname): #very little memory footprint, 1 day = 500kib
    """Open one file and return its longitude -125..-115, latitude 50..30 box.

    Parameters
    ----------
    fname : path of the file to open with ``xr.open_dataset``.

    Returns
    -------
    The selected subset, loaded into memory.
    """
    with xr.open_dataset(fname) as ds:
        # Latitude is sliced from 50 down to 30 -- presumably the files store
        # latitude in descending order (TODO confirm).
        ds_new = ds.sel(longitude = slice(-125, -115), latitude = slice(50, 30))
        # Load the values before the context manager closes the file, so the
        # returned object does not depend on a closed file handle.
        return ds_new.load()

In [46]:
%%time 
all_years = [["1998", "1999"], ["2002", "2003"],
             ["2006", "2007"], ["2009", "2010"],
             ["2015", "2016"]]

all_slices = [slice("1998", "1998.35"), slice("2003", "2003.35"),
              slice("2007", "2007.35"), slice("2010", "2010.35"),
              slice("2016", "2016.35")]

all_events = ["97NinoTP", "02NinoTP", 
              "06NinoTP", "09NinoTP", 
              "15NinoTP"]
nevents = len(all_events)

CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 11.9 µs


In [47]:
def average_years(yrs, time_slice, event_name):
    """Time-mean `tp` anomaly over `time_slice`.

    Steps: read the files, concatenate along time, take the `tp` variable,
    resample to daily means and multiply by 24, apply a centred 21-day rolling
    mean, multiply by 1e3, remove the monthly climatology, select
    `time_slice` and average over time.

    NOTE(review): `yrs` and `event_name` are accepted but never used; the
    files read are chosen by the notebook-level `years` variable, so every
    call reads the same set of files. Confirm whether `yrs` was intended.
    """
    datasets = read_subset(years, read_ds_atm)
    gc.collect()

    # A single name is rebound at each step so intermediates can be freed.
    tp = timecat(datasets).tp
    tp = 24 * tp.resample(time="1D").mean()         # resample from 1 hr to 1 day
    tp = tp.rolling(time=21, center=True).mean()    # 21-day rolling average
    tp = remove_seasonal(1e3 * tp).sel(time=time_slice)
    return tp.mean(dim="time")
    

In [48]:
# Compute the mean anomaly map of every event, in the order of all_events.
tp_vars = []
for i, (yrs, time_slice, event_name) in enumerate(zip(all_years, all_slices, all_events)):
    print(event_name)
    ds = average_years(yrs, time_slice, event_name)
    tp_vars.append(ds)


97NinoTP
02NinoTP
06NinoTP
09NinoTP
15NinoTP


In [None]:
# One map panel per event. The panel count follows nevents instead of a
# hard-coded 5; squeeze=False + ravel keeps ax1 indexable even for one event.
fig, ax1 = plt.subplots(ncols = nevents, figsize = (30, 10), squeeze = False,
                        subplot_kw = {"projection":ccrs.PlateCarree()})
ax1 = ax1.ravel()

# Shared symmetric colour limit: largest absolute anomaly over all events.
bounds = np.max([np.nanmax(np.abs(data)) for data in tp_vars])
levels = np.linspace(-1.5, 1.5, 25)

for i in range(0, nevents):
    ds = tp_vars[i]
    data = ds.values
    LONS, LATS = np.meshgrid(ds.longitude, ds.latitude)

    cf = plot_spatial_var(ax1[i], data, LONS, LATS, bounds, levels)
    ax1[i].set_title(all_events[i])  # label each panel with its event
    gc.collect()

# All panels use the same limits and levels, so the last contour set can
# drive the one shared colorbar.
fig.colorbar(cf, ax = ax1, fraction = 0.03, orientation = "horizontal")
fig.suptitle("Total Precipitation Anomalies \n (Seasonal Cycle Removed)")


Text(0.5, 0.98, 'Total Precipitation Anomalies \n (Seasonal Cycle Removed)')