In [1]:
import copy
import glob
import pickle
import warnings
from datetime import datetime, timedelta
from itertools import product
import joblib

import cartopy
import cartopy.crs as ccrs
import cartopy.feature
import cartopy.feature as cfeature
import cartopy.feature as cf
import cartopy.io.shapereader as shpreader
import matplotlib as mpl
import matplotlib.path as mpath
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely.geometry as sgeom
import xarray as xr
from scipy import stats
from scipy.spatial.distance import cdist
from shapely import geometry
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import sys
# sys.path.append("/glade/u/home/jhayron/WeatherRegimes/Scripts/")
# import cluster_analysis, narm_analysis, som_analysis

In [2]:
## The goal of this code is to compute daily anomalies of the input variables, including some combinations of them

# SST - alone
# ST - alone
# SM - alone
# SM + SST: limited region
# ST + SST: full region
# ST + SST: limited region
# OLR (olr is really top thermal longwave radiation, less negative = positive anomalies = more convection)
# U10


# First, compute the anomalies for each variable individually

In [5]:
# Short name of each ERA5 variable mapped to its unit string. Keeping both in
# one mapping guarantees the two lists below stay aligned index by index.
_units_by_variable = {
    'olr': 'J/m2',
    'sm': 'm3/m3',
    'sst': 'K',
    'st': 'K',
    'u10': 'm/s',
}
names_variables = list(_units_by_variable)
units = list(_units_by_variable.values())

In [6]:
def _noleap_day_of_year(times):
    """Map timestamps to a day of year in 1..365 on a fixed 365-day calendar.

    ``DatetimeIndex.dayofyear`` counts 29 February, so in leap years every
    date from 1 March onwards is numbered one higher than the same calendar
    date in a common year (1 March is 61 instead of 60, 31 December is 366).
    Binning on that raw number mixes neighbouring calendar dates and leaves
    31 December of leap years without a slot. Here leap-year dates after
    February are shifted back by one so that a given calendar date always
    gets the same number. 29 February itself keeps 60 and therefore shares
    the slot of 1 March.

    Parameters
    ----------
    times : array-like of datetime64
        Time stamps to convert.

    Returns
    -------
    numpy.ndarray of int
        Day of year on the 365-day calendar, one value per time stamp.
    """
    times = pd.DatetimeIndex(times)
    shifted = np.asarray(times.is_leap_year) & np.asarray(times.month > 2)
    return np.asarray(times.dayofyear) - shifted.astype(int)


# Last year (inclusive) of the reference period used for the climatology.
last_reference_year = 2010

for iname, var_name in enumerate(names_variables):
    print(var_name)
    clim_name = f'{var_name}_climatology'

    # Open the raw file once: it feeds both the climatology and the anomalies,
    # and the context manager releases the file handle when the variable is done.
    with xr.open_dataset(f'/glade/work/jhayron/Weather_Regimes/ERA5/Daily_1degree/netcdf_final/{var_name}.nc') as dataset_era:
        # Time bookkeeping is computed once instead of inside every loop iteration.
        times_all = pd.DatetimeIndex(dataset_era.time.values)
        doy_all = _noleap_day_of_year(times_all)

        ### daily climatology
        # Reference sample: years up to `last_reference_year`, 29 February excluded.
        is_feb29 = np.asarray(times_all.month == 2) & np.asarray(times_all.day == 29)
        in_reference = np.asarray(times_all.year <= last_reference_year) & ~is_feb29
        reference = dataset_era[var_name].isel(time=in_reference)
        doy = doy_all[in_reference]

        # One time-mean field per calendar day (1..365).
        # Assumes the variable's non-time dimensions are ordered (lat, lon) — TODO confirm.
        daily_means = [reference.isel(time=(doy == i)).mean('time').values
                       for i in range(1, 366)]
        climatology = xr.Dataset(
            {clim_name: (['day_of_year', 'lat', 'lon'], np.array(daily_means))},
            coords={
                'day_of_year': (['day_of_year'], np.arange(1, 366)),
                'lat': (['lat'], dataset_era.lat.values),
                'lon': (['lon'], dataset_era.lon.values),
            },
            attrs={'File Author' : 'Jhayron S. Pérez-Carrasquilla','units':units[iname]},
        )
        climatology.to_netcdf(f'/glade/work/jhayron/Weather_Regimes/ERA5/Daily_1degree/climatologies/{var_name}_climatology_1959_2010.nc')

        ### smoothed climatology
        # The year is tiled three times so the centred 31-day windows wrap around
        # the December/January boundary; two passes of the running mean are
        # applied and only the middle copy is kept.
        raw_climatology = climatology[clim_name]
        smooth_climatology = xr.concat([raw_climatology, raw_climatology, raw_climatology], dim='day_of_year')
        smooth_climatology = smooth_climatology.rolling(day_of_year=31, min_periods=1, center=True).mean(skipna=True).rolling(
                                          day_of_year=31, min_periods=1, center=True).mean(skipna=True)
        smooth_climatology = smooth_climatology.isel(day_of_year=slice(365, 365 * 2))
        smooth_climatology = smooth_climatology.transpose('day_of_year', 'lat', 'lon')

        ### compute anomalies
        # Anomalies are computed for the whole record (not only the reference
        # period), one calendar day at a time to keep the temporaries small.
        # Because `doy_all` is already on the 365-day calendar, every time step
        # (including 29 February and 31 December of leap years) has a slot.
        anomalies = copy.deepcopy(dataset_era)
        for day in range(1, 366):
            on_this_day = doy_all == day
            anomalies[var_name][{'time': on_this_day}] = (
                dataset_era[var_name].isel(time=on_this_day)
                - smooth_climatology.sel(day_of_year=day)
            )
        anomalies = anomalies.rename({var_name: f'{var_name}_anomalies'})
        # Written while the source file is still open so any lazily loaded
        # variables can still be read.
        anomalies.to_netcdf(f'/glade/work/jhayron/Weather_Regimes/ERA5/Daily_1degree/anomalies/{var_name}_anomalies_v1959_2010.nc')
    
    

olr
sm
sst
st
u10


# Create combined datasets

## SST (ocean) + ST (land)

In [3]:
# Directory holding the per-variable anomaly files. The trailing '/' matters:
# file names are appended to this string directly inside f-strings.
path_anomalies = '/glade/work/jhayron/Weather_Regimes/ERA5/Daily_1degree/anomalies/'

In [4]:
# Open the SST and ST anomaly files from the anomalies directory.
sst_path = f'{path_anomalies}sst_anomalies_v1959_2010.nc'
st_path = f'{path_anomalies}st_anomalies_v1959_2010.nc'
sst_anoms, st_anoms = map(xr.open_dataset, (sst_path, st_path))

In [5]:
# Grid cells whose SST anomaly is finite at the first time step are taken as
# ocean; all others (NaN/inf) are taken as land.
# NOTE(review): the mask uses the first time step only — assumes the missing-value
# pattern of SST is the same at every time step; confirm.
is_ocean = np.isfinite(sst_anoms.sst_anomalies.isel(time=0).values)
where_land = np.where(~is_ocean)
where_ocean = np.where(is_ocean)

In [6]:
# Persist the land/ocean index tuples next to the anomaly files.
# np.save appends the '.npy' extension because the names given here have none.
np.save(f'{path_anomalies}where_land',where_land)
np.save(f'{path_anomalies}where_ocean',where_ocean)

In [7]:
# Independent in-memory copies of both anomaly fields, so that overwriting
# land cells in `array_sst` cannot touch the data held by the datasets.
array_sst = np.array(sst_anoms.sst_anomalies.values)
array_st = np.array(st_anoms.st_anomalies.values)

In [8]:
# For every time step, replace the land cells of the SST field with the ST
# anomalies at the same cells (in place, through a view of each time slice).
land_rows, land_cols = where_land
for day_index, sst_day in enumerate(array_sst):
    sst_day[land_rows, land_cols] = array_st[day_index, land_rows, land_cols]

In [9]:
# Wrap the merged array in a copy of the SST dataset. The merged field keeps
# the variable name `sst_anomalies` and the dimension order of the original.
sst_st_anoms = copy.deepcopy(sst_anoms).assign(
    sst_anomalies=(sst_anoms.sst_anomalies.dims, array_sst)
)

In [13]:
# Write the merged SST (ocean) + ST (land) field. Inside the file it is still
# stored under the variable name 'sst_anomalies'.
sst_st_anoms.to_netcdf(f'{path_anomalies}sst_st_anomalies_v1959_2010.nc')

## SST (ocean) + SM (land)

In [14]:
# Same anomalies directory as used for the SST + ST merge, restated so this
# section does not depend on that cell. The trailing '/' is required because
# file names are appended directly.
path_anomalies = '/glade/work/jhayron/Weather_Regimes/ERA5/Daily_1degree/anomalies/'

In [15]:
# Open the SM anomaly file from the anomalies directory.
sm_anoms = xr.open_dataset(f'{path_anomalies}sm_anomalies_v1959_2010.nc')

In [16]:
# Fresh copy of the SST anomalies (ocean part of the merge) and a copy of the
# SM anomalies (land part), both detached from the datasets.
array_sst = np.array(sst_anoms.sst_anomalies.values)
array_sm = np.array(sm_anoms.sm_anomalies.values)

In [19]:
# For every time step, replace the land cells of the SST field with the SM
# anomalies at the same cells (in place, through a view of each time slice).
land_rows, land_cols = where_land
for day_index, sst_day in enumerate(array_sst):
    sst_day[land_rows, land_cols] = array_sm[day_index, land_rows, land_cols]

In [21]:
# Wrap the merged array in a copy of the SM dataset. The merged field is
# stored under the name `sm_anomalies`, although its ocean cells hold SST.
sst_sm_anoms = copy.deepcopy(sm_anoms).assign(
    sm_anomalies=(sm_anoms.sm_anomalies.dims, array_sst)
)

In [26]:
# Write the merged SST (ocean) + SM (land) field. Inside the file it is stored
# under the variable name 'sm_anomalies'.
sst_sm_anoms.to_netcdf(f'{path_anomalies}sst_sm_anomalies_v1959_2010.nc')