In [2]:
# import packages
import sys

import numpy as np
import numpy.ma as ma
import netCDF4 as nc
import matplotlib as mpl
import matplotlib.pyplot as plt

import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, Dropout, Softmax 
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

import warnings
# Installs a blanket "ignore" filter: every warning category from every module
# is silenced for the rest of the session (this includes numpy RuntimeWarnings
# such as divide-by-zero or mean-of-empty-slice).
warnings.filterwarnings('ignore')
from IPython.display import display, HTML
# Injects a CSS rule into the rendered page that sets elements with class
# `container` to 90% width.
# NOTE(review): presumably meant to widen the classic Notebook layout; confirm
# it has the intended effect in the front end actually used.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [3]:
# setting up figure parameters
# All defaults are applied in one update on the shared rcParams object
# (`plt.rcParams` is the same object as `mpl.rcParams`).
mpl.rcParams.update({
    'figure.dpi': 150,
    'axes.labelsize': 12,
    'axes.titlesize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'figure.figsize': (15.0/2, 20.0/2),
})

# Data preparation

In [4]:
# read in data
ORAp_t = '../data/sosstsst_control_regrid_monthly_3D_195801-201412.nc'
ORAp_h = '../data/sossheig_control_regrid_monthly_3D_195801-201412.nc'
ORAp_s = '../data/sosaline_control_regrid_monthly_3D_195801-201412.nc'
ORAp_m = '../data/somxl030_control_regrid_monthly_3D_195801-201412.nc'


def _read_nc_variables(path, *names):
    """Read the named variables from a netCDF file, then close the file.

    Parameters
    ----------
    path : str
        Path of the netCDF file to open.
    *names : str
        Names of the variables to read.

    Returns
    -------
    tuple
        One in-memory array per requested name, in the order given.
    """
    # `[:]` copies the values into memory, so the arrays stay usable after the
    # with-block has closed the dataset. Opening each file once and closing it
    # avoids leaking file handles on every re-run of this cell.
    with nc.Dataset(path) as dataset:
        return tuple(dataset.variables[name][:] for name in names)


# all nc files are regridded to same latlon values, so lat/lon are taken from
# the SST file only
ORA_SST, lat, lon = _read_nc_variables(ORAp_t, 'sst', 'lat', 'lon')
ORA_SSH, = _read_nc_variables(ORAp_h, 'ssh')
ORA_SSS, = _read_nc_variables(ORAp_s, 'sss')
ORA_MLD, = _read_nc_variables(ORAp_m, 'mxl')

# NOTE(review): despite its name this is a running index 1..n_time, not a
# calendar month in 1..12 -- confirm what the later cells expect before using
# it as a "month of year" predictor.
monthofYear = np.arange(1, np.shape(ORA_SST)[0]+1)

In [15]:
# calculate climatology for every grid
def _monthly_climatology(field):
    """Compute the per-grid-point monthly climatology of a monthly field.

    Parameters
    ----------
    field : array-like, shape (n_time, ...)
        Monthly values with time on axis 0. ``n_time`` must be a whole number
        of years (multiple of 12). May be a numpy masked array; masked entries
        are treated as missing. Assumes a floating dtype so that missing
        entries can be represented as NaN.

    Returns
    -------
    clim : ndarray, shape (12, ...)
        Mean over all years for each of the 12 positions within a year
        (NaN where every year is missing).
    clim_full : ndarray, shape (n_time, ...)
        ``clim`` repeated for every year so it lines up with ``field``.

    Raises
    ------
    ValueError
        If ``n_time`` is not a multiple of 12.
    """
    n_time = field.shape[0]
    n_years, leftover = divmod(n_time, 12)
    if leftover:
        raise ValueError(
            'time axis has %d steps, which is not a whole number of years' % n_time)
    # np.array() on a masked array silently drops the mask, so nanmean would
    # average the raw fill values. Converting masked entries to NaN first lets
    # nanmean actually skip them.
    values = ma.filled(field, np.nan)
    by_year = values.reshape((n_years, 12) + values.shape[1:])
    clim = np.nanmean(by_year, axis=0)
    clim_full = np.tile(clim, (n_years,) + (1,) * (clim.ndim - 1))
    return clim, clim_full


OSST_c, OSST_cf = _monthly_climatology(ORA_SST)
OSSH_c, OSSH_cf = _monthly_climatology(ORA_SSH)
OSSS_c, OSSS_cf = _monthly_climatology(ORA_SSS)
OMLD_c, OMLD_cf = _monthly_climatology(ORA_MLD)

# remove climatology (just for predictors)
OSST_a = ORA_SST - OSST_cf
OSSH_a = ORA_SSH - OSSH_cf
OSSS_a = ORA_SSS - OSSS_cf

In [21]:
# predictors and predictand
# Channel order along the last axis of X:
#   0, 1, 2 -> tiled monthly climatology of SST, SSH, SSS
#   3, 4, 5 -> anomaly (field minus climatology) of SST, SSH, SSS
predictor_fields = (OSST_cf, OSSH_cf, OSSS_cf, OSST_a, OSSH_a, OSSS_a)

X = np.full(OSST_a.shape[:3] + (len(predictor_fields),), np.nan)
for channel, field in enumerate(predictor_fields):
    # Assigning a masked array straight into an ndarray copies its raw data
    # (fill values included) and discards the mask; ma.filled turns masked
    # points into NaN so missing data stays recognisable in X.
    X[:,:,:,channel] = ma.filled(field, np.nan)
# X[:,:,:,6] = monthofYear   # leave monthofyear out before dimension reduction

# np.copy() on a masked array also drops the mask, so fill the masked points
# with NaN explicitly; np.array() guarantees y is an independent copy.
y = np.array(ma.filled(ORA_MLD, np.nan))

label_names = ['MLD']
# NOTE(review): this list has 7 names and starts with 'month of year', while X
# above has 6 channels starting with the SST climatology, so feature_list[i]
# does not describe X[..., i]. Confirm the ordering later cells rely on.
feature_list = ['month of year', 'SST - climatology', 'SSH - climatology', 'SSS - climatology', 'SST - anomaly', 'SSH - anomaly', 'SSS - anomaly']

# Unsupervised learning: PCA

In [24]:
# standardize X and Y
def standardize_input(dat, x, s):
    """Standardize data by subtracting a centre and dividing by a scale.

    Parameters
    ----------
    dat : array-like or scalar
        Values to standardize.
    x : array-like or scalar
        Centre (e.g. the mean) subtracted from ``dat``; must broadcast
        against ``dat``.
    s : array-like or scalar
        Scale (e.g. the standard deviation) the centred values are divided
        by; must broadcast against ``dat``.

    Returns
    -------
    ``(dat - x) / s``. No guard is applied where ``s`` is zero, so for numpy
    inputs those positions come out as NaN or +/-inf.
    """
    return (dat - x) / s

# Mean and standard deviation along axis 0 for every remaining index of X and y.
# The statistics are taken over the full arrays; no train/test split has been
# applied at this point.
Xmean, Xstd = np.mean(X, axis=0), np.std(X, axis=0)
ymean, ystd = np.mean(y, axis=0), np.std(y, axis=0)

# Centre and scale predictors and predictand with their own statistics.
X_new = standardize_input(X, Xmean, Xstd)
y_new = standardize_input(y, ymean, ystd)