# Read in data

In [None]:
import numpy.linalg as la
import xarray as xr
import numpy as np

def normalize_data(x):
    """Return unit-variance anomalies of ``x`` about its mean 12-step cycle.

    The leading axis is split into consecutive groups of 12 steps. For each
    of the 12 positions within a group (and each grid point) the mean over
    all groups is subtracted, and the resulting anomalies are divided by a
    single scalar: their standard deviation over every element.

    Parameters
    ----------
    x : numpy.ndarray
        3-D array; the length of its first axis must be a multiple of 12,
        otherwise the reshape raises ``ValueError``.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``x``; the input is left untouched.
    """
    n_steps, n_rows, n_cols = x.shape
    # view the series as (group, position-in-group, row, col)
    grouped = x.reshape(n_steps // 12, 12, n_rows, n_cols)
    # mean cycle: average over groups, kept broadcastable against `grouped`
    mean_cycle = np.mean(grouped, axis=0, keepdims=True)
    # subtract the cycle and restore the original (step, row, col) layout
    anomalies = (grouped - mean_cycle).reshape(n_steps, n_rows, n_cols)
    # scale by the global standard deviation of the anomalies
    return anomalies / np.std(anomalies)
# ---------------------------------------------------------------------------
# Load the monthly-mean fields, build normalized anomalies over the target
# region, and take the SVD of the wind-stress / surface-temperature
# covariance matrix.
# ---------------------------------------------------------------------------
# read in the data
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# code outside this section may rely on it.
dir = "./NCEP_monthly_mean_data/"
taux_ds = xr.open_dataset(dir + "uflx.sfc.mon.mean.tropics.nc", use_cftime=True)
tauy_ds = xr.open_dataset(dir + "vflx.sfc.mon.mean.tropics.nc", use_cftime=True)
sst_ds = xr.open_dataset(dir + "skt.sfc.mon.mean.tropics.nc", use_cftime=True)
grid_ds = xr.open_dataset(dir + "lsmask.tropics.nc", use_cftime=True)
# get the data
taux_da, tauy_da, sst_da, grid_da = taux_ds.uflx, tauy_ds.vflx, sst_ds.skt, grid_ds.lsmask
# reverse the latitude dimension to make it increasing
# (an increasing index is what lets the label-based lat slice below work)
taux_da = taux_da.reindex(lat=taux_da.lat[::-1])
tauy_da = tauy_da.reindex(lat=tauy_da.lat[::-1])
sst_da = sst_da.reindex(lat=sst_da.lat[::-1])
grid_da = grid_da.reindex(lat=grid_da.lat[::-1])
# select data from 1948 to 1999 for training,
# and in a target region (30N-30S, 100E-60W)
lat_min, lat_max = -30, 30
lon_min, lon_max = 100, 300
# The latitude bounds were previously defined but never applied; slice both
# horizontal axes so the selection matches the stated target region, and use
# the same slices for the mask so it stays on the same grid as the fields.
region = dict(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max))
train_period = slice("1948", "1999")
taux = taux_da.sel(time=train_period, **region)
tauy = tauy_da.sel(time=train_period, **region)
sst = sst_da.sel(time=train_period, **region)
grid = grid_da.sel(**region)
lat, lon = taux.lat.values, taux.lon.values
time = sst.time.values
# get the data as numpy arrays
taux, tauy, sst, grid = taux.values, tauy.values, sst.values, grid.values
# drop the mask's leading axis, leaving a 2-D (lat, lon) array
grid = grid[0, :, :]
# get dimensions
nt, ny, nx = taux.shape
# standard deviation of the raw fields
# NOTE(review): these are computed before the mean cycle is removed, so they
# are not the scalars that normalize_data divides by (it uses the standard
# deviation of the anomalies) -- confirm which one downstream code expects.
taux_std = taux.std()
tauy_std = tauy.std()
sst_std = sst.std()
# normalize the data
taux_anom = normalize_data(taux)
tauy_anom = normalize_data(tauy)
sst_anom = normalize_data(sst)
# mask out the land (points where the mask equals -1 are zeroed at every time)
land = grid == -1
taux_anom[:, land] = 0
tauy_anom[:, land] = 0
sst_anom[:, land] = 0
# concatenate taux and tauy along the second axis -> (nt, 2*ny, nx)
tau = np.concatenate((taux_anom, tauy_anom), axis=1)
# reshape vars to space x time
tau = tau.reshape(nt, 2 * ny * nx).T
sst = sst_anom.reshape(nt, ny * nx).T
# form matrix A, shape (2*ny*nx, ny*nx)
A = 1 / nt * tau @ sst.T
# apply SVD on A
# NOTE: numpy returns the right singular vectors as rows, so `V` holds V^H
# (use V[k] for the k-th right singular vector). With full_matrices=True, U is
# square (2*ny*nx by 2*ny*nx).
U, S, V = la.svd(A, full_matrices=True)