# Read in data

In [None]:
import numpy.linalg as la
import xarray as xr
import numpy as np

def normalize_data(x):
    """Remove the mean annual cycle from monthly data and scale by its spread.

    The time axis is folded into ``(nt // 12, 12)`` so that every 12th sample
    lands in the same slot; the mean over the first (year-like) axis is the
    per-slot climatology, which is subtracted from each sample.  The resulting
    anomalies are divided by one scalar standard deviation computed over all
    elements (time and space together).

    Parameters
    ----------
    x : numpy.ndarray
        Array of shape ``(nt, ny, nx)`` whose leading axis length is a
        multiple of 12.  The input is not modified.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(nt, ny, nx)`` holding the scaled anomalies.
        If the anomalies have zero standard deviation, the division produces
        non-finite values, as with any NumPy division by zero.

    Raises
    ------
    ValueError
        If ``x`` is not 3-D, or if its leading axis length is not a multiple
        of 12.
    """
    if x.ndim != 3:
        raise ValueError(
            f"expected a 3-D array shaped (nt, ny, nx), got {x.ndim} dimension(s)"
        )
    nt, ny, nx = x.shape
    if nt % 12 != 0:
        # Fail early with a clear message instead of an opaque reshape error.
        raise ValueError(
            f"time axis length must be a multiple of 12, got nt={nt}"
        )

    # Fold time into (block of 12, position within block).
    folded = x.reshape(nt // 12, 12, ny, nx)
    # Mean of each of the 12 positions across all blocks.
    climatology = folded.mean(axis=0, keepdims=True)
    # Subtracting allocates a new array, so the caller's data is untouched.
    anomalies = (folded - climatology).reshape(nt, ny, nx)
    # Single global scale factor: std over every element of the anomalies.
    return anomalies / anomalies.std()
# --- Load the NCEP monthly-mean files ---------------------------------------
# NOTE(review): `dir` shadows the Python builtin; the name is kept because
# code outside this view may still reference it.
dir = "./NCEP_monthly_mean_data/"
sst_ds = xr.open_dataset(dir + "skt.sfc.mon.mean.tropics.nc", use_cftime=True)
grid_ds = xr.open_dataset(dir + "lsmask.tropics.nc", use_cftime=True)

# Pull out the two variables used below: skin temperature and the mask.
sst_da = sst_ds.skt
grid_da = grid_ds.lsmask

# Reverse the order of the latitude coordinate (intended to make it
# increasing; presumably the files store it north-to-south -- confirm).
sst_da = sst_da.reindex(lat=sst_da.lat[::-1])
grid_da = grid_da.reindex(lat=grid_da.lat[::-1])

# --- Training period and target region (30N-30S, 100E-60W) -------------------
# NOTE(review): lat_min / lat_max are assigned but not applied in the
# selections below; only time and longitude are subset here. Verify that the
# "tropics" files are already limited in latitude.
lat_min = -30
lat_max = 30
lon_min = 100
lon_max = 300
sst = sst_da.sel(
    time=slice("1948", "1999"),
    lon=slice(lon_min, lon_max),
)
grid = grid_da.sel(lon=slice(lon_min, lon_max))

# Keep the coordinate vectors of the selected field.
lat = sst.lat.values
lon = sst.lon.values
time = sst.time.values

# --- Switch from xarray objects to plain numpy arrays ------------------------
sst = sst.values
# Keep only the first entry along the mask's leading axis.
grid = grid.values[0, :, :]

# Dimensions of the selected field: (time, lat, lon).
nt, ny, nx = sst.shape

# Standard deviation of the raw (not yet normalized) field.
sst_std = np.std(sst)

# Anomalies with the mean annual cycle removed, scaled by their own std.
sst_anom = normalize_data(sst)

# Zero out every grid point flagged -1 in the mask (land), at all times.
sst_anom[:, grid == -1] = 0

# Flatten the two spatial axes and transpose to (space, time).
sst = sst_anom.reshape(nt, ny * nx).T
