# Generate NN data - surface and interior variables 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import xarray as xr
import glob
from xgcm import Grid


# Root directory that holds every regional dataset and the NN output folders.
BASE = '/scratch/ab10313/pleiades/'

# Legacy combined output folder (kept disabled).
#PATH_NN = BASE+'NN_data_smooth/'
#os.mkdir(PATH_NN)

# NN data path: surface variables.
# Un-comment the mkdir line the first time the folder has to be created.
PATH_NN_surface = os.path.join(BASE, 'NN_data_surface/')
#os.mkdir(PATH_NN_surface)

# NN data path: interior variables.
PATH_NN_interior = os.path.join(BASE, 'NN_data_interior/')
#os.mkdir(PATH_NN_interior)


# One pre-processed data directory per "*_smooth" region folder.
# NOTE(review): 'preprcossed_data' is presumably the spelling used on disk,
# so it must not be "corrected" here - confirm against the file system.
PATH_LIST_full = glob.glob(os.path.join(BASE, '*_smooth/preprcossed_data/'))
# Optionally drop the three regions listed below (without strong w'b').
#PATH_LIST_full.remove(BASE+'04_equator_atlantic_smooth/preprcossed_data/')
#PATH_LIST_full.remove(BASE+'10_north_pacific_smooth/preprcossed_data/')
#PATH_LIST_full.remove(BASE+'15_bengal_smooth/preprcossed_data/')

## Helper functions

In [5]:
# coarse grain

def coarse_grain(data, time_factor, grid_factor, n_time=846):
    """Smooth a field in time and coarsen it on the horizontal grid.

    Parameters
    ----------
    data : xarray.DataArray
        Either a 3-D field with a ``time`` dimension plus ``i``/``j``, or a
        2-D field on ``i``/``j`` only.
    time_factor : int
        Window length of the centred rolling mean along ``time``
        (only used for 3-D input).
    grid_factor : int
        Number of cells averaged together along both ``i`` and ``j``;
        trailing cells that do not fill a complete block are trimmed.
    n_time : int, optional
        Number of copies of the coarsened 2-D field stacked along a new
        leading axis.  Defaults to 846, the value that used to be hard-coded
        (presumably the number of time steps left after the rolling mean of
        the 3-D fields - TODO confirm).

    Returns
    -------
    xarray.DataArray for 3-D input; numpy.ndarray with ``n_time`` entries
    along axis 0 for 2-D input.

    Raises
    ------
    ValueError
        If ``data`` has neither 2 nor 3 dimensions (previously this fell
        through to an ``UnboundLocalError``).
    """
    n_dims = len(data.dims)
    if n_dims == 3:
        # Rolling mean leaves all-NaN time steps at both ends; drop them
        # before averaging in space.
        smoothed = data.rolling(time=time_factor, center=True).mean()
        smoothed = smoothed.dropna(dim="time", how="all")
        return smoothed.coarsen(i=grid_factor, j=grid_factor, boundary="trim").mean()
    if n_dims == 2:
        # Time-independent field: coarsen once, then replicate it so it can
        # be stacked next to the time-dependent fields.
        coarse = data.coarsen(i=grid_factor, j=grid_factor, boundary="trim").mean()
        return np.tile(coarse, (n_time, 1, 1))
    raise ValueError(
        "coarse_grain expects a 2-D or 3-D array, got dims=%r" % (tuple(data.dims),)
    )

In [3]:
# normalize

def normalize(data, n_time=846):
    """Standardize ``data`` with its NaN-aware global mean and std.

    Parameters
    ----------
    data : array-like
        Field to normalize; NaNs are ignored when computing the statistics
        and stay NaN in the output.
    n_time : int, optional
        Length of the returned mean/std vectors.  Defaults to 846, the value
        that used to be hard-coded.

    Returns
    -------
    normalized_data
        ``(data - mean) / std``, same shape as ``data``.
    data_mean, data_std : numpy.ndarray
        1-D arrays of length ``n_time`` filled with the scalar mean / std.

    Notes
    -----
    No guard against ``std == 0``: a constant field produces inf/NaN, exactly
    as before.
    """
    # Compute each statistic once; the input arrays can be large.
    mean = np.nanmean(data)
    std = np.nanstd(data)
    normalized_data = (data - mean) / std
    data_mean = np.tile(mean, n_time)
    data_std = np.tile(std, n_time)
    return normalized_data, data_mean, data_std

In [4]:
# loads one variable from every region, coarse-grains and normalizes each region separately, then concatenates the regions into a single array along the leading (time) axis

def load_data_norm(var_name_string,time_factor=14,grid_factor=12):
    """Load one variable from every region, normalized region by region.

    Every file matching ``BASE/*_smooth/preprcossed_data/<var_name_string>.nc``
    is opened, coarse-grained with ``coarse_grain`` and standardized with
    ``normalize``; the per-region results are concatenated along axis 0.

    Returns
    -------
    (data, means, stds) : the stacked normalized data and the stacked
    per-region mean / std vectors returned by ``normalize``.

    Raises
    ------
    IndexError
        If no file matches the pattern.
    """
    # NOTE(review): glob returns paths in arbitrary order; all loaders in
    # this file rely on that order being the same from call to call.
    region_files = glob.glob(BASE+'*_smooth/preprcossed_data/'+var_name_string+'.nc')

    def _prepare(nc_path):
        # open -> coarse-grain -> normalize for a single region
        field = xr.open_dataarray(nc_path)
        return normalize(coarse_grain(field, time_factor, grid_factor))

    data_app, data_mean_app, data_std_app = _prepare(region_files[0])
    for nc_path in region_files[1:]:
        region_norm, region_mean, region_std = _prepare(nc_path)
        data_app = np.concatenate((data_app, region_norm), axis=0)
        data_mean_app = np.concatenate((data_mean_app, region_mean), axis=0)
        data_std_app = np.concatenate((data_std_app, region_std), axis=0)
    return data_app, data_mean_app, data_std_app

In [5]:

def load_data(var_name_string,time_factor=14,grid_factor=12):
    """Load one interior variable from every region without normalizing it.

    Every file matching
    ``BASE/*_smooth/preprcossed_data/interior/<var_name_string>.nc`` is
    opened and coarse-grained with ``coarse_grain``; the regions are
    concatenated along axis 0.

    Parameters
    ----------
    var_name_string : str
        File stem of the variable to load.
    time_factor, grid_factor : int
        Forwarded to ``coarse_grain``.  (The body used to reference the
        undefined names ``time_window`` / ``coarsen_factor``, which raised
        ``NameError`` on every call.)

    Raises
    ------
    IndexError
        If no file matches the pattern.
    """
    PATH_LIST = glob.glob(BASE+'*_smooth/preprcossed_data/interior/'+var_name_string+'.nc')
    data_app = coarse_grain(xr.open_dataarray(PATH_LIST[0]), time_factor, grid_factor)
    for PATH in PATH_LIST[1:]:
        data_smooth = coarse_grain(xr.open_dataarray(PATH), time_factor, grid_factor)
        data_app = np.concatenate((data_app, data_smooth), axis=0)
    return data_app


In [6]:
def WB_sg_target(PATH,time_factor=14,grid_factor=12):
    """Build the normalized subgrid flux ``WB - W*B`` for one region.

    ``B.nc``, ``W.nc`` and ``WB.nc`` are read from ``PATH``, each is
    coarse-grained with ``coarse_grain``, and the product of the
    coarse-grained ``W`` and ``B`` is subtracted from the coarse-grained
    ``WB``.

    Returns
    -------
    The ``(normalized, mean, std)`` triple produced by ``normalize``.
    """
    def _coarse_values(file_name):
        # coarse-grained field as a plain numpy array
        field = xr.open_dataarray(PATH + file_name)
        return coarse_grain(field, time_factor, grid_factor).values

    b_cg = _coarse_values('B.nc')
    w_cg = _coarse_values('W.nc')
    wb_cg = _coarse_values('WB.nc')

    # subgrid part: coarse-grained product minus product of coarse-grained fields
    subgrid_flux = wb_cg - w_cg * b_cg
    return normalize(subgrid_flux)


    
def load_data_WB(time_factor=14,grid_factor=12):
    """Stack the normalized subgrid ``WB`` target of every region.

    Calls ``WB_sg_target`` for each directory matching
    ``BASE/*_smooth/preprcossed_data/interior/`` and concatenates the results
    along axis 0.

    Parameters
    ----------
    time_factor, grid_factor : int
        Forwarded to ``WB_sg_target`` (they used to be accepted but silently
        ignored, so the defaults were always used).

    Returns
    -------
    (data, means, stds) stacked over regions.

    Raises
    ------
    IndexError
        If no directory matches the pattern.
    """
    PATH_LIST_full = glob.glob(BASE+'*_smooth/preprcossed_data/interior/')
    data_app, data_mean_app, data_std_app = WB_sg_target(
        PATH_LIST_full[0], time_factor=time_factor, grid_factor=grid_factor
    )
    for region_path in PATH_LIST_full[1:]:
        WB_sg_norm, WB_sg_mean, WB_sg_std = WB_sg_target(
            region_path, time_factor=time_factor, grid_factor=grid_factor
        )
        data_app = np.concatenate((data_app, WB_sg_norm), axis=0)
        data_mean_app = np.concatenate((data_mean_app, WB_sg_mean), axis=0)
        data_std_app = np.concatenate((data_std_app, WB_sg_std), axis=0)
    return data_app, data_mean_app, data_std_app

In [7]:
def grad_B_mag(PATH,time_factor=14,grid_factor=12):
    """Normalized magnitude of the horizontal gradient of ``B_surf`` for one region.

    ``B_surf.nc`` is read from ``PATH`` and coarse-grained; finite
    differences along ``i`` and ``j`` are divided by the coarse cell spacing,
    interpolated back onto the coarse ``i``/``j`` coordinates (extrapolating
    at the edge), and combined into ``sqrt(B_x**2 + B_y**2)``.

    Parameters
    ----------
    PATH : str
        Directory prefix containing ``B_surf.nc``.
    time_factor, grid_factor : int
        Coarse-graining factors.  They are now honoured; previously 14 and 12
        were hard-coded in the body regardless of the arguments.

    Returns
    -------
    The ``(normalized, mean, std)`` triple produced by ``normalize``.
    """
    B = coarse_grain(xr.open_dataarray(PATH+'B_surf.nc'), time_factor, grid_factor)

    # A coarse cell spans grid_factor fine cells, so the spacing is
    # grid_factor times the dxF / dyF carried by the coarse array
    # (presumably the block-averaged fine spacing - TODO confirm).
    B_x = (B.diff(dim='i')/(grid_factor*B.dxF)).interp(i=B.i,j=B.j,kwargs={"fill_value": "extrapolate"})
    B_y = (B.diff(dim='j')/(grid_factor*B.dyF)).interp(i=B.i,j=B.j,kwargs={"fill_value": "extrapolate"})
    grad_B = np.sqrt(B_y**2 + B_x**2).values

    return normalize(grad_B)


    
def load_data_grad_B(time_factor=14,grid_factor=12):
    """Stack the normalized buoyancy-gradient magnitude of every region.

    Calls ``grad_B_mag`` for each directory matching
    ``BASE/*_smooth/preprcossed_data/surface/`` and concatenates the results
    along axis 0.

    Parameters
    ----------
    time_factor, grid_factor : int
        Forwarded to ``grad_B_mag`` (they used to be accepted but never
        passed on).

    Returns
    -------
    (data, means, stds) stacked over regions.

    Raises
    ------
    IndexError
        If no directory matches the pattern.
    """
    PATH_LIST_full = glob.glob(BASE+'*_smooth/preprcossed_data/surface/')
    data_app, data_mean_app, data_std_app = grad_B_mag(
        PATH_LIST_full[0], time_factor=time_factor, grid_factor=grid_factor
    )
    for region_path in PATH_LIST_full[1:]:
        grad_B_norm, grad_B_mean, grad_B_std = grad_B_mag(
            region_path, time_factor=time_factor, grid_factor=grid_factor
        )
        data_app = np.concatenate((data_app, grad_B_norm), axis=0)
        data_mean_app = np.concatenate((data_mean_app, grad_B_mean), axis=0)
        data_std_app = np.concatenate((data_std_app, grad_B_std), axis=0)
    return data_app, data_mean_app, data_std_app

In [8]:
def TAU_mag(PATH,time_factor=14,grid_factor=12):
    """Normalized magnitude ``sqrt(TAUX**2 + TAUY**2)`` for one region.

    ``TAUX.nc`` and ``TAUY.nc`` are read from ``PATH`` and coarse-grained
    before the magnitude is taken.

    Parameters
    ----------
    PATH : str
        Directory prefix containing ``TAUX.nc`` and ``TAUY.nc``.
    time_factor, grid_factor : int
        Coarse-graining factors.  They are now honoured; previously 14 and 12
        were hard-coded in the body regardless of the arguments.

    Returns
    -------
    The ``(normalized, mean, std)`` triple produced by ``normalize``.
    """
    # wind stress components
    TAUX = coarse_grain(xr.open_dataarray(PATH+'TAUX.nc'), time_factor, grid_factor)
    TAUY = coarse_grain(xr.open_dataarray(PATH+'TAUY.nc'), time_factor, grid_factor)
    TAU = np.sqrt(TAUY**2 + TAUX**2).values

    return normalize(TAU)


    
def load_data_TAU_mag(time_factor=14,grid_factor=12):
    """Stack the normalized wind-stress magnitude of every region.

    Calls ``TAU_mag`` for each directory matching
    ``BASE/*_smooth/preprcossed_data/surface/`` and concatenates the results
    along axis 0.

    Parameters
    ----------
    time_factor, grid_factor : int
        Forwarded to ``TAU_mag`` (they used to be accepted but never passed
        on).

    Returns
    -------
    (data, means, stds) stacked over regions.

    Raises
    ------
    IndexError
        If no directory matches the pattern.
    """
    PATH_LIST_full = glob.glob(BASE+'*_smooth/preprcossed_data/surface/')
    data_app, data_mean_app, data_std_app = TAU_mag(
        PATH_LIST_full[0], time_factor=time_factor, grid_factor=grid_factor
    )
    for region_path in PATH_LIST_full[1:]:
        TAU_norm, TAU_mean, TAU_std = TAU_mag(
            region_path, time_factor=time_factor, grid_factor=grid_factor
        )
        data_app = np.concatenate((data_app, TAU_norm), axis=0)
        data_mean_app = np.concatenate((data_mean_app, TAU_mean), axis=0)
        data_std_app = np.concatenate((data_std_app, TAU_std), axis=0)
    return data_app, data_mean_app, data_std_app

## Load data

In [9]:
# Coarse-resolution horizontal buoyancy-gradient magnitude, normalized per
# region, together with the per-region mean and std.
grad_B_norm, grad_B_mean, grad_B_std = load_data_grad_B(
    time_factor=14,
    grid_factor=12,
)

In [44]:
# Subgrid WB (the target), normalized per region, with its mean and std.
WB_sg_norm, WB_sg_mean, WB_sg_std = load_data_WB(
    time_factor=14,
    grid_factor=12,
)

In [45]:
# Coriolis parameter, normalized per region.
FCOR, FCOR_mean, FCOR_std = load_data_norm(
    'FCOR',
    time_factor=14,
    grid_factor=12,
)

In [31]:
# Mixed-layer depth H_ML, normalized per region.
# The original note marks this field as NEGATIVE (sign convention).
HML, HML_mean, HML_std = load_data_norm(
    'HML',
    time_factor=14,
    grid_factor=12,
)

# Disabled single-region alternative that flips the sign:
#HML = coarse_grain(xr.open_dataarray(PATH_gulf+'HML.nc'),time_factor=14,grid_factor=12)
#HML = -HML

In [32]:
# Nsquared, normalized per region (default coarse-graining factors).
# Open items carried over from the original notes:
#   - Nsquared should be kept very small but non-zero to avoid a singularity;
#   - look at gsw for a different way to compute it;
#   - IMPORTANT: the 1e-5 fill value is a stop-gap "just to move forward".
# The fill is only present in the disabled lines below; the active call does
# not apply it.
Nsquared, Nsquared_mean, Nsquared_std = load_data_norm(var_name_string='Nsquared')

# Disabled single-region alternative.
# NOTE(review): the chained where() keeps only values that are both >= 0 and
# <= 0 - verify the condition before re-enabling.
#Nsquared = coarse_grain(xr.open_dataarray(PATH_gulf+'Nsquared.nc'),time_factor=14,grid_factor=12)
#Nsquared = Nsquared.where(Nsquared>=0).where(Nsquared<=0).fillna(1e-5)

In [10]:
# Wind-stress magnitude, normalized per region, with its mean and std.
TAU_norm, TAU_mean, TAU_std = load_data_TAU_mag(
    time_factor=14,
    grid_factor=12,
)

In [11]:
# Surface heat flux, normalized per region, with its mean and std.
Q, Q_mean, Q_std = load_data_norm(
    'Q',
    time_factor=14,
    grid_factor=12,
)
# Disabled single-region alternative that keeps only negative Q (others set to 0):
#Q = coarse_grain(xr.open_dataarray(PATH_gulf+'Q.nc'),time_factor=14,grid_factor=12)
#Q = Q.where(Q<0.).fillna(0.)

In [33]:
# Boundary-layer depth H_BL, normalized per region.
# The original note marks this field as POSITIVE (sign convention).
HBL, HBL_mean, HBL_std = load_data_norm(
    'HBL',
    time_factor=14,
    grid_factor=12,
)

# Disabled single-region alternative:
#HBL = coarse_grain(xr.open_dataarray(PATH_gulf+'HBL.nc'),time_factor=14,grid_factor=12)

In [7]:
# Cell size used as the weight of the weighted average.
# For every region the diagonal sqrt(dxF**2 + dyF**2) is built from the
# dxF / dyF attached to interior/B.nc, coarse-grained, and the regions are
# concatenated along axis 0.
PATH_LIST = glob.glob(BASE+'*_smooth/preprcossed_data/interior/B.nc')

# first region seeds the accumulator
B_0 = xr.open_dataarray(PATH_LIST[0])
data_0 = (B_0.dxF**2 + B_0.dyF**2)**0.5
data_app = coarse_grain(data_0,time_factor=14,grid_factor=12)

# remaining regions are appended one at a time
for PATH in PATH_LIST[1:]:
    B_i = xr.open_dataarray(PATH)
    data = (B_i.dxF**2 + B_i.dyF**2)**0.5
    data_app = np.concatenate(
        (data_app, coarse_grain(data,time_factor=14,grid_factor=12)),
        axis=0,
    )

drF = data_app

# save normalized NN data

In [46]:
# Write the normalized NN inputs and the target to disk.
# NOTE(review): Nsquared is loaded earlier in this file but is not saved
# here - confirm that this is intentional.

# surface inputs
np.save(os.path.join(PATH_NN_surface, 'grad_B.npy'), grad_B_norm)
np.save(os.path.join(PATH_NN_surface, 'FCOR.npy'), FCOR)
np.save(os.path.join(PATH_NN_surface, 'TAU.npy'), TAU_norm)
np.save(os.path.join(PATH_NN_surface, 'Q.npy'), Q)

# interior inputs
np.save(os.path.join(PATH_NN_interior, 'HML.npy'), HML)
np.save(os.path.join(PATH_NN_interior, 'HBL.npy'), HBL)

# target (subgrid WB) with the statistics needed to undo the normalization
np.save(os.path.join(PATH_NN_interior, 'WB_sg.npy'), WB_sg_norm)
np.save(os.path.join(PATH_NN_interior, 'WB_sg_mean.npy'), WB_sg_mean)
np.save(os.path.join(PATH_NN_interior, 'WB_sg_std.npy'), WB_sg_std)

In [14]:
# Mean and std of grad_B, TAU and Q, kept so the normalized fields can be
# mapped back to physical space.

# buoyancy-gradient magnitude
np.save(os.path.join(PATH_NN_surface, 'grad_B_mean.npy'), grad_B_mean)
np.save(os.path.join(PATH_NN_surface, 'grad_B_std.npy'), grad_B_std)

# wind-stress magnitude
np.save(os.path.join(PATH_NN_surface, 'TAU_mean.npy'), TAU_mean)
np.save(os.path.join(PATH_NN_surface, 'TAU_std.npy'), TAU_std)

# surface heat flux
np.save(os.path.join(PATH_NN_surface, 'Q_mean.npy'), Q_mean)
np.save(os.path.join(PATH_NN_surface, 'Q_std.npy'), Q_std)

In [8]:
# Grid-cell size (weights for the weighted average); stored alongside the
# surface data.
np.save(os.path.join(PATH_NN_surface, 'drF.npy'), drF)