In [6]:
import numpy as np
import matplotlib.pyplot as plt
import os
import xarray as xr
import glob


# paths
BASE = '/scratch/ab10313/pleiades/'

# directory that receives the .npy arrays written further down in this notebook
PATH_NN = BASE+'NN_data_smooth/'
# create the output directory when it is missing; exist_ok keeps re-runs from failing
os.makedirs(PATH_NN, exist_ok=True)

PATH_LIST_full = glob.glob(BASE+'*_smooth/preprcossed_data/')
# remove three regions without strong w'b'
# (list.remove raises ValueError when a name below is not among the globbed directories)
for _excluded_region in ('04_equator_atlantic_smooth',
                         '10_north_pacific_smooth',
                         '15_bengal_smooth'):
    PATH_LIST_full.remove(BASE+_excluded_region+'/preprcossed_data/')

In [7]:
# show the region directories that remain after the removals above
PATH_LIST_full

['/scratch/ab10313/pleiades/12_agulhas_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/11_new_zealand_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/01_gulf_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/03_south_atlantic_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/05_argentina_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/13_australia_smooth/preprcossed_data/',
 '/scratch/ab10313/pleiades/14_indian_ocean_smooth/preprcossed_data/']

In [11]:
# functions that split datasets along the spatial axes and concatenate them back into a single array along time

def normalize(data, n_repeat=845):
    """Standardise ``data`` with its global NaN-aware mean and standard deviation.

    Parameters
    ----------
    data : array-like
        Values to standardise. NaNs are ignored when the statistics are computed
        and stay NaN in the output.
    n_repeat : int, optional
        Length of the returned mean/std vectors. The default of 845 is the value
        that used to be hard-coded here (presumably the number of time samples
        per region -- TODO confirm).

    Returns
    -------
    normalized_data
        ``(data - mean) / std`` with the same shape as ``data``.
    data_mean : numpy.ndarray
        The scalar mean repeated ``n_repeat`` times.
    data_std : numpy.ndarray
        The scalar standard deviation repeated ``n_repeat`` times.
    """
    # compute each statistic once and reuse it for both outputs
    mean = np.nanmean(data)
    std = np.nanstd(data)

    normalized_data = (data - mean)/std
    data_mean = np.tile(mean, n_repeat)
    data_std = np.tile(std, n_repeat)
    return normalized_data, data_mean, data_std

def smooth(data,time_window, coarsen_factor, n_time=845):
    """Time-smooth and spatially coarsen a field.

    Parameters
    ----------
    data : xarray.DataArray
        Field with either three dimensions (including ``time``, ``i`` and ``j``)
        or two dimensions (``i`` and ``j``).
    time_window : int
        Width of the centred rolling mean along ``time`` (3-D input only).
    coarsen_factor : int
        Number of grid points averaged together along both ``i`` and ``j``;
        points that do not fill a complete window are trimmed.
    n_time : int, optional
        Number of copies stacked along a new leading axis for 2-D input.
        The default of 845 is the value that used to be hard-coded here
        (presumably the length of the smoothed time axis -- TODO confirm).

    Returns
    -------
    xarray.DataArray
        For 3-D input: the rolling-mean field with all-NaN time steps dropped,
        block-averaged over ``i`` and ``j``.
    numpy.ndarray
        For 2-D input: the block-averaged field tiled to ``(n_time, ..., ...)``.

    Raises
    ------
    ValueError
        If ``data`` has neither 2 nor 3 dimensions (previously this fell through
        to an UnboundLocalError on the return statement).
    """
    n_dims = len(data.dims)

    if n_dims == 3:
        rolled = data.rolling(time=time_window, center=True).mean()
        # the centred window leaves all-NaN slices at both ends of the time axis
        trimmed = rolled.dropna(dim="time", how="all")
        return trimmed.coarsen(i=coarsen_factor, j=coarsen_factor, boundary="trim").mean()

    if n_dims == 2:
        coarse = data.coarsen(i=coarsen_factor, j=coarsen_factor, boundary="trim").mean()
        # np.tile converts to a plain ndarray, so this branch does not return a DataArray
        return np.tile(coarse, (n_time, 1, 1))

    raise ValueError(
        "smooth() expects a 2-D or 3-D DataArray, got %d dimension(s): %r"
        % (n_dims, tuple(data.dims))
    )


def load_data_norm(var_name_string,time_window=15,coarsen_factor=12):
    """Load one variable from every region, smooth it, normalize it per region and stack the regions.

    Parameters
    ----------
    var_name_string : str
        File stem; the files read are ``<BASE>*_smooth/preprcossed_data/<var_name_string>.nc``.
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.

    Returns
    -------
    tuple
        ``(data, mean, std)``: the per-region normalized fields joined along
        axis 0, and the matching per-region mean and std vectors joined along axis 0.

    Notes
    -----
    The glob here covers every ``*_smooth`` directory; it does not use the
    module-level ``PATH_LIST_full``, so regions removed from that list are still
    read. NOTE(review): confirm whether that is intended.
    An empty glob raises IndexError on the first lookup.
    """
    file_paths = glob.glob(BASE+'*_smooth/preprcossed_data/'+var_name_string+'.nc')

    def _normalized_region(path):
        # open one regional file, smooth/coarsen it, then standardise it
        region_field = smooth(xr.open_dataarray(path),time_window,coarsen_factor)
        return normalize(region_field)

    # the first region seeds the accumulators ...
    stacked, stacked_mean, stacked_std = _normalized_region(file_paths[0])

    # ... and every further region is appended along axis 0
    for path in file_paths[1:]:
        region_norm, region_mean, region_std = _normalized_region(path)
        stacked = np.concatenate((stacked,region_norm),axis=0)
        stacked_mean = np.concatenate((stacked_mean,region_mean),axis=0)
        stacked_std = np.concatenate((stacked_std,region_std),axis=0)

    return stacked, stacked_mean, stacked_std


def load_data(var_name_string,time_window=15,coarsen_factor=12):
    """Load one variable from every region, smooth it and stack the regions (no normalization).

    Parameters
    ----------
    var_name_string : str
        File stem; the files read are ``<BASE>*_smooth/preprcossed_data/<var_name_string>.nc``.
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.

    Returns
    -------
    The smoothed regional fields joined along axis 0. With a single matching
    file the output of ``smooth`` is returned unchanged.

    Notes
    -----
    The glob covers every ``*_smooth`` directory and ignores the module-level
    ``PATH_LIST_full``. NOTE(review): confirm whether the excluded regions
    should be read here. An empty glob raises IndexError.
    """
    file_paths = glob.glob(BASE+'*_smooth/preprcossed_data/'+var_name_string+'.nc')

    def _smoothed_region(path):
        # open one regional file and apply the shared smoothing/coarsening
        return smooth(xr.open_dataarray(path),time_window,coarsen_factor)

    stacked = _smoothed_region(file_paths[0])
    for path in file_paths[1:]:
        stacked = np.concatenate((stacked,_smoothed_region(path)),axis=0)
    return stacked

def FK_param(PATH,time_window=15,coarsen_factor=12):
    """Evaluate the FK08-style expression for one region directory.

    Reads ``FCOR.nc``, ``B_x.nc``, ``B_y.nc`` and ``HML.nc`` from ``PATH``,
    smooths each with ``smooth`` and returns

        HML**2 * (B_x**2 + B_y**2) / sqrt(FCOR**2 + tau**-2)

    Parameters
    ----------
    PATH : str
        Region directory, including the trailing separator (file names are
        appended by plain string concatenation).
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.
    """
    def _smoothed(file_name):
        # open PATH/<file_name> and apply the shared smoothing/coarsening
        return smooth(xr.open_dataarray(PATH+file_name),time_window,coarsen_factor)

    # FK08 parameterization inputs
    tau = 86400  # presumably one day in seconds -- TODO confirm

    FCOR = _smoothed('FCOR.nc')  # no .values here, unlike the fields below
    B_x = _smoothed('B_x.nc').values
    B_y = _smoothed('B_y.nc').values
    HML = _smoothed('HML.nc').values

    grad_B = np.sqrt(B_y**2 + B_x**2)
    numerator = (HML**2) * (grad_B**2)
    return numerator/np.sqrt(FCOR**2 + tau**-2)
    
    
def load_data_FK(time_window=15,coarsen_factor=12):
    """Evaluate ``FK_param`` for every region directory and stack the results along axis 0.

    Parameters
    ----------
    time_window, coarsen_factor : int
        Smoothing settings. They are now forwarded to ``FK_param``; previously
        they were accepted but ignored, so ``FK_param`` always ran with its own
        defaults. With the default arguments the result is unchanged.

    Returns
    -------
    The per-region ``FK_param`` outputs joined along axis 0.

    Notes
    -----
    The glob covers every ``*_smooth`` directory; it does not use the
    module-level ``PATH_LIST_full``, so regions removed from that list are still
    processed. NOTE(review): confirm whether that is intended.
    An empty glob raises IndexError on the first lookup.
    """
    # local name chosen so the module-level PATH_LIST_full is not shadowed
    region_paths = glob.glob(BASE+'*_smooth/preprcossed_data/')

    data_app = FK_param(region_paths[0], time_window, coarsen_factor)
    for region_path in region_paths[1:]:
        WB_FK = FK_param(region_path, time_window, coarsen_factor)
        data_app = np.concatenate((data_app,WB_FK),axis=0)
    return data_app

    
    
def WB_sg_target(PATH,time_window=15,coarsen_factor=12):
    """Build the normalized subgrid ``WB`` target for one region directory.

    Reads ``B.nc``, ``W.nc`` and ``WB.nc`` from ``PATH``, smooths each with
    ``smooth``, forms ``WB - W*B`` and standardises it with ``normalize``.

    Parameters
    ----------
    PATH : str
        Region directory, including the trailing separator.
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.

    Returns
    -------
    tuple
        ``(WB_sg_norm, WB_sg_mean, WB_sg_std)`` as returned by ``normalize``.
    """
    smoothed = {}
    for var_name in ('B', 'W', 'WB'):
        raw_field = xr.open_dataarray(PATH+var_name+'.nc')
        smoothed[var_name] = smooth(raw_field,time_window,coarsen_factor).values

    # WB subgrid: smoothed product minus product of the smoothed fields
    subgrid = smoothed['WB'] - smoothed['W']*smoothed['B']

    WB_sg_norm, WB_sg_mean, WB_sg_std = normalize(subgrid)
    return WB_sg_norm, WB_sg_mean, WB_sg_std


    
def load_data_WB(time_window=15,coarsen_factor=12):
    """Build the normalized subgrid ``WB`` target for every region and stack the regions.

    Parameters
    ----------
    time_window, coarsen_factor : int
        Smoothing settings. They are now forwarded to ``WB_sg_target``;
        previously they were accepted but ignored, so ``WB_sg_target`` always
        ran with its own defaults. With the default arguments the result is
        unchanged.

    Returns
    -------
    tuple
        ``(data, mean, std)``: the per-region normalized targets joined along
        axis 0, and the matching per-region mean and std vectors joined along axis 0.

    Notes
    -----
    The glob covers every ``*_smooth`` directory; it does not use the
    module-level ``PATH_LIST_full``, so regions removed from that list are still
    processed. NOTE(review): confirm whether that is intended.
    An empty glob raises IndexError on the first lookup.
    """
    # local name chosen so the module-level PATH_LIST_full is not shadowed
    region_paths = glob.glob(BASE+'*_smooth/preprcossed_data/')

    data_app, data_mean_app, data_std_app = WB_sg_target(
        region_paths[0], time_window, coarsen_factor)

    for region_path in region_paths[1:]:
        WB_sg_norm, WB_sg_mean, WB_sg_std = WB_sg_target(
            region_path, time_window, coarsen_factor)
        data_app = np.concatenate((data_app,WB_sg_norm),axis=0)
        data_mean_app = np.concatenate((data_mean_app,WB_sg_mean),axis=0)
        data_std_app = np.concatenate((data_std_app,WB_sg_std),axis=0)
    return data_app, data_mean_app, data_std_app

In [12]:
def FK_param_Lf(PATH,time_window=15,coarsen_factor=12):
    """Evaluate the FK expression with the ``ds/Lf`` rescaling for one region directory.

    Reads ``FCOR.nc``, ``B_x.nc``, ``B_y.nc``, ``HML.nc`` and ``Nsquared.nc``
    from ``PATH``, smooths each with ``smooth`` and returns

        (ds/Lf) * HML**2 * (B_x**2 + B_y**2) / sqrt(FCOR**2 + tau**-2)

    with ``Lf = sqrt(Nsquared) * HML / sqrt(FCOR**2 + tau**-2)``.

    Parameters
    ----------
    PATH : str
        Region directory, including the trailing separator.
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.
    """
    def _smoothed(file_name):
        # open PATH/<file_name> and apply the shared smoothing/coarsening
        return smooth(xr.open_dataarray(PATH+file_name),time_window,coarsen_factor)

    tau = 86400  # presumably one day in seconds -- TODO confirm
    ds = 25e3  # approx 1/4 degree resolution  ~ 25km

    # FK08 parameterization inputs
    FCOR = _smoothed('FCOR.nc')  # no .values here, unlike the fields below
    B_x = _smoothed('B_x.nc').values
    B_y = _smoothed('B_y.nc').values
    HML = _smoothed('HML.nc').values
    Nsquared = _smoothed('Nsquared.nc').values

    # NOTE(review): the previous comment here mentioned a minus sign applied
    # because z is negative ("change this after new preprocessing"), but no sign
    # flip is present in the code -- confirm which one is current.
    # Non-positive values are replaced by a tiny positive number so that Lf is
    # not exactly zero there (avoids the singularity in ds/Lf); NaNs are left as is.
    non_positive = Nsquared<=0
    Nsquared[non_positive] = 1e-12

    grad_B = np.sqrt(B_y**2 + B_x**2)
    f_eff = np.sqrt(FCOR**2 + tau**-2)

    # WB FK11 with Lf rescaling factor
    Lf = (np.sqrt(Nsquared)*HML)/f_eff
    return (ds /Lf)*((HML**2) * (grad_B**2))/f_eff
    
    
def load_data_FK_Lf(time_window=15,coarsen_factor=12):
    """Evaluate ``FK_param_Lf`` for every region directory and stack the results along axis 0.

    Parameters
    ----------
    time_window, coarsen_factor : int
        Smoothing settings. They are now forwarded to ``FK_param_Lf``;
        previously they were accepted but ignored, so ``FK_param_Lf`` always ran
        with its own defaults. With the default arguments the result is unchanged.

    Returns
    -------
    The per-region ``FK_param_Lf`` outputs joined along axis 0.

    Notes
    -----
    The glob covers every ``*_smooth`` directory; it does not use the
    module-level ``PATH_LIST_full``, so regions removed from that list are still
    processed. NOTE(review): confirm whether that is intended.
    An empty glob raises IndexError on the first lookup.
    """
    # local name chosen so the module-level PATH_LIST_full is not shadowed
    region_paths = glob.glob(BASE+'*_smooth/preprcossed_data/')

    data_app = FK_param_Lf(region_paths[0], time_window, coarsen_factor)
    for region_path in region_paths[1:]:
        WB_FK = FK_param_Lf(region_path, time_window, coarsen_factor)
        data_app = np.concatenate((data_app,WB_FK),axis=0)
    return data_app



def Bod_param_Lf(PATH,time_window=15,coarsen_factor=12):
    """Evaluate the FK expression with the "Bod" ``ds/Lf`` rescaling for one region directory.

    Reads ``FCOR.nc``, ``B_x.nc``, ``B_y.nc``, ``HML.nc``, ``TAUX.nc``,
    ``TAUY.nc``, ``Q.nc`` and ``HBL.nc`` from ``PATH``, smooths each with
    ``smooth`` and returns

        (ds/Lf) * HML**2 * (B_x**2 + B_y**2) / sqrt(FCOR**2 + tau**-2)

    with ``Lf = Cl * m_star * u_star**2 / (sqrt(FCOR**2 + tau**-2) * HBL)`` and
    ``u_star = sqrt(|TAU| / 1000)``, where ``TAU = sqrt(TAUX**2 + TAUY**2)``.

    Parameters
    ----------
    PATH : str
        Region directory, including the trailing separator.
    time_window, coarsen_factor : int
        Passed straight to ``smooth``.
    """
    def _smoothed(file_name):
        # open PATH/<file_name> and apply the shared smoothing/coarsening
        return smooth(xr.open_dataarray(PATH+file_name),time_window,coarsen_factor)

    tau = 86400  # presumably one day in seconds -- TODO confirm

    # parameterization inputs
    FCOR = _smoothed('FCOR.nc')  # no .values here, unlike the fields below
    B_x = _smoothed('B_x.nc').values
    B_y = _smoothed('B_y.nc').values
    HML = _smoothed('HML.nc').values
    TAUX = _smoothed('TAUX.nc').values
    TAUY = _smoothed('TAUY.nc').values
    # NOTE(review): Q is loaded and smoothed but never used below
    Q = _smoothed('Q.nc').values
    HBL = _smoothed('HBL.nc').values

    grad_B = np.sqrt(B_y**2 + B_x**2)
    TAU = np.sqrt(TAUY**2 + TAUX**2)

    # WB FK11 with Bod Lf rescaling factor
    ds = 25e3 # approx 1/4 degree resolution  ~ 25km
    m_star = 0.5
    n_star = 0.066  # NOTE(review): defined but not used in the expression below
    Cl = 0.25
    # 1000 is presumably a reference density -- TODO confirm
    u_star = np.sqrt(np.abs(TAU)/1000)

    f_eff = np.sqrt(FCOR**2 + tau**-2)
    Lf = (Cl * m_star * u_star**2)/(f_eff*HBL)

    return (ds /Lf)*((HML**2) * (grad_B**2))/f_eff
    
    
def load_data_Bod_Lf(time_window=15,coarsen_factor=12):
    """Evaluate ``Bod_param_Lf`` for every region directory and stack the results along axis 0.

    Parameters
    ----------
    time_window, coarsen_factor : int
        Smoothing settings. They are now forwarded to ``Bod_param_Lf``;
        previously they were accepted but ignored, so ``Bod_param_Lf`` always
        ran with its own defaults. With the default arguments the result is unchanged.

    Returns
    -------
    The per-region ``Bod_param_Lf`` outputs joined along axis 0.

    Notes
    -----
    The glob covers every ``*_smooth`` directory; it does not use the
    module-level ``PATH_LIST_full``, so regions removed from that list are still
    processed. NOTE(review): confirm whether that is intended.
    An empty glob raises IndexError on the first lookup.
    """
    # local name chosen so the module-level PATH_LIST_full is not shadowed
    region_paths = glob.glob(BASE+'*_smooth/preprcossed_data/')

    data_app = Bod_param_Lf(region_paths[0], time_window, coarsen_factor)
    for region_path in region_paths[1:]:
        WB_Bod = Bod_param_Lf(region_path, time_window, coarsen_factor)
        data_app = np.concatenate((data_app,WB_Bod),axis=0)
    return data_app

In [13]:
# WB subgrid: normalized target plus the per-region mean and std used to normalize it
WB_sg, WB_sg_mean, WB_sg_std = load_data_WB()

# write each array to PATH_NN under its own file name
for _fname, _arr in (('WB_sg.npy', WB_sg),
                     ('WB_sg_mean.npy', WB_sg_mean),
                     ('WB_sg_std.npy', WB_sg_std)):
    np.save(PATH_NN+_fname,_arr)

In [14]:
# WB FK parameterization: plain form and the two Lf-rescaled variants
WB_FK = load_data_FK()
WB_FK_Lf = load_data_FK_Lf()
WB_BD_Lf = load_data_Bod_Lf()

# write each array to PATH_NN under its own file name
for _fname, _arr in (('WB_FK_Lf.npy', WB_FK_Lf),
                     ('WB_BD_Lf.npy', WB_BD_Lf),
                     ('WB_FK.npy', WB_FK)):
    np.save(PATH_NN+_fname,_arr)

In [15]:
# load FK08 variables relevant for w'b' parameterization:
# each call returns the per-region normalized field stacked over regions,
# together with the matching per-region mean and std vectors
FCOR, FCOR_mean, FCOR_std = load_data_norm('FCOR') 
B_x, B_x_mean, B_x_std = load_data_norm('B_x') 
B_y, B_y_mean, B_y_std = load_data_norm('B_y')
# NOTE(review): B_x and B_y are already normalized at this point, so this is the
# magnitude of the normalized components, not a normalized gradient magnitude -- confirm intended
grad_B = np.sqrt(B_y**2 + B_x**2)

HML, HML_mean, HML_std = load_data_norm('HML') 
# NOTE(review): Nsquared is loaded here but is not among the arrays written by the save cell below
Nsquared, Nsquared_mean, Nsquared_std = load_data_norm('Nsquared')


In [16]:
# load other variables that may be relevant for w'b':
# each call returns the per-region normalized field stacked over regions,
# together with the matching per-region mean and std vectors

U, U_mean, U_std = load_data_norm('U') 
V, V_mean, V_std = load_data_norm('V') 

TAUX, TAUX_mean, TAUX_std = load_data_norm('TAUX')
TAUY, TAUY_mean, TAUY_std = load_data_norm('TAUY')
# NOTE(review): TAUX and TAUY are already normalized at this point, so this is the
# magnitude of the normalized components, not a normalized stress magnitude -- confirm intended
TAU = np.sqrt(TAUY**2 + TAUX**2)

Q, Q_mean, Q_std = load_data_norm('Q')
HBL, HBL_mean, HBL_std = load_data_norm('HBL')


In [18]:
# save normalized NN data
# (WB_sg and WB_FK are written by the cells that compute them, so they are not repeated here)

_nn_inputs = {
    'grad_B.npy': grad_B,
    'FCOR.npy': FCOR,
    'HML.npy': HML,
    'TAU.npy': TAU,
    'U.npy': U,
    'V.npy': V,
    'Q.npy': Q,
    'HBL.npy': HBL,
}
for _fname, _arr in _nn_inputs.items():
    np.save(PATH_NN+_fname,_arr)

# NOTE(review): FCOR is written a second time under a '.nc' name; np.save appends
# '.npy' to names that lack it, so this produces 'FCOR.nc.npy' -- confirm whether
# this line is needed or was meant for a different variable
np.save(PATH_NN+'FCOR.nc',FCOR)