In [1]:
import xarray as xr
import numpy as np
import atmos_physics as atmos_physics
import math

In [2]:
def get_train_test_split(training_split, longitudes, times):
    """Return a split index that falls on a whole-block boundary.

    The total number of samples is ``longitudes * times``. The requested
    fraction of that total is truncated to an integer and then rounded down
    to a multiple of ``longitudes``, so the split never cuts a block in two.

    Args:
        training_split: Fraction of all samples requested for training.
        longitudes: Number of samples in one indivisible block.
        times: Number of blocks.

    Returns:
        The largest multiple of ``longitudes`` that does not exceed
        ``int(longitudes * times * training_split)``.
    """
    requested = int(longitudes * times * training_split)
    whole_blocks = requested // longitudes
    return whole_blocks * longitudes

In [3]:
# --- Run configuration -------------------------------------------------------
# Number of vertical entries kept from each field, and the fraction of samples
# requested for the training set.
n_z_input = 49
train_size = 0.9

# Glob pattern of the input files handed to xr.open_mfdataset.
# Alternative pattern kept for reference:
#filepath = "/ocean/projects/ees220005p/gmooers/GM_Data/**00000[1]**.nc4"
filepath = "/ocean/projects/ees220005p/gmooers/GM_Data/**0000012[26]**.nc4"

# Prefix used when writing the train/test netCDF files.
savepath = "/ocean/projects/ees220005p/gmooers/GM_Data/training_data/"

In [4]:
# Open every file matching the `filepath` glob as one combined xarray Dataset.
variables = xr.open_mfdataset(filepath)

In [5]:
# Coordinates and reference profiles taken straight from the dataset.
# NOTE(review): lon/lat are later passed through radians(), so they are
# presumably in degrees rather than metres — confirm against the file metadata.
x = variables.lon
y = variables.lat
z = variables.z      # m
p = variables.p      # hPa
rho = variables.rho  # kg/m^3

# TERRA is cut to the first n_z_input entries of its second axis; the two
# surface fields are used as they are.
terra = variables.TERRA[:, :n_z_input]
SFC_PRES = variables.SFC_REFERENCE_P
SKT = variables.SKT

# Grid sizes; n_files is the length of TERRA's leading axis.
n_x, n_y, n_z = x.size, y.size, z.size
n_files = terra.shape[0]

In [6]:
# Geographic predictors broadcast to shape (n_files, n_y, n_x):
#   cos_lat varies only along the latitude axis,
#   sin_lon varies only along the longitude axis.
# The operands are plain NumPy arrays (`.values`), so NumPy's own ufuncs are
# used; `xr.ufuncs` is not available in every xarray release.
cos_lat = np.zeros((n_files, n_y, n_x))
sin_lon = np.zeros((n_files, n_y, n_x))
cos_lat[:, :, :] = np.cos(np.radians(y.values[None, :, None]))
sin_lon[:, :, :] = np.sin(np.radians(x.values[None, None, :]))

In [7]:
# Dimensionless layer-thickness factors `adz` for the first n_z_input levels,
# expressed relative to the reference spacing `dz`.
adz = xr.zeros_like(z[:n_z_input]) 
# Reference spacing: half the sum of the first two z values.
dz = 0.5*(z[0]+z[1]) 
# The first level is assigned a factor of exactly one.
adz[0] = 1.

# Interior levels: centred difference of z, normalised by dz.
for k in range(1,n_z_input-1): # range doesn't include stopping number
    adz[k] = 0.5*(z[k+1]-z[k-1])/dz

# Last retained level: one-sided difference with the level below it.
adz[n_z_input-1] = (z[n_z_input-1]-z[n_z_input-2])/dz
# Product of thickness factor, reference spacing and rho.
rho_dz = adz*dz*rho

In [8]:
def _first_levels(name):
    """Return `variables.<name>` cut to the first n_z_input entries of axis 1."""
    return getattr(variables, name)[:, :n_z_input]


# Inputs.
Tin = _first_levels("TABS_SIGMA")  # originally just called tabs
Qrad = _first_levels("QRAD_SIGMA") / 86400
qt = (
    _first_levels("QV_SIGMA")
    + _first_levels("QC_SIGMA")
    + _first_levels("QI_SIGMA")
) / 1000.0  # originally called qt
qp = _first_levels("QP_SIGMA") / 1000.0

# Microphysics tendency, stored with its sign flipped (q_auto_out = - dqp).
q_auto_out = -1.0 * _first_levels("QP_MICRO_SIGMA") / 1000.0

# Vertical fluxes.
qpflux_z_coarse = _first_levels("RHOQPW_SIGMA") / 1000.0
T_adv_out = _first_levels("T_FLUX_Z_OUT_SUBGRID_SIGMA")  # originally tflux_z
q_adv_out = _first_levels("Q_FLUX_Z_OUT_SUBGRID_SIGMA") / 1000.0  # originally qtflux_z
qpflux_z = _first_levels("QP_FLUX_Z_OUT_SUBGRID_SIGMA") / 1000.0
w = _first_levels("W")  # m/s
precip = _first_levels("PREC_SIGMA")  # precipitation flux kg/m^2/s
cloud_qt_flux = _first_levels("SED_SIGMA") / 1000.0
cloud_lat_heat_flux = _first_levels("LSED_SIGMA")
# SGS qp flux kg/m^2/s. Note that I need this variable.
qpflux_diff_coarse_z = _first_levels("RHOQPS_SIGMA") / 1000.0

In [9]:
# omp is a linear ramp in Tin between tprmin (-> 0) and tprmax (-> 1),
# clipped to the interval [0, 1].
a_pr = 1.0 / (atmos_physics.tprmax - atmos_physics.tprmin)
ramp = (Tin - atmos_physics.tprmin) * a_pr
omp = np.maximum(0.0, np.minimum(1.0, ramp))

# fac = (L + Lf * (1 - omp)) / cp
one_minus_omp = 1.0 - omp
fac = (atmos_physics.L + atmos_physics.Lf * one_minus_omp) / atmos_physics.cp

In [10]:
# Sedimentation fluxes derived from cloud_qt_flux and cloud_lat_heat_flux,
# using the constants L and Lf from atmos_physics.
L_const = atmos_physics.L
Lf_const = atmos_physics.Lf

q_sed_flux_tot = cloud_qt_flux
q_sed_fluxc_out = (
    (L_const + Lf_const) * cloud_qt_flux + cloud_lat_heat_flux
) / Lf_const
q_sed_fluxi_out = -(L_const * cloud_qt_flux + cloud_lat_heat_flux) / Lf_const

In [11]:
# Level-to-level difference of `fac`, stored in a NumPy array of shape
# (n_files, n_z_input, n_y, n_x). The loop stops at n_z_input - 2, so the last
# level is never written and keeps its initial value of zero.
dfac_dz = np.zeros((n_files, n_z_input, n_y, n_x))
for k in range(n_z_input - 1):
    # NOTE(review): `kb` is computed but never used below — confirm whether a
    # different differencing stencil was intended.
    kb = max(0, k - 1)
    # Forward difference fac[k+1] - fac[k], divided by rho_dz at level k and
    # multiplied by rho at level k.
    dfac_dz[:, k, :, :] = (fac[:, k + 1, :, :] - fac[:, k, :, :]) / rho_dz[k, :] * rho[:, k]

In [12]:
# Tout: dfac_dz times (qpflux_z_coarse + qpflux_diff_coarse_z - precip),
# divided by rho.
Tout = dfac_dz * (qpflux_z_coarse + qpflux_diff_coarse_z - precip) / rho

In [15]:
# The arrays written to the datasets are laid out as (z, lat, sample) with a
# sample axis of length n_files * n_x, and split_index slices that axis.
# One indivisible block is therefore n_x samples and there are n_files blocks.
# Passing n_x * n_y as the block size yields an index larger than the sample
# axis, which puts every sample in the training set and leaves the test set
# empty.
split_index = get_train_test_split(train_size, n_x, n_files)

In [30]:
# Variable name -> (dims, values) mappings, later passed to xr.Dataset.
my_dict_train, my_dict_test = dict(), dict()

In [34]:
# Scratch: show Tin's shape before any axis reordering.
Tin.shape

(2, 49, 426, 768)

In [31]:
# Scratch: move z in front of time and pull the data out as a NumPy array.
Tin_new = Tin.transpose("z","time","lat","lon").values

In [35]:
# Scratch: shape after the transpose (z first, then time, lat, lon).
Tin_new.shape

(49, 2, 426, 768)

In [36]:
# Scratch check: the first time slice of Tin must equal the first time slice
# of the transposed array (index 0 on axis 0 before, on axis 1 after).
np.array_equal(Tin[0,:,:,:], Tin_new[:,0,:,:])

True

In [37]:
# Scratch: merge the (time, lat, lon) axes into one flat axis per z level.
Tin_new_something = np.reshape(Tin_new, (n_z_input, n_files*n_y*n_x))

In [38]:
# Scratch: shape of the flattened array, (z, n_files*n_y*n_x).
Tin_new_something.shape

(49, 654336)

In [41]:
# Scratch: undo the flattening to check that the reshape is reversible.
Tin_new_something_new = np.reshape(Tin_new_something, (n_z_input, n_files, n_y, n_x))

In [42]:
# Scratch check: after flatten + un-flatten, the first time slice matches the
# first time slice of the transposed array.
np.array_equal(Tin_new_something_new[:,0,:,:], Tin_new[:,0,:,:])

True

In [43]:
# Scratch check: flattened sample count divided by the horizontal grid size
# (n_y*n_x); 654336 is the flat-axis length shown by the shape output above.
654336/ (n_y*n_x)

2.0

In [None]:
# Scratch: flatten each field to shape (z, n_files*n_y*n_x).
#
# The flattened arrays are bound to new *_flat names rather than to Tout,
# T_adv_out, q_adv_out and q_auto_out. Rebinding those names to NumPy arrays
# would break the later cell that calls `.transpose("z","time","lat","lon")`
# on them, because ndarray.transpose does not accept dimension names.
flat_shape = (n_z_input, n_files*n_y*n_x)

Tin_new = Tin.transpose("z","time","lat","lon").values
Tin_new_something = np.reshape(Tin_new, flat_shape)
qin = np.reshape(qt.transpose("z","time","lat","lon").values, flat_shape)
Tout_flat = np.reshape(Tout.transpose("z","time","lat","lon").values, flat_shape)
T_adv_out_flat = np.reshape(T_adv_out.transpose("z","time","lat","lon").values, flat_shape)
q_adv_out_flat = np.reshape(q_adv_out.transpose("z","time","lat","lon").values, flat_shape)
q_auto_out_flat = np.reshape(q_auto_out.transpose("z","time","lat","lon").values, flat_shape)

In [26]:
# Scratch: store Tin in the (z, sample) layout, where sample runs over
# time*lat*lon, split at split_index along the sample axis.
Tin_new = Tin.transpose("z", "time", "lat", "lon").values
Tin_new_something = Tin_new.reshape((n_z_input, n_files*n_y*n_x))

flat_dims = ("z", "sample")
train_part = Tin_new_something[..., :split_index]
test_part = Tin_new_something[..., split_index:]
my_dict_train["Tin"] = (flat_dims, train_part)
my_dict_test["Tin"] = (flat_dims, test_part)

In [20]:
# NOTE(review): the recorded 2-D output below does not match the 4-D Tin_new
# produced by the transpose in the preceding cell; it presumably comes from an
# earlier definition of Tin_new — re-run on a fresh kernel to confirm.
Tin_new.shape

(49, 654336)

In [21]:
# Scratch: Tin itself is still in its original axis order at this point.
Tin.shape

(2, 49, 426, 768)

In [33]:
# When Tin_new is the 4-D transposed array, the left operand is 3-D
# (z, lat, lon) while the right operand is a 2-D (z, sample) slice.
# np.array_equal returns False whenever the shapes differ, so a False here
# reflects the shape mismatch and says nothing about element ordering.
np.array_equal(Tin_new[:,0,:,:], Tin_new_something[:,:split_index])

False

In [15]:
# Fill my_dict_train / my_dict_test with every variable in the layout
# (z, lat, sample) or (lat, sample), where the sample axis merges time and
# longitude (sample = time_index * n_x + lon_index) and is cut at split_index.
#
# Changes from the earlier version of this cell:
#   * Latitude is moved in front of time BEFORE reshaping. Reshaping a
#     (z, time, lat, lon) array straight to (z, n_y, n_files*n_x) keeps the
#     memory order time -> lat -> lon, so axis 1 did not hold latitude and the
#     "lat" coordinate labels were wrong. cos_lat / sin_lon had the same
#     problem via the wrong moveaxis.
#   * q_auto_out is stored under its own key; previously q_adv_out was written
#     a second time and q_auto_out never reached the datasets.
#   * The source fields (Tin, Tout, cos_lat, ...) are no longer rebound to the
#     flattened NumPy arrays, so the cell can be re-run safely. The flattened
#     data lives only in the two dictionaries.


def _flatten_time_lon(field, dims, shape):
    """Reorder the DataArray `field` to `dims`, then merge its last two axes.

    `dims` must end in ("time", "lon") so that the merge joins those two axes.
    `shape` is the full target shape; np.reshape raises if the sizes disagree.
    """
    return np.reshape(field.transpose(*dims).values, shape)


def _store_split(name, dims, flat):
    """Store `flat` under `name`, cut at split_index along its last axis."""
    my_dict_train[name] = (dims, flat[..., :split_index])
    my_dict_test[name] = (dims, flat[..., split_index:])


dims_3d = ("z", "lat", "sample")
dims_2d = ("lat", "sample")
shape_3d = (n_z_input, n_y, n_files * n_x)
shape_2d = (n_y, n_files * n_x)

# Fields with a vertical axis: output name -> source DataArray.
fields_3d = {
    "Tin": Tin,
    "qin": qt,
    "Tout": Tout,
    "T_adv_out": T_adv_out,
    "q_adv_out": q_adv_out,
    "q_auto_out": q_auto_out,
    "q_sed_flux_tot": q_sed_flux_tot,
    "q_sed_fluxi_out": q_sed_fluxi_out,
    "q_sed_fluxc_out": q_sed_fluxc_out,
    "terra": terra,
}
for name, field in fields_3d.items():
    flat = _flatten_time_lon(field, ("z", "lat", "time", "lon"), shape_3d)
    _store_split(name, dims_3d, flat)

# Surface fields without a vertical axis.
fields_2d = {
    "sfc_pres": SFC_PRES,
    "skt": SKT,
}
for name, field in fields_2d.items():
    flat = _flatten_time_lon(field, ("lat", "time", "lon"), shape_2d)
    _store_split(name, dims_2d, flat)

# cos_lat / sin_lon are NumPy arrays of shape (n_files, n_y, n_x): bring the
# latitude axis to the front, then merge (file, lon) into the sample axis.
for name, grid in (("cos_lat", cos_lat), ("sin_lon", sin_lon)):
    flat = np.reshape(np.moveaxis(grid, 0, 1), shape_2d)
    _store_split(name, dims_2d, flat)

In [16]:
# Training dataset: the variables from my_dict_train, labelled with the
# retained z levels, all latitudes, and the sample ids before split_index.
train_coords = {
    "z": z[:n_z_input].values,
    "lat": y.values,
    "sample": np.arange(n_files * len(x.values))[:split_index],
}
ds_train = xr.Dataset(my_dict_train, coords=train_coords)

In [17]:
# Test dataset: the variables from my_dict_test, labelled with the retained
# z levels, all latitudes, and the sample ids from split_index onwards.
test_coords = {
    "z": z[:n_z_input].values,
    "lat": y.values,
    "sample": np.arange(n_files * len(x.values))[split_index:],
}
ds_test = xr.Dataset(my_dict_test, coords=test_coords)

In [19]:
# Write both datasets to netCDF. The file names are formed by appending the
# suffix directly to `savepath`.
train_path = savepath + "_train.nc"
test_path = savepath + "_test.nc"
ds_train.to_netcdf(train_path)
ds_test.to_netcdf(test_path)