In [1]:
from cbrain.imports import *
from cbrain.data_generator import *
from cbrain.cam_constants import *
from cbrain.losses import *
from cbrain.utils import limit_mem
from cbrain.layers import *
from cbrain.data_generator import DataGenerator
import tensorflow as tf
from tensorflow import math as tfm
#import tensorflow_probability as tfp
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import xarray as xr
import numpy as np
from cbrain.model_diagnostics import ModelDiagnostics
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as imag
import scipy.integrate as sin
#import cartopy.crs as ccrs
import matplotlib.ticker as mticker
#from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pickle
import sklearn
from sklearn.linear_model import LinearRegression
from scipy import stats
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
from cbrain.imports import *
from cbrain.utils import *
from cbrain.normalization import *
import h5py
from sklearn.preprocessing import OneHotEncoder
from cbrain.climate_invariant import *
import yaml
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Directory holding the training / validation / normalization files.
DATA_DIR = '/DFS-L/DATA/pritchard/ankitesg/datav3/'

# Coordinates are read from a single climate-model output file; any file of
# the run can be used for this.
coor = xr.open_dataset(
    "/DFS-L/DATA/pritchard/ankitesg/data/CESM2_f19_v13_updated_NN_pelayout01_ens_07.cam.h1.2003-01-22-00000.nc",
    decode_times=False,
)
lat, lon, lev = coor.lat, coor.lon, coor.lev
# hyam / hybm are handed to the data generators further down
# (presumably the hybrid-coordinate coefficients - TODO confirm).
hyam, hybm = coor.hyam, coor.hybm

# Only the 'scale_dict_RG' entry of the pickled object is kept; it is used as
# the output_transform of the generators.
# NOTE(review): this unpickles a file from disk - only load trusted files.
scale_dict = load_pickle('/export/nfs0home/ankitesg/tom/CBRAIN-CAM/nn_config/scale_dicts/2020_10_16_scale_dict_RG.pkl')['scale_dict_RG']

In [3]:
# Names of the input variables requested from the data files.
in_vars = [
    'QBP', 'TBP', 'CLDLIQBP', 'CLDICEBP',
    'PS', 'SOLIN', 'SHFLX', 'LHFLX',
]
# Names of the output variables: four *TEND variables followed by eight
# NN2L_* variables.
out_vars = [
    'QBCTEND', 'TBCTEND', 'CLDLIQBCTEND', 'CLDICEBCTEND',
    'NN2L_FLWDS', 'NN2L_PRECC', 'NN2L_PRECSC', 'NN2L_SOLL',
    'NN2L_SOLLD', 'NN2L_SOLS', 'NN2L_SOLSD', 'NN2L_NETSW',
]

In [4]:
# File names (relative to DATA_DIR) of the data sets used below.
NORMFILE = 'RG_SP_M4K_NORM_norm.nc'        # normalization statistics
TRAINFILE = 'RG_SP_M4K_train_shuffle.nc'   # training samples
VALIDFILE = 'RG_SP_M4K_valid.nc'           # validation samples

In [10]:
class DataGeneratorClimInvRealGeo(DataGenerator):
    """DataGenerator variant that can pass the normalized inputs through
    ``QV2RHNumpyReal`` and that returns only a fixed subset of input columns.

    Of the constructor arguments added on top of the base class, only
    ``rh_trans``, ``hyam``, ``hybm``, ``inp_subRH`` and ``inp_divRH`` affect
    the batches produced by this class.  ``t2tns_trans``, ``lhflx_trans``,
    ``scaling``, ``interpolate``, ``mode`` and ``exp`` are stored as
    attributes only, and ``inp_subTNS``, ``inp_divTNS``, ``lev``,
    ``interm_size``, ``lower_lim``, ``is_continous`` and ``Tnot`` are accepted
    but not used here.
    """

    def __init__(self, data_fn, input_vars, output_vars,
             norm_fn=None, input_transform=None, output_transform=None,
             batch_size=1024, shuffle=True, xarray=False, var_cut_off=None, normalize_flag=True,
             rh_trans=True,t2tns_trans=True,
             lhflx_trans=True,
             scaling=True,interpolate=True,
             hyam=None,hybm=None,
             inp_subRH=None,inp_divRH=None,
             inp_subTNS=None,inp_divTNS=None,
             lev=None, interm_size=40,
             lower_lim=6,
             is_continous=True,Tnot=5,
                mode='train', exp=None):
        """Store the option flags, initialise the base generator and build
        the column selections (and the RH layer when ``rh_trans`` is true).

        The first eleven arguments are forwarded positionally, unchanged, to
        ``DataGenerator.__init__``.
        """
        # Option flags and bookkeeping; all set before the base initialiser runs.
        self.mode = mode
        self.exp = exp
        self.inp_shape = 64
        self.rh_trans = rh_trans
        self.t2tns_trans = t2tns_trans
        self.lhflx_trans = lhflx_trans
        self.scaling = scaling
        self.interpolate = interpolate

        # Base data generator set-up.
        super().__init__(
            data_fn, input_vars, output_vars, norm_fn, input_transform, output_transform,
            batch_size, shuffle, xarray, var_cut_off, normalize_flag,
        )

        # Normalization constants of the input transform created by the base class.
        self.inp_sub = self.input_transform.sub
        self.inp_div = self.input_transform.div

        # Input columns kept in __getitem__: 8-25, 34-51, 60-77, 86-103 and
        # 104-107, i.e. 76 columns in total.
        # (presumably the lower 18 levels of four 26-level profiles plus four
        # scalar inputs - TODO confirm)
        profile_slices = [np.arange(first, first + 18) for first in (8, 34, 60, 86)]
        self.new_idx = np.concatenate(profile_slices + [np.arange(104, 108)])

        # NOTE(review): not referenced anywhere in this class.  The second
        # range starts at 26, unlike the 34 used in new_idx - confirm whether
        # that is intended.
        self.new_output_idx = np.concatenate([
            np.arange(8, 26),
            np.arange(26, 52),
            np.arange(60, 78),
            np.arange(86, 104),
            np.arange(104, 112),
        ])

        if self.rh_trans:
            self.qv2rhLayer = QV2RHNumpyReal(self.inp_sub,self.inp_div,inp_subRH,inp_divRH,hyam,hybm)

    def __getitem__(self, index):
        """Return ``(inputs, targets)`` for batch number ``index``.

        Rows ``index * batch_size`` up to (not including)
        ``(index + 1) * batch_size`` are read from ``self.data_ds['vars']``.
        Inputs and targets are run through their respective transforms; the
        inputs additionally go through ``self.qv2rhLayer.process`` when
        ``rh_trans`` is set and are finally restricted to ``self.new_idx``.
        The targets are returned with all of their columns.
        """
        first_row = index * self.batch_size
        last_row = first_row + self.batch_size
        rows = self.data_ds['vars'][first_row:last_row]

        # Split into raw inputs / targets, then normalize each.
        raw_inputs = rows[:, self.input_idxs]
        raw_targets = rows[:, self.output_idxs]
        inputs = self.input_transform.transform(raw_inputs)
        targets = self.output_transform.transform(raw_targets)

        if self.rh_trans:
            inputs = self.qv2rhLayer.process(inputs)
        return inputs[:, self.new_idx], targets

## Model 1: QBCTEND and TBCTEND outputs

In [26]:
# Root directory of the data sets.
BASE_DIR = '/DFS-L/DATA/pritchard/ankitesg/'

# Inputs as stored in the SP files, and the outputs predicted by this model.
in_vars = ['QBP', 'TBP', 'CLDLIQBP', 'CLDICEBP', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']
out_vars = ['QBCTEND', 'TBCTEND']

# Same inputs, except that the first variable ('QBP') is replaced by 'RH'.
in_vars_RH = ['RH'] + in_vars[1:]

In [27]:
# File names of the RH data sets (used with BASE_DIR + 'datav4/').
NORMFILE_RH = 'RG_RH_M4K_NORM_norm.nc'            # normalization statistics
TRAINFILE_RH = 'RG_RH_M4K_NORM_train_shuffle.nc'  # training samples
VALIDFILE_RH = 'RG_RH_M4K_NORM_valid.nc'          # validation samples

In [28]:
# Plain DataGenerator over the RH training file.  Its input_transform.sub /
# .div are passed to the DataGeneratorClimInvRealGeo generators further down
# as inp_subRH / inp_divRH.
train_gen_RH = DataGenerator(
    # data and normalization files
    data_fn = f"{BASE_DIR}datav4/{TRAINFILE_RH}",
    norm_fn = f"{BASE_DIR}datav4/{NORMFILE_RH}",
    # variables and their transforms
    input_vars = in_vars_RH,
    output_vars = out_vars,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    # batching
    batch_size=1024,
    shuffle=True,
    normalize_flag=True,
)

In [29]:
# Sanity check: shape of the first element of batch 0 (presumably the inputs).
train_gen_RH[0][0].shape

(1024, 108)

In [17]:
# SP data files (relative to DATA_DIR); same names as defined earlier in the notebook.
NORMFILE = 'RG_SP_M4K_NORM_norm.nc'        # normalization statistics
TRAINFILE = 'RG_SP_M4K_train_shuffle.nc'   # training samples
VALIDFILE = 'RG_SP_M4K_valid.nc'           # validation samples

In [18]:
# Training generator over the SP file.  With rh_trans=True the normalized
# inputs are passed through QV2RHNumpyReal, which is given the RH
# normalization constants taken from train_gen_RH.
train_gen = DataGeneratorClimInvRealGeo(
    # data and normalization files
    data_fn = f'{DATA_DIR}{TRAINFILE}',
    norm_fn = f'{DATA_DIR}{NORMFILE}',
    # variables and their transforms
    input_vars = in_vars,
    output_vars = out_vars,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    # batching
    batch_size=1024,
    shuffle=True,
    normalize_flag=True,
    # vertical-coordinate information
    lev=lev,
    hyam=hyam, hybm=hybm,
    # RH conversion and its normalization constants
    rh_trans=True,
    inp_subRH=train_gen_RH.input_transform.sub,
    inp_divRH=train_gen_RH.input_transform.div,
    # remaining option flags are switched off
    t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False,
)


In [23]:
# Sanity check: shape of the input array of batch 0 (after the column selection in __getitem__).
train_gen[0][0].shape

(1024, 76)

In [24]:
# Validation generator: identical settings to train_gen, but reading VALIDFILE.
valid_gen = DataGeneratorClimInvRealGeo(
    # data and normalization files
    data_fn = f'{DATA_DIR}{VALIDFILE}',
    norm_fn = f'{DATA_DIR}{NORMFILE}',
    # variables and their transforms
    input_vars = in_vars,
    output_vars = out_vars,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    # batching
    batch_size=1024,
    shuffle=True,
    normalize_flag=True,
    # vertical-coordinate information
    lev=lev,
    hyam=hyam, hybm=hybm,
    # RH conversion and its normalization constants
    rh_trans=True,
    inp_subRH=train_gen_RH.input_transform.sub,
    inp_divRH=train_gen_RH.input_transform.div,
    # remaining option flags are switched off
    t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False,
)


In [25]:
# Sanity check: shape of the input array of validation batch 0.
valid_gen[0][0].shape

(1024, 76)

In [36]:
# Fully connected network: 76 inputs -> 7 hidden ReLU layers of width 320
# -> 52 linear outputs.
model = Sequential()
model.add(Input(shape=(76,)))
# All seven hidden layers are identical, so they are added in a single loop.
for i in range(7):
    model.add(Dense(320, activation='relu'))
model.add(Dense(52, activation='linear'))

In [37]:
# Adam optimizer constructed without overriding any of its arguments.
opt = tf.keras.optimizers.Adam()

In [38]:
# Compile with the optimizer defined above and a mean-squared-error loss.
model.compile(optimizer=opt, loss='mse')

In [39]:
# Directory into which model checkpoints are written.
path_HDF5 = '/DFS-L/DATA/pritchard/ankitesg/models/'

# Early stopping on the validation loss with a patience of 10 epochs.
earlyStopping = EarlyStopping(
    monitor='val_loss', mode='min',
    patience=10,
    verbose=0,
)

# Checkpoint on the validation loss; save_best_only=True keeps only the best
# model seen so far in the target file.
mcp_save = ModelCheckpoint(
    path_HDF5+'RH_RG_Model1_V1.h5',
    monitor='val_loss', mode='min',
    save_best_only=True,
)

In [40]:
# Train Model 1 with the device scope set to '/gpu:1'.
# NOTE(review): the device string is hard-coded - confirm it exists on the
# machine this notebook is run on.
with tf.device('/gpu:1'):
    Nep = 12  # number of training epochs
    # Model.fit accepts keras.utils.Sequence generators directly, so the
    # deprecated Model.fit_generator wrapper is not needed.
    # (assumes DataGenerator is a keras Sequence - TODO confirm)
    model.fit(train_gen, epochs=Nep, validation_data=valid_gen,
              callbacks=[earlyStopping, mcp_save])

Epoch 1/12
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## Model 4: NN2L_* outputs

In [46]:
# Root directory of the data sets.
BASE_DIR = '/DFS-L/DATA/pritchard/ankitesg/'

# Inputs as stored in the SP files.
in_vars = ['QBP', 'TBP', 'CLDLIQBP', 'CLDICEBP', 'PS', 'SOLIN', 'SHFLX', 'LHFLX']
# Same inputs, except that the first variable ('QBP') is replaced by 'RH'.
in_vars_RH = ['RH'] + in_vars[1:]

# The eight NN2L_* outputs predicted by this model.
out_vars = [
    'NN2L_FLWDS', 'NN2L_PRECC', 'NN2L_PRECSC', 'NN2L_SOLL',
    'NN2L_SOLLD', 'NN2L_SOLS', 'NN2L_SOLSD', 'NN2L_NETSW',
]

In [47]:
# SP data files (relative to DATA_DIR); same names as defined earlier in the notebook.
NORMFILE = 'RG_SP_M4K_NORM_norm.nc'        # normalization statistics
TRAINFILE = 'RG_SP_M4K_train_shuffle.nc'   # training samples
VALIDFILE = 'RG_SP_M4K_valid.nc'           # validation samples

In [48]:
# Training generator for Model 4: same settings as for Model 1, with out_vars
# now holding the eight NN2L_* variables.
train_gen = DataGeneratorClimInvRealGeo(
    # data and normalization files
    data_fn = f'{DATA_DIR}{TRAINFILE}',
    norm_fn = f'{DATA_DIR}{NORMFILE}',
    # variables and their transforms
    input_vars = in_vars,
    output_vars = out_vars,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    # batching
    batch_size=1024,
    shuffle=True,
    normalize_flag=True,
    # vertical-coordinate information
    lev=lev,
    hyam=hyam, hybm=hybm,
    # RH conversion and its normalization constants
    rh_trans=True,
    inp_subRH=train_gen_RH.input_transform.sub,
    inp_divRH=train_gen_RH.input_transform.div,
    # remaining option flags are switched off
    t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False,
)


In [55]:
# Validation generator for Model 4: identical settings to train_gen, but
# reading VALIDFILE.
valid_gen = DataGeneratorClimInvRealGeo(
    # data and normalization files
    data_fn = f'{DATA_DIR}{VALIDFILE}',
    norm_fn = f'{DATA_DIR}{NORMFILE}',
    # variables and their transforms
    input_vars = in_vars,
    output_vars = out_vars,
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    # batching
    batch_size=1024,
    shuffle=True,
    normalize_flag=True,
    # vertical-coordinate information
    lev=lev,
    hyam=hyam, hybm=hybm,
    # RH conversion and its normalization constants
    rh_trans=True,
    inp_subRH=train_gen_RH.input_transform.sub,
    inp_divRH=train_gen_RH.input_transform.div,
    # remaining option flags are switched off
    t2tns_trans=False,
    lhflx_trans=False,
    scaling=False,
    interpolate=False,
)


In [56]:
# Sanity check: shape of the target array of validation batch 0.
valid_gen[0][1].shape

(1024, 8)

In [51]:
# Fully connected network: 76 inputs -> 5 hidden ReLU layers of width 320
# -> 8 linear outputs.
model = Sequential()
model.add(Input(shape=(76,)))
# All five hidden layers are identical, so they are added in a single loop.
for i in range(5):
    model.add(Dense(320, activation='relu'))
model.add(Dense(8, activation='linear'))

In [52]:
# Fresh Adam optimizer for Model 4, constructed without overriding any of its arguments.
opt = tf.keras.optimizers.Adam()

In [53]:
# Compile with the optimizer defined above and a mean-squared-error loss.
model.compile(optimizer=opt, loss='mse')

In [54]:
# Directory into which model checkpoints are written.
path_HDF5 = '/DFS-L/DATA/pritchard/ankitesg/models/'

# Early stopping on the validation loss with a patience of 10 epochs.
# NOTE(review): the training cell for this model runs 10 epochs, so this
# patience is unlikely to ever trigger - confirm the intended value.
earlyStopping = EarlyStopping(
    monitor='val_loss', mode='min',
    patience=10,
    verbose=0,
)

# Checkpoint on the validation loss; save_best_only=True keeps only the best
# model seen so far in the target file.
mcp_save = ModelCheckpoint(
    path_HDF5+'RH_RG_Model4_V1.h5',
    monitor='val_loss', mode='min',
    save_best_only=True,
)

In [57]:
# Train Model 4 with the device scope set to '/gpu:1'.
# NOTE(review): the device string is hard-coded - confirm it exists on the
# machine this notebook is run on.
with tf.device('/gpu:1'):
    Nep = 10  # number of training epochs
    # Model.fit accepts keras.utils.Sequence generators directly, so the
    # deprecated Model.fit_generator wrapper is not needed.
    # (assumes DataGenerator is a keras Sequence - TODO confirm)
    model.fit(train_gen, epochs=Nep, validation_data=valid_gen,
              callbacks=[earlyStopping, mcp_save])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
