In [1]:
import sys
sys.path.insert(1,"/home1/07064/tg863631/anaconda3/envs/CbrainCustomLayer/lib/python3.6/site-packages") #work around for h5py
from cbrain.imports import *
from cbrain.cam_constants import *
from cbrain.utils import *
from cbrain.layers import *
from cbrain.data_generator import DataGenerator
import tensorflow as tf
from tensorflow import math as tfm
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
# import tensorflow_probability as tfp
import xarray as xr
import numpy as np
from cbrain.model_diagnostics import ModelDiagnostics
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as imag
import scipy.integrate as sin
import matplotlib.ticker as mticker
import pickle
from tensorflow.keras import layers
from tensorflow.keras.losses import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import datetime
from cbrain.climate_invariant import *
import yaml

## Data Generators

In [2]:
from cbrain.imports import *
from cbrain.utils import *
from cbrain.normalization import *
import h5py
from sklearn.preprocessing import OneHotEncoder

In [40]:
class DataGeneratorClassification(tf.keras.utils.Sequence):
    def __init__(self, data_fn, input_vars, output_vars, percentile_path, data_name,
                 norm_fn=None, input_transform=None, output_transform=None,
                 batch_size=1024, shuffle=True, xarray=False, var_cut_off=None, normalize_flag=True, bin_size=100):
        # Just copy over the attributes
        self.data_fn, self.norm_fn = data_fn, norm_fn
        self.input_vars, self.output_vars = input_vars, output_vars
        self.batch_size, self.shuffle = batch_size, shuffle
        self.bin_size = bin_size
        self.percentile_bins = load_pickle(percentile_path)['Percentile'][data_name]
        self.enc = OneHotEncoder(sparse=False)
        classes = np.arange(self.bin_size+2)
        self.enc.fit(classes.reshape(-1,1))
        # Open datasets
        self.data_ds = xr.open_mfdataset(data_fn)
        if norm_fn is not None: self.norm_ds = xr.open_dataset(norm_fn)
     # Compute number of samples and batches
        self.n_samples = self.data_ds.X.shape[0]
        self.n_batches = int(np.floor(self.n_samples) / self.batch_size)

        self.n_inputs, self.n_outputs = 64, 64
        
                # Initialize input and output normalizers/transformers
        if input_transform is None:
            self.input_transform = Normalizer()
        elif type(input_transform) is tuple:
            ## normalize flag added by Ankitesh
            self.input_transform = InputNormalizer(
                self.norm_ds,normalize_flag, input_vars, input_transform[0], input_transform[1], var_cut_off)
        else:
            self.input_transform = input_transform  # Assume an initialized normalizer is passed
            
            
        if output_transform is None:
            self.output_transform = Normalizer()
        elif type(output_transform) is dict:
            self.output_transform = DictNormalizer(self.norm_ds, output_vars, output_transform)
        else:
            self.output_transform = output_transform  # Assume an initialized normalizer is passed

        # Now close the xarray file and load it as an h5 file instead
        # This significantly speeds up the reading of the data...
        if not xarray:
            self.data_ds.close()
            self.data_ds = h5py.File(data_fn, 'r')
    
    def __len__(self):
        return self.n_batches
     
    def __getitem__(self, index):
        # Compute start and end indices for batch
        start_idx = index * self.batch_size
        end_idx = start_idx + self.batch_size

        # Grab batch from data
        batch_X = self.data_ds['X'][start_idx:end_idx]
        Y = self.data_ds['Y'][start_idx:end_idx]
        # Normalize
        X = self.input_transform.transform(batch_X)
        return X, Y

    def on_epoch_end(self):
        self.indices = np.arange(self.n_batches)
        if self.shuffle: np.random.shuffle(self.indices)

In [41]:
scale_dict = load_pickle('/export/nfs0home/ankitesg/CBrain_project/CBRAIN-CAM/nn_config/scale_dicts/009_Wm2_scaling.pkl')

In [42]:
TRAINFILE = 'CI_SP_M4K_train_shuffle.nc'
TRAIN_FILE_ONEHOT = '/scratch/ankitesh/data/new_data_for_v2_*'
VALIDFILE = 'CI_SP_M4K_valid.nc'
NORMFILE = 'CI_SP_M4K_NORM_norm.nc'
data_path = '/fast/ankitesh/data/'

In [43]:
train_gen = DataGeneratorClassification(
    data_fn=f'{TRAIN_FILE_ONEHOT}', 
    input_vars=['QBP','TBP','PS', 'SOLIN', 'SHFLX', 'LHFLX'], 
    output_vars=['PHQ','TPHYSTND','FSNT', 'FSNS', 'FLNT', 'FLNS'], 
    percentile_path='/export/nfs0home/ankitesg/data/percentile_data.pkl', 
    data_name = 'M4K',
    input_transform = ('mean', 'maxrs'),
    output_transform = scale_dict,
    norm_fn = f'{data_path}{NORMFILE}',
    batch_size=1024,
    bin_size=100
)

OSError: Unable to open file (unable to open file: name = '/scratch/ankitesh/data/new_data_for_v2_*', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
train_gen.n_samples

## Model (flattened and reshaped)

In [33]:
inp_shape = train_gen[0][0].shape
output_shape = train_gen[0][1].shape

In [34]:
print(f"Input shape {inp_shape} \nOutput Shape {output_shape}")

Input shape (1024, 64) 
Output Shape (1024, 64, 102)


In [35]:
bin_size = 100

In [36]:
inp = Input(shape=(64,))
densout = Dense(128, activation='linear')(inp)
densout = LeakyReLU(alpha=0.3)(densout)
for i in range (4):
    densout = Dense(128, activation='linear')(densout)
    densout = LeakyReLU(alpha=0.3)(densout)

densout = Dense(256, activation='linear')(densout)
densout = LeakyReLU(alpha=0.3)(densout)
densout = Dense(512, activation='linear')(densout)
densout = LeakyReLU(alpha=0.3)(densout)
densout = Dense(1024, activation='linear')(densout)
densout = LeakyReLU(alpha=0.3)(densout)

densout = Dense(64*(bin_size+2))(densout)
reshaped = tf.keras.layers.Reshape((64,bin_size+2))(densout)
out = tf.keras.layers.Softmax()(reshaped) 
model = tf.keras.models.Model(inp, out)


In [37]:
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_18 (Dense)             (None, 128)               8320      
_________________________________________________________________
leaky_re_lu_16 (LeakyReLU)   (None, 128)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 128)               16512     
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU)   (None, 128)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 128)               16512     
_________________________________________________________________
leaky_re_lu_18 (LeakyReLU)   (None, 128)               0   

In [38]:
model.compile(tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=["accuracy"])
path_HDF5 = '/scratch/ankitesh/models/'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint(path_HDF5+'BF_Classification_bin_size_100_model_v2_fast.hdf5',save_best_only=True, monitor='val_loss', mode='min')

In [39]:
with tf.device('/gpu:1'):
    Nep = 5
    model.fit_generator(train_gen, epochs=Nep,
                      callbacks=[earlyStopping, mcp_save])

Epoch 1/5
   66/41200 [..............................] - ETA: 2:27:43 - loss: 4.2321 - accuracy: 0.0677

KeyboardInterrupt: 

## Model type 2 (all flattened)

In [46]:
bin_size = 20

In [47]:
inp = Input(shape=(64,))
densout = Dense(128, activation='linear')(inp)
densout = LeakyReLU(alpha=0.3)(densout)
for i in range (4):
    densout = Dense(128, activation='linear')(densout)
    densout = LeakyReLU(alpha=0.3)(densout)
densout = Dense(64*(bin_size+2), activation='sigmoid')(densout)
model = tf.keras.models.Model(inp, densout)

In [48]:
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_25 (Dense)             (None, 128)               8320      
_________________________________________________________________
leaky_re_lu_23 (LeakyReLU)   (None, 128)               0         
_________________________________________________________________
dense_26 (Dense)             (None, 128)               16512     
_________________________________________________________________
leaky_re_lu_24 (LeakyReLU)   (None, 128)               0         
_________________________________________________________________
dense_27 (Dense)             (None, 128)               16512     
_________________________________________________________________
leaky_re_lu_25 (LeakyReLU)   (None, 128)               0   

In [49]:
model.compile(tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=["accuracy"])
path_HDF5 = '/scratch/ankitesh/models/'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint(path_HDF5+'BF_Classification_bin_size_20_flat.hdf5',save_best_only=True, monitor='val_loss', mode='min')

In [50]:
with tf.device('/gpu:1'):
    Nep = 5
    model.fit_generator(train_gen, epochs=Nep, validation_data=valid_gen,\
                      callbacks=[earlyStopping, mcp_save])

Epoch 1/5
  242/41376 [..............................] - ETA: 1:06:06 - loss: 431.6241 - accuracy: 0.6286

KeyboardInterrupt: 

## Change Preprocessing way

Pre-dump the preprocessed data and simply load it during run time

In [22]:
# load the data nc file
ds = xr.open_dataset('/scratch/ankitesh/data/CI_SP_M4K_train_shuffle.nc')

In [26]:
ds['time']

## Model (mult-output classification)

In [29]:
bin_size = 20

In [30]:
#this defines a single branch out of 64 branches
def define_single_output_branch(densout,out_index):
    out = Dense(bin_size+2, activation='softmax',name=f"output_{out_index}")(densout)
    return out

In [31]:
inp = Input(shape=(64,))
densout = Dense(128, activation='linear')(inp)
densout = LeakyReLU(alpha=0.3)(densout)
for i in range (4):
    densout = Dense(128, activation='linear')(densout)
    densout = LeakyReLU(alpha=0.3)(densout)
densout = Dense(32, activation='linear')(densout)
densout = LeakyReLU(alpha=0.3)(densout)
all_outputs = [define_single_output_branch(densout,i) for i in range(64)]
model = tf.keras.models.Model(inputs=inp, outputs=all_outputs)

In [32]:
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 64)]         0                                            
__________________________________________________________________________________________________
dense_19 (Dense)                (None, 128)          8320        input_4[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_17 (LeakyReLU)      (None, 128)          0           dense_19[0][0]                   
__________________________________________________________________________________________________
dense_20 (Dense)                (None, 128)          16512       leaky_re_lu_17[0][0]             
____________________________________________________________________________________________

In [33]:
losses = {}
for i in range(64):
    losses[f'output_{i}'] = "categorical_crossentropy"

In [34]:
model.compile(tf.keras.optimizers.Adam(), loss=losses, metrics=["accuracy"])
path_HDF5 = '/scratch/ankitesh/models/'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint(path_HDF5+'BF_Classification_bin_size_20.hdf5',save_best_only=True, monitor='val_loss', mode='min')

In [35]:
with tf.device('/gpu:1'):
    Nep = 5
    model.fit_generator(train_gen, epochs=Nep, validation_data=valid_gen,\
                      callbacks=[earlyStopping, mcp_save])

Epoch 1/5
   25/41376 [..............................] - ETA: 9:26:38 - loss: 195.5079 - output_0_loss: 2.5013 - output_1_loss: 2.4152 - output_2_loss: 3.0643 - output_3_loss: 3.0485 - output_4_loss: 3.1010 - output_5_loss: 3.0708 - output_6_loss: 3.0648 - output_7_loss: 3.0643 - output_8_loss: 3.0735 - output_9_loss: 3.0494 - output_10_loss: 3.0479 - output_11_loss: 3.0468 - output_12_loss: 3.0708 - output_13_loss: 3.0473 - output_14_loss: 3.0686 - output_15_loss: 3.0843 - output_16_loss: 3.0683 - output_17_loss: 3.0769 - output_18_loss: 3.0982 - output_19_loss: 3.0677 - output_20_loss: 3.1205 - output_21_loss: 3.0821 - output_22_loss: 3.1147 - output_23_loss: 3.0896 - output_24_loss: 3.0729 - output_25_loss: 3.1384 - output_26_loss: 3.0729 - output_27_loss: 3.0889 - output_28_loss: 3.0860 - output_29_loss: 3.1053 - output_30_loss: 3.0379 - output_31_loss: 2.9971 - output_32_loss: 3.0864 - output_33_loss: 3.0888 - output_34_loss: 3.0942 - output_35_loss: 3.0963 - output_36_loss: 3.081

KeyboardInterrupt: 

In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7730180181226295028
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 8952735796468704693
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 892534784
locality {
  bus_id: 1
  links {
    link {
      device_id: 1
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 17735162048825978315
physical_device_desc: "device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:3b:00.0, compute capability: 7.0"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 31763336397
locality {
  bus_id: 2
  numa_node: 1
  links {
    link {
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 10400612453446695277
physical_device_desc: "device: 1, name: Tesla V100-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0"
, name: "/device:GPU:2"
device_type: "GPU"
memor

In [16]:
tf.__version__

'2.0.0'

In [45]:
data_temp = xr.open_mfdataset("/scratch/ankitesh/data/new_data_for_v2_*")

In [46]:
data_temp.to_netcdf("/scratch/ankitesh/data/train.nc")

MemoryError: Unable to allocate 19.9 GiB for an array with shape (409600, 64, 102) and data type float64