Similar to original model setup

In [1]:
import gc
import random

import numpy as np
import tensorflow as tf
from keras import backend as K
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.layers import Embedding, Flatten, Conv1D, BatchNormalization, LeakyReLU, Dropout, Dense, Add
from tensorflow.keras.layers import Concatenate, GlobalAveragePooling1D, AveragePooling1D, MaxPool1D

In [3]:
# Sparse categorical cross-entropy with reduction='none': the averaging is left to the
# caller, so CNN.custom_loss / CNN.custom_loss_np can weight the loss before taking the mean.
scce = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none')

In [20]:
class CNN:
    """Conv1D classifier with an embedding side-input and a return-weighted loss.

    The network stacks Conv1D -> BatchNormalization -> LeakyReLU blocks over the
    main input, concatenates the flattened result with a flattened embedding of
    ``sic_input``, runs it through Dense/Dropout layers and ends in a 5-way
    softmax.  The loss is attached with ``add_loss`` so the labels and the
    weighting series are fed to the model as extra inputs.

    Every keyword argument is stored as an attribute.  The code reads the
    following ones, so they must be supplied:

    * callbacks: ``plateau_patience``, ``train_patience``, ``retrain_patience``
    * architecture: ``num_of_tokens``, ``embedding_dim``, ``filter_dims``,
      ``kernel_sizes``, ``layer_dims``, ``activation``, ``dropout_dense``
    * optimisation: ``learning_rate``, ``batch_size``, ``epochs``,
      ``validation_split``
    * ``target``, ``ret_d``, ``sic_input``: used as model inputs in
      ``build_model``.  NOTE(review): presumably ``keras.layers.Input`` tensors
      created by the caller; nothing in this class creates them -- confirm.

    Args:
        input_shape: Shape passed to ``keras.layers.Input`` for the main input.
        seed: Seed used for weight initialisation, dropout and the Python /
            NumPy / TensorFlow RNGs before each fit.
        **kwargs: Hyper-parameters and auxiliary inputs listed above.
    """

    def __init__(self, input_shape, seed, **kwargs):
        # Hyper-parameters become plain attributes; explicit arguments win over kwargs.
        self.__dict__.update(kwargs)

        self.input_shape = input_shape
        self.seed = seed
        # Training and retraining differ only in the early-stopping patience.
        self.train_callback = self._make_callbacks(self.train_patience)
        self.retrain_callback = self._make_callbacks(self.retrain_patience)

        self.model = self.build_model()

    def _make_callbacks(self, stopping_patience):
        """Return the LR-on-plateau + early-stopping callback pair for one fit phase."""
        return [
            keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5,
                                              patience=self.plateau_patience, min_lr=1e-3),
            keras.callbacks.EarlyStopping(monitor="val_loss", patience=stopping_patience,
                                          restore_best_weights=True, verbose=1),
        ]

    def build_model(self):
        """Assemble the Keras functional model and attach the custom loss.

        Returns:
            A ``keras.models.Model`` whose inputs are, in order,
            ``[target, main input, ret_d, sic_input]`` and whose output is the
            5-class softmax.

        Raises:
            AssertionError: If ``filter_dims`` and ``kernel_sizes`` differ in length.
        """
        # Check the configuration first so a bad setup fails before any layer is created.
        assert len(self.filter_dims) == len(self.kernel_sizes), (
            f"filter_dims has {len(self.filter_dims)} entries but "
            f"kernel_sizes has {len(self.kernel_sizes)}"
        )

        init = tf.keras.initializers.GlorotUniform(self.seed)

        input_layer = keras.layers.Input(self.input_shape)
        embedded = Embedding(input_dim=self.num_of_tokens, output_dim=self.embedding_dim,
                             embeddings_regularizer=regularizers.L2(1e-3))(self.sic_input)
        embedded = Flatten()(embedded)

        x = input_layer
        for filters, kernel_size in zip(self.filter_dims, self.kernel_sizes):
            # No conv bias: the BatchNormalization that follows has its own offset.
            x = Conv1D(filters=filters, kernel_size=kernel_size, padding="valid", strides=3,
                       kernel_initializer=init, use_bias=False)(x)
            x = BatchNormalization()(x)
            x = LeakyReLU()(x)

        x = Flatten()(x)
        x = Concatenate()([x, embedded])
        for layer_dim in self.layer_dims:
            x = Dense(layer_dim, activation=self.activation, kernel_initializer=init,
                      bias_initializer='zeros')(x)
            x = Dropout(self.dropout_dense, seed=self.seed)(x)
        output_layer = Dense(5, activation="softmax", kernel_initializer=init, use_bias=False)(x)

        model = keras.models.Model(inputs=[self.target, input_layer, self.ret_d, self.sic_input],
                                   outputs=output_layer)
        # The loss needs ret_d as well as the labels, so it is attached via add_loss
        # instead of being passed to compile().
        model.add_loss(self.custom_loss(self.target, output_layer, self.ret_d))

        return model

    def custom_loss(self, y_true, y_pred, ret_d):
        """Mean of per-sample cross-entropy weighted by ``min(|ret_d|, 1)`` (TensorFlow ops)."""
        y_true = tf.cast(y_true, dtype=tf.float32)
        # Flatten both factors so they multiply element-wise.
        # NOTE(review): if ret_d is a column vector, an unflattened product would
        # broadcast to a square matrix and average unrelated pairs -- shapes are
        # not visible here, confirm.  For 1-D inputs the reshape is a no-op.
        per_sample = tf.reshape(scce(y_true, y_pred), [-1])
        weights = tf.reshape(tf.minimum(tf.abs(ret_d), 1), [-1])
        return tf.reduce_mean(per_sample * weights)

    def custom_loss_np(self, y_true, y_pred, ret_d):
        """NumPy counterpart of ``custom_loss`` for already-computed predictions."""
        # Same flattening as custom_loss, so both versions agree on the weighting.
        per_sample = np.asarray(scce(y_true, y_pred)).reshape(-1)
        weights = np.minimum(np.abs(np.asarray(ret_d)), 1).reshape(-1)
        return np.mean(per_sample * weights)

    def compile_model(self):
        """Compile with Adam at ``self.learning_rate``; the loss comes from ``add_loss``."""
        self.model.compile(loss=None, optimizer=keras.optimizers.Adam(self.learning_rate))

    def _fit(self, x_train, y_train, ret_d_train, sic_train, callbacks):
        """Seed the RNGs, run one ``model.fit`` and return its history."""
        tf.random.set_seed(self.seed)
        random.seed(self.seed)
        np.random.seed(self.seed)
        history = self.model.fit(
            # Input order must match the model inputs declared in build_model.
            x=[y_train, x_train, ret_d_train, sic_train],
            y=None,
            batch_size=self.batch_size,
            epochs=self.epochs,
            callbacks=callbacks,
            validation_split=self.validation_split,
            verbose=1,
        )
        gc.collect()
        return history

    def train_model(self, x_train, y_train, ret_d_train, sic_train):
        """Fit the model with the training callbacks.

        Returns:
            The object returned by ``model.fit``.
        """
        return self._fit(x_train, y_train, ret_d_train, sic_train, self.train_callback)

    def evaluate_model(self, x_train, y_train, ret_d_train, sic_train,
                       x_test, y_test, ret_d_test, sic_test, batch_size):
        """Print the custom loss on the training set and on the test set."""
        splits = (
            ("training", x_train, y_train, ret_d_train, sic_train),
            ("test", x_test, y_test, ret_d_test, sic_test),
        )
        for label, x, y, ret_d, sic in splits:
            y_pred = self.model.predict([y, x, ret_d, sic], batch_size=batch_size, verbose=0)
            print(f'Model {label} loss {self.custom_loss_np(y, y_pred, ret_d)}')

    def retrain_model(self, x_train, y_train, ret_d_train, sic_train, learning_rate=1e-2):
        """Reset the optimizer learning rate and fit again with the retrain callbacks.

        Args:
            learning_rate: Value written to the optimizer before fitting
                (defaults to the previously hard-coded 1e-2).

        Returns:
            The object returned by ``model.fit``.
        """
        K.set_value(self.model.optimizer.learning_rate, learning_rate)
        return self._fit(x_train, y_train, ret_d_train, sic_train, self.retrain_callback)