Inspiration: https://www.tensorflow.org/tutorials/text/classify_text_with_bert

In [None]:
# Mount Google Drive at /content/drive (relies on the `google.colab` package).
# force_remount=True is passed so an existing mount is re-mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install -q tensorflow-text
!pip install -q tf-models-official

[K     |████████████████████████████████| 3.4MB 19.4MB/s 
[K     |████████████████████████████████| 1.1MB 16.4MB/s 
[K     |████████████████████████████████| 51kB 9.3MB/s 
[K     |████████████████████████████████| 645kB 55.1MB/s 
[K     |████████████████████████████████| 37.6MB 78kB/s 
[K     |████████████████████████████████| 1.2MB 30.6MB/s 
[K     |████████████████████████████████| 174kB 56.6MB/s 
[K     |████████████████████████████████| 706kB 48.5MB/s 
[K     |████████████████████████████████| 358kB 35.3MB/s 
[K     |████████████████████████████████| 102kB 15.6MB/s 
[?25h  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Building wheel for py-cpuinfo (setup.py) ... [?25l[?25hdone


In [None]:
import os
import shutil
import itertools as it

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import tensorflow_addons as tfa
from official.nlp import optimization  # to create AdamW optmizer

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.model_selection import StratifiedShuffleSplit

tf.get_logger().setLevel('ERROR')

# Data import

In [None]:
# Work from the notebooks folder on the mounted Drive; the CSV paths below are
# resolved relative to it.
os.chdir('/content/drive/MyDrive/Capgemini/Hackathon/notebooks')

# Xy comes from train.csv and X_test from test.csv, both indexed by 'id'.
Xy, X_test = (
    pd.read_csv(os.path.join('..', 'data', csv_name), index_col=['id'])
    for csv_name in ('train.csv', 'test.csv')
)

# BERT model selection

In [None]:
# Name of the encoder to use. It must be a key of BOTH dictionaries below,
# because it is used to look up the encoder handle and the preprocessing handle.
bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8'
# Alternative (larger) encoder:
# bert_model_name = 'bert_en_uncased_L-12_H-768_A-12'

# Model name -> TF Hub URL of the encoder.
map_name_to_handle = {
    'bert_en_uncased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',
    'bert_en_cased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',
    'bert_multi_cased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',
    'small_bert/bert_en_uncased_L-2_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-2_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-2_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-2_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-4_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-4_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-4_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-4_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-6_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-6_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-6_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-6_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-8_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-8_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-8_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-8_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-10_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-10_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-10_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-10_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-12_H-128_A-2': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-12_H-256_A-4': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-12_H-512_A-8': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-768_A-12/1',
    'albert_en_base': 'https://tfhub.dev/tensorflow/albert_en_base/2',
    'electra_small': 'https://tfhub.dev/google/electra_small/2',
    'electra_base': 'https://tfhub.dev/google/electra_base/2',
    'experts_pubmed': 'https://tfhub.dev/google/experts/bert/pubmed/2',
    'experts_wiki_books': 'https://tfhub.dev/google/experts/bert/wiki_books/2',
    'talking-heads_base': 'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',
}

# Model name -> TF Hub URL of the preprocessing model paired with that encoder.
map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'bert_en_cased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-128_A-2': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-256_A-4': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-512_A-8': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'bert_multi_cased_L-12_H-768_A-12': 'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',
    'albert_en_base': 'https://tfhub.dev/tensorflow/albert_en_preprocess/2',
    'electra_small': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'electra_base': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'experts_pubmed': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'experts_wiki_books': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'talking-heads_base': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
}

# Resolve both handles for the selected model; a KeyError here means
# bert_model_name is missing from one of the tables above.
tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

BERT model selected           : https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1
Preprocess model auto-selected: https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3


# Modeling functions

In [None]:
# from embedder.src.universal_hp_optimizer import UniversalHPOptimizer
import tensorflow as tf
import tensorflow.compat.v1 as tfc
from tensorboard.plugins.hparams import api as hp
import datetime
import numpy as np

# Default hyper-parameter grid: each entry is a list of candidate values.
# An nb_columns of -1 disables the column slicing done before training.
DEFAULT_PARAMS = dict(
    epochs=[10],
    batch_size=[32],
    batch_normalization=[False],
    dropout=[0],
    optimizer=['adam'],
    early_stopping=[3],
    nb_columns=[-1],
)

class UniversalHPOptimizer():
    """Grid search over hyper-parameters for a user-supplied Keras model factory.

    Every combination of the candidate values (user values completed with
    ``DEFAULT_PARAMS``) is trained and evaluated. The model with the highest
    evaluation score is kept in ``best_model``, together with
    ``best_accuracy`` and ``best_params``. The hyper-parameters of each trial
    are recorded through the TensorBoard HParams API.
    """

    def __init__(self, dict_params, create_model, log_dir="logs/fit/", print_summary=False):
        """
        Args:
            dict_params (dict): Maps each hyper-parameter name to the list of
                values to try in the grid search.
            create_model (callable): Called as
                ``create_model(hparams, print_summary)``; must return a
                compiled model.
            log_dir (str, optional): Stored on the instance as ``log_dir``.
                Defaults to "logs/fit/".
            print_summary (bool, optional): Forwarded to ``create_model``.
                Defaults to False.
        """
        self.user_input = dict_params
        self.create_model = create_model
        self.print_summary = print_summary
        self.best_model = None
        self.best_accuracy = None
        # Initialised here so the attribute exists before the first trial.
        self.best_params = None
        self.METRIC_ACCURACY = 'accuracy'
        self.METRIC_F1 = 'f1-score'
        self.log_dir = log_dir
        self.params = self.generate_hp_dict(dict_params)


    def generate_hp_dict(self, dict_params):
        """Merge the user grid with ``DEFAULT_PARAMS``.

        Args:
            dict_params (dict): User-supplied grid.

        Returns:
            dict: The default keys first (user values taking precedence),
            followed by any extra user keys.
        """
        return {**DEFAULT_PARAMS, **dict_params}


    def run_all(self, x_train, x_test, y_train=None, y_test=None):
        """Train and evaluate one model per hyper-parameter combination.

        Note the positional order: ``x_train, x_test, y_train, y_test``.

        Args:
            x_train: Training inputs.
            x_test: Validation inputs, also used for the final evaluation.
            y_train (optional): Training targets. Defaults to None.
            y_test (optional): Validation targets. Defaults to None.
        """
        names = list(self.params)
        combinations = [dict(zip(names, values)) for values in it.product(*self.params.values())]

        for session_num, hparams in enumerate(combinations, start=1):
            run_name = "run-%d" % session_num
            print('\n--- Starting trial: %s' % run_name)
            print(dict(hparams))
            self.run(hparams, x_train, y_train, x_test, y_test, 'logs/hparam_tuning/' + run_name)


    def train_test_model(self, hparams, x_train, y_train, x_test, y_test):
        """Build, fit and evaluate a single model for one combination.

        Args:
            hparams (dict): One value per hyper-parameter. The keys read here
                are ``nb_columns``, ``early_stopping``, ``batch_size`` and
                ``epochs``; the full dict is passed to ``create_model``.
            x_train: Training inputs.
            y_train: Training targets, or None when ``x_train`` already
                yields (inputs, targets) batches.
            x_test: Validation inputs.
            y_test: Validation targets, or None (same convention).

        Returns:
            The second value returned by ``model.evaluate`` (averaged when it
            is an array, e.g. a per-class metric).
        """
        model = self.create_model(hparams, self.print_summary)

        if hparams['nb_columns'] != -1:
            x_train = x_train.iloc[:, 0:hparams['nb_columns']]
            x_test = x_test.iloc[:, 0:hparams['nb_columns']]

        # 'val_loss' is always logged when validation data is supplied. The
        # previous monitor name ('f1-score') is never logged by Keras, which
        # silently disabled early stopping.
        callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=hparams["early_stopping"], verbose=1)]

        if y_train is None or y_test is None:
            # Assume pre-batched datasets: they define their own batch size,
            # so `batch_size` must not be passed to fit().
            model.fit(x_train, callbacks=callbacks, epochs=hparams["epochs"], validation_data=x_test)
            _, accuracy = model.evaluate(x_test)
        else:
            print(f"x_train shape = {x_train.shape}, y_train shape = {y_train.shape}")
            model.fit(x_train, y_train, callbacks=callbacks, batch_size=hparams["batch_size"], epochs=hparams["epochs"], validation_data=(x_test, y_test))
            print(f"x_test shape = {x_test.shape}, y_test shape = {y_test.shape}")
            _, accuracy = model.evaluate(x_test, y_test)

        # A per-class metric comes back as an array; reduce it to one score.
        if isinstance(accuracy, np.ndarray):
            accuracy = sum(accuracy) / len(accuracy)

        print(f"Accuracy found = {accuracy}")
        print(f"Best Accuracy found = {self.best_accuracy}")
        if self.best_accuracy is None or accuracy > self.best_accuracy:
            self.best_accuracy = accuracy
            self.best_model = model
            self.best_params = hparams

        return accuracy

    def run(self, hparams, x_train, y_train, x_test, y_test, run_dir="."):
        """Run one trial and record its hyper-parameters for TensorBoard.

        Args:
            hparams (dict): One value per hyper-parameter.
            x_train: Training inputs.
            y_train: Training targets (may be None).
            x_test: Validation inputs.
            y_test: Validation targets (may be None).
            run_dir (str, optional): Directory of the summary writer for this
                trial. Defaults to ".".

        Returns:
            The score returned by ``train_test_model``.
        """
        with tf.summary.create_file_writer(run_dir).as_default():
            hp.hparams(hparams)  # record the values used in this trial
            accuracy = self.train_test_model(hparams, x_train, y_train, x_test, y_test)
        return accuracy


    def predict(self, x_test):
        """Predict with the best model found so far.

        Args:
            x_test: Inputs to predict on.

        Returns:
            tuple: ``(predicted_probas, predicted_classes)`` where the classes
            are the argmax of the model output over the last axis.

        Raises:
            RuntimeError: If no model has been trained yet.
        """
        if self.best_model is None:
            raise RuntimeError("No trained model available: call run_all() before predict().")
        predicted_probas = self.best_model.predict(x_test, verbose=1, max_queue_size=10)
        predicted_classes = np.argmax(predicted_probas, axis=-1)
        return predicted_probas, predicted_classes


    def get_confusion_matrix(self, y_true, x_test=None, y_pred=None, labels=None):
        """Build a confusion matrix (rows: true labels, columns: predictions).

        Args:
            y_true: True class indices.
            x_test (optional): Inputs to predict on when ``y_pred`` is not
                given. Defaults to None.
            y_pred (optional): Predicted class indices. Defaults to None.
            labels (list of str, optional): Class names used to label the rows
                and columns. Defaults to None.

        Returns:
            A pandas DataFrame when ``labels`` is given, otherwise a numpy
            array.

        Raises:
            ValueError: If neither ``x_test`` nor ``y_pred`` is provided.
        """
        if labels is not None:
            real_labels = ["real " + label for label in labels]
            pred_labels = ["pred " + label for label in labels]
        else:
            real_labels, pred_labels = None, None

        if y_pred is None:
            if x_test is None:
                raise ValueError("Either x_test or y_pred must be provided.")
            _, y_pred = self.predict(x_test)
        conf_matrix = tf.math.confusion_matrix(y_true, y_pred).numpy()
        if real_labels is not None and pred_labels is not None:
            conf_matrix = pd.DataFrame(conf_matrix, index=real_labels, columns=pred_labels)
        return conf_matrix


    def plot_evaluation_info(self, conf_matrix):
        """Plot the confusion matrix as raw counts, row-normalised and column-normalised.

        Args:
            conf_matrix (ndarray or pd.DataFrame): Confusion matrix of counts,
                true labels on the rows.

        Returns:
            None
        """
        # Totals are computed on a plain array so both ndarray and DataFrame
        # inputs are supported.
        counts = np.asarray(conf_matrix)
        row_totals = counts.sum(axis=1, keepdims=True)  # one total per true class
        column_totals = counts.sum(axis=0)              # one total per predicted class
        conf_matrix_norm_recall = conf_matrix / row_totals
        conf_matrix_norm_precision = conf_matrix / column_totals

        _, ax = plt.subplots(ncols=3, nrows=1, figsize=(20, 5))
        sns.heatmap(conf_matrix, annot=True, cmap='Blues', ax=ax[0]).set_title("Absolute value confusion matrix")
        sns.heatmap(conf_matrix_norm_recall, annot=True, fmt='.2%', cmap='Blues', ax=ax[1]).set_title("Recall confusion matrix")
        sns.heatmap(conf_matrix_norm_precision, annot=True, fmt='.2%', cmap='Blues', ax=ax[2]).set_title("Precision confusion matrix")
        plt.show()

In [None]:
def load_data_grid(Xy, X_test, column, test_size=0.2, random_state=0):
    """Extract one text column and split the labelled rows into train/validation.

    Rows whose `column` value is missing are dropped from both frames. The
    labelled rows are then split once with a stratified shuffle split on the
    'class' column.

    Args:
        Xy (pd.DataFrame): Labelled data containing `column` and 'class'.
        X_test (pd.DataFrame): Unlabelled data containing `column`.
        column (str): Name of the text column to extract.
        test_size (float, optional): Fraction of labelled rows held out for
            validation. Defaults to 0.2.
        random_state (int, optional): Seed of the split. Defaults to 0.

    Returns:
        tuple: (X_train, y_train_one_hot, X_val, y_val_one_hot, X_test_column).
    """
    labelled = Xy.dropna(subset=[column])
    X_test_column = X_test.dropna(subset=[column])[column]
    texts = labelled[column]
    classes = labelled['class']

    # Fix the one-hot width from the full label set so the train and validation
    # encodings always have the same number of columns.
    # assumes 'class' holds integer labels starting at 0 -- TODO confirm
    num_classes = int(classes.max()) + 1

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    train_index, val_index = next(splitter.split(texts, classes))

    X_train_column, X_val = texts.iloc[train_index], texts.iloc[val_index]
    y_train_cat = tf.keras.utils.to_categorical(classes.iloc[train_index], num_classes=num_classes)
    y_val_cat = tf.keras.utils.to_categorical(classes.iloc[val_index], num_classes=num_classes)

    return X_train_column, y_train_cat, X_val, y_val_cat, X_test_column


def load_data(Xy_train, X_test, column):
    """Extract one text column for training on all labelled rows.

    Rows whose `column` value is missing are dropped from both frames.

    Args:
        Xy_train (pd.DataFrame): Labelled data containing `column` and 'class'.
        X_test (pd.DataFrame): Unlabelled data containing `column`.
        column (str): Name of the text column to extract.

    Returns:
        tuple: (X_train_column, y_train_one_hot, X_test_column).
    """
    train_rows = Xy_train.dropna(subset=[column])
    test_rows = X_test.dropna(subset=[column])
    one_hot_labels = tf.keras.utils.to_categorical(train_rows['class'])
    return train_rows[column], one_hot_labels, test_rows[column]

In [None]:
def train_model(X_train, y_train_cat, tfhub_handle_encoder, tfhub_handle_preprocess, model_name=None, input_length=44, epochs=15, batch_size=32, validation_split=0.2):
    """Fine-tune a BERT encoder with a 4-class softmax head on raw text.

    Args:
        X_train: Training texts.
        y_train_cat: One-hot encoded training labels (4 classes).
        tfhub_handle_encoder (str): TF Hub handle of the encoder.
        tfhub_handle_preprocess (str): TF Hub handle of the preprocessing model.
        model_name (optional): Not used by this function; kept (now optional)
            for backward compatibility.
        input_length (int, optional): Not used by this function. Defaults to 44.
        epochs (int, optional): Number of training epochs. Defaults to 15.
        batch_size (int, optional): Batch size. Defaults to 32.
        validation_split (float, optional): Fraction of the training data that
            Keras holds out for validation. Defaults to 0.2.

    Returns:
        The trained `tf.keras.Model`.
    """
    # Classifier: string input -> preprocessing -> trainable encoder ->
    # dropout -> softmax over the 4 classes.
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    encoder_inputs = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')(text_input)
    encoder_outputs = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')(encoder_inputs)
    net = tf.keras.layers.Dropout(0.1)(encoder_outputs['pooled_output'])
    net = tf.keras.layers.Dense(4, activation=tf.keras.activations.softmax, name='classifier')(net)
    classifier_model = tf.keras.Model(text_input, net)

    # The head already applies softmax, so the loss receives probabilities,
    # not logits.
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    metrics = tfa.metrics.F1Score(num_classes=4)

    # The learning-rate schedule must span the whole run: optimizer steps per
    # epoch (batches of the non-validation part) times the number of epochs.
    n_fit_samples = int(len(X_train) * (1 - validation_split))
    steps_per_epoch = max(1, (n_fit_samples + batch_size - 1) // batch_size)
    num_train_steps = steps_per_epoch * epochs
    num_warmup_steps = int(0.1 * num_train_steps)

    init_lr = 3e-5
    optimizer = optimization.create_optimizer(init_lr=init_lr,
                                              num_train_steps=num_train_steps,
                                              num_warmup_steps=num_warmup_steps,
                                              optimizer_type='adamw')

    classifier_model.compile(optimizer=optimizer,
                             loss=loss,
                             metrics=metrics)

    print(f'Training model with {tfhub_handle_encoder}')
    classifier_model.fit(X_train, y_train_cat,
                         batch_size=batch_size,
                         epochs=epochs,
                         validation_split=validation_split)

    return classifier_model

In [None]:
def predict_and_save(classifier_model, X_test, filename_submission, filename_unpack, filename_model, labels):
    """Predict on X_test, then write the predictions and the model to disk.

    Two CSV files are written under ../data (both indexed by 'id'): the
    predicted class per row, and the raw model outputs with one column per
    entry of `labels`. The model itself is saved under ../saved_models.

    Args:
        classifier_model: Trained model exposing `predict` and `save`.
        X_test: Inputs to predict on; its index supplies the row ids.
        filename_submission (str): File name of the class-prediction CSV.
        filename_unpack (str): File name of the raw-output CSV.
        filename_model (str): Name of the saved model.
        labels (list): Column names for the raw-output CSV.
    """
    data_dir = os.path.join('..', 'data')

    class_scores = classifier_model.predict(
        X_test,
        batch_size=None,
        verbose=1,
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
    )
    predicted_class = np.argmax(class_scores, axis=1)

    # Hard class predictions.
    submission = pd.DataFrame(
        predicted_class,
        index=pd.Index(X_test.index.values, name='id'),
        columns=['class'],
    )
    submission.to_csv(os.path.join(data_dir, filename_submission))

    # Raw model outputs, one column per label.
    scores_frame = pd.DataFrame(
        class_scores,
        index=pd.Index(X_test.index.values, name='id'),
        columns=labels,
    )
    scores_frame.to_csv(os.path.join(data_dir, filename_unpack))

    classifier_model.save(os.path.join('..', 'saved_models', filename_model))

# Execution

## Title

In [None]:
# X_train_title, y_train_title_cat, X_val_title, y_val_title_cat, X_test_title = load_data(Xy, X_test, 'title')
# classifier_model_title = train_model(X_train_title, y_train_title_cat, tfhub_handle_encoder, tfhub_handle_preprocess, input_length=44)
# predict_and_save(classifier_model_title, X_test_title, 'title_submission3.csv', 'title_softmax3.csv', 'title_big_bert', labels=['title_0', 'title_1', 'title_2', 'title_3'])

## Description

In [None]:
# Loading data
X_train_desc, y_train_desc_cat, X_test_desc = load_data(Xy, X_test, 'description')
# Preprocessing
# test_preprocess_model = make_bert_preprocess_model(['description'], seq_length=172)
# text_preprocessed = test_preprocess_model(X_train_desc)
# Train model
# model_name is passed explicitly so the call matches train_model's signature
# and runs on a fresh kernel.
classifier_model_desc = train_model(X_train_desc, y_train_desc_cat, tfhub_handle_encoder, tfhub_handle_preprocess, model_name='desc_big_bert_numtrainsteps', epochs=10)
predict_and_save(classifier_model_desc, X_test_desc, 'desc_submission_numtrainsteps.csv', 'desc_softmax_numtrainsteps.csv', 'desc_big_bert_numtrainsteps', labels=['desc_0', 'desc_1', 'desc_2', 'desc_3'])

Training model with https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




In [None]:
# classifier_model_desc.summary()

## GridSearch - HP Params

In [None]:
def generate_model(hparams, print_summary=False):
    """Build and compile the BERT classifier for one grid-search trial.

    The encoder and preprocessing handles are read from the module-level
    `tfhub_handle_encoder` and `tfhub_handle_preprocess`.

    Args:
        hparams (dict): Must contain 'dropout', 'steps_per_epoch' and 'epochs'.
        print_summary (bool, optional): When True, print the Keras model
            summary. Defaults to False.

    Returns:
        The compiled `tf.keras.Model`.
    """
    # Classifier: string input -> preprocessing -> trainable encoder ->
    # dropout -> softmax over the 4 classes.
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    encoder_inputs = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')(text_input)
    encoder_outputs = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')(encoder_inputs)
    net = tf.keras.layers.Dropout(hparams['dropout'])(encoder_outputs['pooled_output'])
    net = tf.keras.layers.Dense(4, activation=tf.keras.activations.softmax, name='classifier')(net)
    classifier_model = tf.keras.Model(text_input, net)

    # The head already applies softmax, so the loss receives probabilities,
    # not logits.
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    metrics = tfa.metrics.F1Score(num_classes=4)

    # Total number of optimizer steps covered by the learning-rate schedule.
    num_train_steps = hparams['steps_per_epoch'] * hparams['epochs']
    num_warmup_steps = int(0.1 * num_train_steps)

    init_lr = 3e-5
    optimizer = optimization.create_optimizer(init_lr=init_lr,
                                              num_train_steps=num_train_steps,
                                              num_warmup_steps=num_warmup_steps,
                                              optimizer_type='adamw')

    classifier_model.compile(optimizer=optimizer,
                             loss=loss,
                             metrics=metrics)

    if print_summary:
        classifier_model.summary()

    return classifier_model

logs = os.path.join("..", "logs")
# load_data_grid (not load_data) returns the 5-tuple that includes the
# validation split unpacked here.
X_train_title, y_train_title, X_val_title, y_val_title, X_test_title = load_data_grid(Xy, X_test, 'title')
X_train_desc, y_train_desc, X_val_desc, y_val_desc, X_test_desc = load_data_grid(Xy, X_test, 'description')

# NOTE(review): 'steps_per_epoch' is derived from the *title* training set and
# a batch size of 32, while the search below trains on the *description* set
# with batch_size 16 -- confirm this is intended.
params = {
    "epochs": [10],
    'early_stopping': [2],
    'dropout': [0.1],
    'steps_per_epoch': [len(X_train_title), int(len(X_train_title) / 32)],
    'batch_size': [16],
}

print(f"Shape X_train = {X_train_title.shape} \n X_val = {X_val_title.shape} \n y_train = {y_train_title.shape} \n y_val= {y_val_title.shape}")
hp_han = UniversalHPOptimizer(params, generate_model, log_dir=logs, print_summary=True)
hp_han.run_all(x_train=X_train_desc, x_test=X_val_desc, y_train=y_train_desc, y_test=y_val_desc)

Shape X_train = (5393,) 
 X_val = (1349,) 
 y_train = (5393, 4) 
 y_val= (1349, 4)

--- Starting trial: run-1
{'epochs': 10, 'batch_size': 16, 'batch_normalization': False, 'dropout': 0.1, 'optimizer': 'adam', 'early_stopping': 2, 'nb_columns': -1, 'steps_per_epoch': 5393}
x_train shape = (4289,), y_test shape = (4289, 4)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
x_test shape = (1073,), y_test shape = (1073, 4)
Accuracy found = 0.9258756190538406
Best Accuracy found = None

--- Starting trial: run-2
{'epochs': 10, 'batch_size': 16, 'batch_normalization': False, 'dropout': 0.1, 'optimizer': 'adam', 'early_stopping': 2, 'nb_columns': -1, 'steps_per_epoch': 168}
x_train shape = (4289,), y_test shape = (4289, 4)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
x_test shape = (1073,), y_test shape = (1073, 4)
Accuracy found = 0.927030012011528
Best Accuracy foun