In [1]:
import os
from typing import List

import numpy as np
import pandas as pd
import seaborn as sns
from comet_ml import Experiment
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import tensorflow_hub as hub

In [2]:
# Load the dataset and pick the model input / target columns.
df = pd.read_csv('tweets_50.csv')
X, y = df['text_tokenized'], df['label']

# Stage 1: hold out 20% of all rows as the test set.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

# Stage 2: split the remaining 80% into train / validation.
# 0.8 * 0.25 = 0.2, so the final proportions are 60% / 20% / 20%.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval,
    test_size=0.25,
    random_state=0,
)

# Distinct label values (in order of first appearance in the file).
class_names = y.unique()

In [3]:
# --- Experiment configuration -------------------------------------------------
# Everything a reader might want to tune lives here; the training loop below
# logs these values to Comet as the run's parameters.

# Training
batch_size = 128
epochs = 200
optimizer = 'nadam'

# TF-Hub text embedding.
embed_model = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
# Handles iterated by the training loop. This list was previously not defined
# anywhere in the notebook, so a fresh kernel failed with NameError. Add more
# TF-Hub handles here to compare embeddings; note that `embedding_dim` below is
# shared by every entry.
tfhub_embedding_models = [embed_model]
embedding_dim = 20

# Dense head
activation = 'relu'
kernel_initializer = 'he_normal'
l2_lambda = 1e-3
dropout = 0.5

# Convolution settings. These are logged with the run parameters; the
# Dense-only model built in this cell does not read them.
filters = 64
kernel_sizes = [1, 2, 3]
n_convs_parallel = len(kernel_sizes)
padding = 'same'
pool_size = 2
strides = 1

def make_model(model_url=None):
    """Build and compile the TF-Hub embedding classifier.

    Architecture: TF-Hub text-embedding layer -> Dense(128) with L2 kernel
    regularisation -> softmax over ``len(class_names)`` classes. The model
    summary is printed as a side effect.

    Args:
        model_url: TF-Hub handle of the embedding to use. When omitted, the
            module-level ``embed_model`` is used, so a bare ``make_model()``
            behaves as it did when this function was a closure inside the loop.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    if model_url is None:
        model_url = embed_model

    l2_reg = keras.regularizers.l2(l2=l2_lambda)
    model = keras.Sequential([
        # NOTE(review): output_shape uses the single global `embedding_dim`, so
        # every handle passed in must produce embeddings of that width.
        hub.KerasLayer(model_url, dtype=tf.string, input_shape=[], output_shape=[embedding_dim]),
        keras.layers.Dense(128, activation=activation, kernel_initializer=kernel_initializer, kernel_regularizer=l2_reg),
        keras.layers.Dense(len(class_names), activation="softmax")
    ])
    model.summary()

    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["acc"]
    )
    return model


# One Comet experiment per embedding model.
# Expects `tfhub_embedding_models` (an iterable of TF-Hub handles) to be defined
# before this point.
for embed_model in tfhub_embedding_models:
    project_name = 'nlp_kerashub'
    experiment = Experiment(
        project_name=project_name,
        auto_param_logging=True,
        # auto_histogram_weight_logging=True,
        auto_histogram_gradient_logging=True,
        auto_histogram_activation_logging=True,
        # Never commit credentials: the key is read from the environment.
        # If COMET_API_KEY is unset this passes None and Comet falls back to
        # its own configuration lookup. The key that used to be hard-coded
        # here should be revoked.
        api_key=os.environ.get("COMET_API_KEY"),
        workspace="henrystoll",
    )

    # try/finally guarantees the experiment is closed even if training or
    # evaluation raises, so failed runs do not stay open on Comet.
    try:
        params = {
            'batch_size': batch_size,
            'embed_model': embed_model,
            'embedding_dim': embedding_dim,
            'filters': filters,
            'kernel_sizes': kernel_sizes,
            'pool_size': pool_size,
            'padding': padding,
            'strides': strides,
            'n_convs_parallel': n_convs_parallel,
            'activation': activation,
            'kernel_initializer': kernel_initializer,
            'l2_lambda': l2_lambda,
            'dropout': dropout,
            'optimizer': optimizer,
            'epochs': epochs,
        }
        experiment.log_parameters(params)

        model = make_model(embed_model)
        # keras.utils.plot_model(model, "model.png", show_shapes=True)
        # experiment.log_asset("model.png")

        # Stop once validation accuracy has not improved for 10 epochs and
        # roll back to the best weights seen.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_acc',
            patience=10,
            verbose=1,
            restore_best_weights=True)

        with experiment.train():
            history = model.fit(X_train, y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_data=(X_val, y_val),
                                verbose=1,
                                callbacks=[early_stopping])

        # Metrics logged inside this context appear on Comet as
        # test_loss / test_accuracy.
        with experiment.test():
            loss, accuracy = model.evaluate(X_test, y_test)
            print('acc : {:.3f}'.format(accuracy))
            metrics = {
                'loss': loss,
                'accuracy': accuracy
            }
            experiment.log_metrics(metrics)

        # Class probabilities -> predicted class index per test sample.
        y_predicted = model.predict(X_test)
        y_predicted = y_predicted.argmax(axis=1)
        experiment.log_confusion_matrix(y_test.to_numpy(), y_predicted)
    finally:
        experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/henrystoll/nlp-kerashub/ba9da4a43afd4daa831f18aaddf31ae9

COMET INFO: Ignoring automatic log_parameter('verbose') because 'keras:verbose' is in COMET_LOGGING_PARAMETERS_IGNORE


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense (Dense)                (None, 128)               2688      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 516       
Total params: 403,224
Trainable params: 3,204
Non-trainable params: 400,020
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/henrystoll/nlp-kerashub/ba9da4a43afd4daa831f18aaddf31ae9
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     test_accuracy             : 0.7255308032035828
COMET INFO:     test_loss                 : 0.8751965761184692
COMET INFO:     train_acc [37]            : (0.39768606424331665, 0.7449490427970886)
COMET INFO:     train_batch_acc [185]     : (0.3794642984867096, 0.7535511255264282)
COMET INFO:     train_batch_loss [185]    : (0.7758073806762695, 4.043598175048828)
COMET INFO:     train_epoch_duration [37] : (2.039828163004131, 4.569479643992963)
COMET INFO:     train_loss [37]           : (0.8176402449607849, 2.1347265243530273)
COMET INFO:     train_val_acc [37]        : (0.39513206481933594, 0.6980838775634766)
COMET INFO:     train_v