In [None]:
from tqdm.autonotebook import tqdm
import os
import numpy as np
import pacmap
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

import tensorflow as tf
from sklearn.utils import shuffle
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
import sentencepiece as spm
from IPython.display import clear_output
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import f1_score
import pickle

from tensorflow.keras.utils import plot_model
import subprocess
import sys
import time
from termcolor import colored, cprint
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from tqdm import tqdm
import pacmap

In [None]:
# Run configuration: experiment tag plus the training hyperparameters.
TEST_NAME = "200k_dict"  # embedded in every pickle / checkpoint file name
BATCH_SIZE, EPOCHS = 32, 500

# Output location derived from the experiment tag (no separator is inserted
# between "./out" and the tag).
path = "".join(("./out", TEST_NAME))

In [None]:
def _dataPath(typ):
    """Return the pickle path for artifact `typ` of the current TEST_NAME."""
    return './data/' + TEST_NAME + "_" + typ + '.pkl'


def saveData(typ, obj):
    """Pickle `obj` to ./data/<TEST_NAME>_<typ>.pkl.

    Args:
        typ: Short artifact tag that becomes part of the file name.
        obj: Any picklable object.
    """
    target = _dataPath(typ)
    # Create the data directory on first use so a fresh checkout does not
    # fail with FileNotFoundError.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(obj, f)


def loadData(typ):
    """Load the object previously stored with `saveData(typ, ...)`.

    Args:
        typ: Artifact tag used when the object was saved.

    Returns:
        The unpickled object (never None).

    Raises:
        FileNotFoundError: If no pickle exists for `typ`.
        AssertionError: If the pickle contains None.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load artifacts that were produced by this project.
    with open(_dataPath(typ), 'rb') as f:
        ret = pickle.load(f)
    assert ret is not None, "pickle for '" + typ + "' contained None"
    return ret

In [None]:
# Load the pickled training bundle. Only the "numeric" and "weights" entries
# are read here; their first rows give the model's input and output widths.
d = loadData("training")  # numeric, weights, data, labels
inSize, outSize = len(d["numeric"][0]), len(d["weights"][0])
print(f"Samples: {len(d['numeric'])}, Input Size: {inSize}, Output Size: {outSize}")

In [None]:
def plotPac(corpus, maxNum=-1, colors = ['blue', 'orange'], names=['Nominal','Crash'], sz=1.5, alpha=0.5):
    """Scatter-plot a 2-D PaCMAP projection of `corpus`.

    Args:
        corpus: Sliceable collection of feature vectors; only the first
            `maxNum` rows are projected.
        maxNum: Row limit; any negative value means "use every row".
        colors: Colour list turned into a ListedColormap (the colormap is not
            applied by the code visible here).
        names: Class names; not referenced by the code visible here.
        sz: Marker size handed to `scatter`.
        alpha: Marker opacity handed to `scatter`.
    """
    limit = len(corpus) if maxNum < 0 else maxNum
    reducer = pacmap.PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)
    projected = reducer.fit_transform(corpus[:limit], init="pca")

    # NOTE(review): the colormap is built but never passed to scatter, and
    # `names` is unused -- the function looks truncated; confirm intent.
    cmap = mpl.colors.ListedColormap(colors)
    sns.set_style('darkgrid')

    _, axis = plt.subplots(1, 1, figsize=(6, 6))
    axis.scatter(projected[:, 0], projected[:, 1], s=sz, alpha=alpha)




# Train the dense baseline on the full bundle (15% held out for validation),
# then persist and plot its per-epoch history.
# NOTE(review): `modelDense` and `modelSaveDense` are not defined anywhere in
# the visible source, and `plotHistory` is only defined in a later cell, so
# this cell fails on a fresh kernel (Restart & Run All). Restore the missing
# cells or move this below those definitions.
history = modelDense.fit(d['numeric'], d['weights'],
      batch_size=BATCH_SIZE,
      epochs=EPOCHS,
      validation_split=0.15,
      verbose=0,
      callbacks=[modelSaveDense])
saveData("denseHistory", history.history)
plotHistory(history.history)

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalisation.

    Args:
        embed_dim: Width of the incoming token representations. It is also
            used as the attention `key_dim` and as the output width of the
            feed-forward network.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate used after attention and after the feed-forward net.
        **kwargs: Standard Keras layer arguments (`name`, `dtype`, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward kwargs so name/dtype/trainable survive a save/load round
        # trip instead of being silently dropped.
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        # NOTE(review): key_dim is the full embed_dim rather than
        # embed_dim // num_heads -- confirm this is intended.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply attention and feed-forward sub-layers to `inputs`.

        Args:
            inputs: Tensor fed to the attention layer as both query and value.
            training: Forwarded to the dropout layers; None lets Keras decide.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = dict(super().get_config())
        # Only plain hyperparameters belong here: the sub-layers are rebuilt
        # by __init__ and are not valid constructor arguments.
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a `get_config` dictionary."""
        config = dict(config)
        # Configs written while get_config still exported the sub-layers
        # carry these extra keys; drop them so old checkpoints keep loading.
        for legacy_key in ('att', 'ffn'):
            config.pop(legacy_key, None)
        return cls(**config)

In [None]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Standard Keras layer arguments (`name`, `dtype`, ...).
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        # Forward kwargs so name/dtype/trainable survive a save/load round
        # trip instead of being silently dropped.
        super().__init__(**kwargs)
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in `x` and add per-position embeddings.

        Args:
            x: Tensor of token ids; its last axis is treated as the sequence
                axis.

        Returns:
            Token embeddings plus the embeddings of positions
            0 .. sequence_length - 1.
        """
        # The length is read from the runtime shape, not from self.maxlen.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        return self.token_emb(x) + self.pos_emb(positions)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = dict(super().get_config())
        # Only plain hyperparameters belong here: the embedding sub-layers
        # are rebuilt by __init__ and are not valid constructor arguments.
        config.update({
            'maxlen': self.maxlen,
            'vocab_size': self.vocab_size,
            'embed_dim': self.embed_dim,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a `get_config` dictionary."""
        config = dict(config)
        # Configs written while get_config still exported the sub-layers
        # carry these extra keys; drop them so old checkpoints keep loading.
        for legacy_key in ('token_emb', 'pos_emb'):
            config.pop(legacy_key, None)
        return cls(**config)

In [None]:
def vizLayer(m, x_test):
    """Plot a PaCMAP projection of the activations `m` produces for `x_test`.

    Args:
        m: Model exposing `predict`; its output must have at least two axes,
            the first one indexing samples.
        x_test: Inputs forwarded to `m.predict`.
    """
    viz = m.predict(x_test)
    # One flat row per sample; -1 handles any number of trailing axes instead
    # of assuming exactly three.
    flat = np.reshape(viz, (viz.shape[0], -1))
    # Row order is randomised before projecting. The index list that used to
    # be shuffled alongside was never read, so it is no longer built.
    plotPac(shuffle(flat))


In [None]:
# Transformer size presets; flip LARGE_MODEL to switch to the bigger variant.
LARGE_MODEL = False

# (embedding width, attention heads, feed-forward width)
EMBED, NUM_HEADS, FF_DIM = (128, 32, 128) if LARGE_MODEL else (32, 8, 32)
    

In [None]:
embed_dim = EMBED  # Embedding size for each token
num_heads = NUM_HEADS  # Number of attention heads
ff_dim = FF_DIM  # Hidden layer size in feed forward network inside transformer

# Functional model: embedding -> transformer block -> pooled dense head.
inputs = layers.Input(shape=(inSize,))
# NOTE(review): vocab_size is set to inSize (the sequence length). Confirm
# that every token id in d["numeric"] is below inSize; otherwise the token
# embedding table is too small.
embedding_layer = TokenAndPositionEmbedding(inSize, inSize, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)

# Collapse the sequence axis, then regress through the dense head.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.2)(x)
if LARGE_MODEL:
    # The large preset gets one extra hidden layer.
    x = layers.Dense(FF_DIM, activation="relu")(x)
    x = layers.Dropout(0.2)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(outSize)(x)
# Sigmoid squashes every output into (0, 1).
outputs = tf.keras.activations.sigmoid(outputs)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Checkpoint file for the transformer; with save_best_only the file is only
# rewritten when val_loss reaches a new minimum.
MODEL_FILE_TRANSFORMER = "./data/" + TEST_NAME + "_transformer.hdf5"
modelSaveTransformer = ModelCheckpoint(
    MODEL_FILE_TRANSFORMER,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Regression set-up: mean-squared-error loss, mean-absolute-error as metric,
# Adam with its default settings.
model.compile(loss="mse", metrics=["mae"], optimizer="adam")

In [None]:
# Stop training early based on val_loss with a patience of 10 epochs, so
# EPOCHS acts as an upper bound rather than a fixed count.
earlyStop = EarlyStopping(monitor='val_loss', patience=10)

In [None]:
# Shuffle inputs and targets together so rows stay aligned.
# Presumably done because fit() is later called with validation_split, which
# is expected to take its hold-out from the end of the arrays -- TODO confirm.
# NOTE(review): no random_state is passed, so the order (and therefore the
# validation subset) is not reproducible between runs.
trainX, trainY = shuffle(d["numeric"], d["weights"])

In [None]:
# Sanity check: show the input and target array shapes, one per line.
print(trainX.shape, trainY.shape, sep="\n")

In [None]:
# Fit the transformer. 15% of the rows are held out for validation, the best
# epoch is checkpointed, and early stopping may end the run before EPOCHS.
history = model.fit(
    x=trainX,
    y=trainY,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.15,
    shuffle=True,
    callbacks=[modelSaveTransformer, earlyStop],
    verbose=1,
)

In [None]:
def plotHistory(history_dict, title="Transformer Model Training", savePath="lossAccuracy.png"):
    """Plot training vs. validation loss and MAE side by side.

    Args:
        history_dict: Mapping with the keys "loss", "val_loss", "mae" and
            "val_mae", each a per-epoch sequence (e.g. `History.history`).
        title: Figure title. Defaults to the previously hard-coded text so
            existing calls are unchanged; pass another value for other models.
        savePath: File the figure is written to; None skips saving.
    """
    sns.set_style('darkgrid')
    fig, (lossAx, maeAx) = plt.subplots(1, 2, figsize=(10, 4), dpi=100)
    fig.suptitle(title)

    # (axes, training key, validation key, quantity shown on the y axis)
    panels = (
        (lossAx, "loss", "val_loss", "Loss"),
        (maeAx, "mae", "val_mae", "Mean Absolute Error"),
    )
    for ax, trainKey, valKey, quantity in panels:
        trainValues = history_dict[trainKey]
        epochs = range(1, len(trainValues) + 1)
        ax.plot(epochs, trainValues, color="b", label="Training " + quantity)
        ax.plot(epochs, history_dict[valKey], color="orange", label="Validation " + quantity)
        ax.set_xlabel("Epochs")
        # The second panel shows MAE (an error), so it is labelled as such
        # rather than "Accuracy".
        ax.set_ylabel(quantity)
        ax.legend()

    if savePath is not None:
        fig.savefig(savePath)
    plt.show()


In [None]:
# Plot the transformer's training curves.
# NOTE(review): unlike the dense run, the transformer history is not
# persisted -- the save call below is commented out.
#saveData("transformerHistory", history.history)
plotHistory(history.history)


In [None]:
# Sub-models that expose intermediate activations for visualisation.
# Layers are picked by position: going by the construction order of `model`,
# layers[0] is presumably the Input layer, layers[1] the
# TokenAndPositionEmbedding and layers[2] the TransformerBlock -- verify with
# model.summary() whenever the architecture changes.
embedOnly_model = keras.Model(model.inputs,[model.layers[1].output])
transformerOnly_model = keras.Model(model.inputs, model.layers[2].output)

# Name-based alternative, kept for reference:
#transformerOnly_model = keras.Model(model.inputs, model.get_layer("transformer_block_1").output)
transformerOnly_model.summary()

In [None]:
# PaCMAP view of the embedding-layer activations for the full training set.
vizLayer(embedOnly_model, d["numeric"])

In [None]:
# PaCMAP view of the transformer-block activations for the full training set.
vizLayer(transformerOnly_model, d["numeric"])

In [None]:
# Reload the checkpointed model; the custom layer classes are passed by name
# via custom_objects so Keras can rebuild them.
test = keras.models.load_model(
    MODEL_FILE_TRANSFORMER,
    custom_objects={
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        "TransformerBlock": TransformerBlock,
    },
)

In [None]:
# NOTE(review): this scores the reloaded checkpoint on d["numeric"] /
# d["weights"], i.e. the same arrays that were shuffled and passed to fit()
# (training rows plus its validation split) -- not a held-out test set.
test.evaluate(d["numeric"], d["weights"])