#Position encoder

In [22]:
import tensorflow as tf
import numpy as np

def positional_encoding(length, depth):
  """Build a sinusoidal positional-encoding table.

  Args:
    length: Number of positions (rows) to generate.
    depth: Encoding width. It is halved internally; one half of the columns
      holds sines and the other half cosines.

  Returns:
    A float32 tensor with one row per position. The sine columns come first
    and the cosine columns follow (concatenated, not interleaved).
  """
  half_depth = depth/2
  position_column = np.arange(length)[:, np.newaxis]
  scaled_index_row = np.arange(half_depth)[np.newaxis, :]/half_depth
  # Per-column frequency: 1 for the first column, shrinking towards 1/10000.
  inverse_wavelengths = 1 / (10000**scaled_index_row)
  phase = position_column * inverse_wavelengths
  table = np.concatenate([np.sin(phase), np.cos(phase)], axis=-1)
  return tf.cast(table, dtype=tf.float32)


class PositionalEncoder(tf.keras.layers.Layer):
  """Layer that returns the positional-encoding rows matching its input length.

  The table is computed once, at construction time, for 2048 positions.
  `vocab_size` is accepted but not used by this layer.
  """

  def __init__(self, vocab_size, d_model):
    super().__init__()
    self.d_model = d_model
    self.pos_encoding = positional_encoding(length=2048, depth=d_model)

  def call(self, x):
    """Return the first `tf.shape(x)[1]` table rows with a leading axis of size 1.

    Only the size of axis 1 of `x` is read; the values in `x` are not used and
    are not part of the result.
    """
    seq_len = tf.shape(x)[1]
    return self.pos_encoding[tf.newaxis, :seq_len, :]

In [24]:
# import tensorflow as tf
# from pe import PositionalEncoder



class Speech_Sampling(tf.keras.layers.Layer):
  """Front-end for the speech input: two Conv1D layers, a permute, an LSTM, plus positions.

  Args:
    d_model: Number of LSTM units (and width of the positional encoding).
    vocab_size: Stored on the layer and forwarded to `PositionalEncoder`.
    name: Keras layer name.
    dropout_rate: Input dropout of the LSTM. When left as `None` the layer
      falls back to a module-level variable called `dropout_rate` (this is
      what the layer implicitly relied on before) and to 0.0 if no such
      variable exists.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, d_model, vocab_size, name="Speech_Sampling", dropout_rate=None, **kwargs):
    super(Speech_Sampling, self).__init__(name=name, **kwargs)
    self.d_model = d_model
    self.vocab_size = vocab_size

    if dropout_rate is None:
      # Previously `dropout_rate` was read as a bare global, which raised a
      # NameError unless the training cell had already defined it. Keep that
      # value when it is available so existing callers behave the same.
      dropout_rate = globals().get("dropout_rate", 0.0)
    self.dropout_rate = dropout_rate

    self.conv1 = tf.keras.layers.Conv1D(filters=4096, kernel_size=3, strides=1, padding='same')
    self.conv2 = tf.keras.layers.Conv1D(filters=1024, kernel_size=3, strides=2, padding='same')
    # Swaps the two non-batch axes of the convolution output.
    self.permute = tf.keras.layers.Permute((2, 1))

    self.lstm = tf.keras.layers.LSTM(units=d_model, activation='tanh', recurrent_activation='sigmoid', use_bias=True, unit_forget_bias=True, dropout=dropout_rate, return_sequences=True, stateful=False)

    self.pe = PositionalEncoder(vocab_size=vocab_size, d_model=d_model)

  def call(self, x):
    """Apply conv -> GELU -> conv -> GELU -> permute, then LSTM output plus positional encoding."""
    x = tf.nn.gelu(self.conv1(x))
    x = tf.nn.gelu(self.conv2(x))
    x = self.permute(x)
    # `self.pe` only uses the length of axis 1 of the permuted tensor.
    x = self.lstm(x) + self.pe(x)
    return x

class Text_Sampling(tf.keras.layers.Layer):
  """Front-end for the token input: scaled embedding plus positional encoding.

  Args:
    d_model: Embedding width.
    vocab_size: Number of rows in the embedding table.
    name: Keras layer name.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, d_model, vocab_size, name="Text_Sampling", **kwargs):
    super().__init__(name=name, **kwargs)
    self.d_model = d_model
    self.vocab_size = vocab_size

    # mask_zero=True makes the embedding treat id 0 as padding.
    self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True)
    self.pe = PositionalEncoder(vocab_size=vocab_size, d_model=d_model)

  def call(self, x):
    """Embed `x`, multiply by sqrt(d_model) and add the positional encoding."""
    embedded = self.embedding(x)
    scaled = embedded * tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    return scaled + self.pe(scaled)


#Attention Layer

In [5]:
import tensorflow as tf

class BaseAttention(tf.keras.layers.Layer):
  """Common parts of the attention layers: multi-head attention, layer norm and residual add.

  Every keyword argument is handed to `tf.keras.layers.MultiHeadAttention`;
  none of them reaches the base `Layer` constructor.
  """

  def __init__(self, **mha_kwargs):
    super(BaseAttention, self).__init__()
    self.mha = tf.keras.layers.MultiHeadAttention(**mha_kwargs)
    self.layernorm = tf.keras.layers.LayerNormalization()
    self.add = tf.keras.layers.Add()


class CrossAttention(BaseAttention):
  """Attention from `x` (queries) over `context` (keys and values), with residual and norm."""

  def call(self, x, context):
    """Return layer-normalised `x + attention(x, context)`.

    The attention scores of the most recent call are kept on
    `self.last_attn_scores` so they can be plotted later.
    """
    attended, scores = self.mha(query=x, key=context, value=context, return_attention_scores=True)
    self.last_attn_scores = scores

    residual = self.add([x, attended])
    return self.layernorm(residual)

class GlobalSelfAttention(BaseAttention):
  """Self-attention over `x` followed by a residual connection and layer norm."""

  def call(self, x):
    """Return layer-normalised `x + attention(x, x)`."""
    attended = self.mha(query=x, value=x, key=x)
    residual = self.add([x, attended])
    return self.layernorm(residual)

class CausalSelfAttention(BaseAttention):
  """Self-attention over `x` followed by a residual connection and layer norm.

  NOTE(review): despite the class name, no causal mask is passed to the
  attention call, so this performs the same computation as
  `GlobalSelfAttention`. Confirm whether that is intended.
  """

  def call(self, x):
    """Return layer-normalised `x + attention(x, x)`."""
    attended = self.mha(query=x, value=x, key=x)
    residual = self.add([x, attended])
    return self.layernorm(residual)

class Attentive_Fusion(tf.keras.layers.Layer):
  """Fuse two sequences through a tanh-bounded, axis-1-normalised score matrix.

  Args:
    num_dim: Output width of the two projection layers.
    name: Accepted for call-site compatibility; not forwarded to the base class.
    **kwargs: Accepted but not forwarded to the base class.
  """

  def __init__(self, num_dim, name="Attentive Fusion", **kwargs):
    super().__init__()
    self.num_dim = num_dim
    self.wq = tf.keras.layers.Dense(num_dim)
    self.wk = tf.keras.layers.Dense(num_dim)

  def call(self, x1, x2):
    """Project `x1` and `x2`, score every pair, and reduce over axis 1.

    Scores are `wq(x1) @ wk(x2)^T`. The weights are `exp(tanh(scores))`
    normalised along axis 1; the result is the sum over axis 1 of
    `weights * scores`.
    """
    queries = self.wq(x1)
    keys = self.wk(x2)
    scores = tf.linalg.matmul(queries, keys, transpose_b=True)

    unnormalised = tf.math.exp(tf.math.tanh(scores))
    # epsilon keeps the division well defined.
    normaliser = tf.cast(tf.math.reduce_sum(unnormalised, axis=1, keepdims=True) + tf.keras.backend.epsilon(), dtype=tf.float32)
    attention = unnormalised / normaliser

    return tf.math.reduce_sum(attention * scores, axis=1)


In [6]:
import tensorflow as tf

class Sequential_Sampling(tf.keras.layers.Layer):
  """Two dense layers (ReLU, then linear) followed by dropout.

  Args:
    d_model: Width of the second (output) dense layer.
    dff: Width of the first (hidden) dense layer.
    dropout_rate: Rate of the final dropout layer.
    name: Keras layer name.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, d_model, dff, dropout_rate, name="Sequential_Sampling", **kwargs):
    super().__init__(name=name, **kwargs)
    self.d_model = d_model
    self.dff = dff
    self.dropout_rate = dropout_rate

    self.dense1 = tf.keras.layers.Dense(dff, activation='relu')
    self.dense2 = tf.keras.layers.Dense(d_model)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    """Return `dropout(dense2(dense1(x)))`."""
    hidden = self.dense1(x)
    projected = self.dense2(hidden)
    return self.dropout(projected)



class FeedForward(tf.keras.layers.Layer):
  """Position-wise feed-forward block with a residual connection and layer norm.

  Args:
    d_model: Output width of the inner `Sequential_Sampling` block.
    dff: Hidden width of the inner block.
    dropout_rate: Dropout rate of the inner block.
  """

  def __init__(self, d_model, dff, dropout_rate=0.1):
    super(FeedForward, self).__init__()
    self.d_model = d_model
    self.dff = dff
    self.dropout_rate = dropout_rate

    self.seq = Sequential_Sampling(d_model, dff, dropout_rate)
    self.add = tf.keras.layers.Add()
    self.layer_norm = tf.keras.layers.LayerNormalization()

  def call(self, x):
    """Return layer-normalised `x + seq(x)`."""
    residual = self.add([x, self.seq(x)])
    return self.layer_norm(residual)



#learning rate 

In [7]:
import tensorflow as tf


class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Warm-up schedule: rsqrt(d_model) * min(rsqrt(step), step * warmup_steps**-1.5).

  Args:
    d_model: Model width; stored as a float32 tensor.
    warmup_steps: Step count that controls where the two terms cross over.
  """

  def __init__(self, d_model, warmup_steps):
    super().__init__()
    self.d_model = tf.cast(d_model, tf.float32)
    self.warmup_steps = warmup_steps

  def __call__(self, step):
    """Return the learning rate for `step`."""
    step_f = tf.cast(step, dtype=tf.float32)
    decay_term = tf.math.rsqrt(step_f)
    warmup_term = step_f * (self.warmup_steps ** -1.5)
    # The smaller term wins: the linear ramp early on, the rsqrt decay later.
    return tf.math.rsqrt(self.d_model) * tf.math.minimum(decay_term, warmup_term)



#Encoder layer

In [8]:
import tensorflow as tf

# from attention import GlobalSelfAttention
# from pe import PositionalEncoder
# from ff import FeedForward

class EncoderLayer(tf.keras.layers.Layer):
  """One encoder block: global self-attention followed by the feed-forward block.

  Args:
    d_model: Model width; used as `key_dim` of the attention and as the
      feed-forward output width.
    num_heads: Number of attention heads.
    dff: Hidden width of the feed-forward block.
    dropout_rate: Dropout rate used by both the attention and the
      feed-forward block.
  """

  def __init__(self,*, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.self_attention = GlobalSelfAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)
    # Pass dropout_rate on: previously it was dropped here, so the feed-forward
    # block always used FeedForward's own default regardless of the caller.
    self.ffn = FeedForward(d_model, dff, dropout_rate)

  def call(self, x):
    """Return `ffn(self_attention(x))`."""
    x = self.self_attention(x)
    x = self.ffn(x)
    return x


class Encoder(tf.keras.layers.Layer):
  """Dropout followed by a stack of `num_layers` `EncoderLayer` blocks.

  Args:
    num_layers: Number of encoder blocks.
    d_model: Model width handed to every block.
    num_heads: Attention heads per block.
    dff: Feed-forward hidden width per block.
    dropout_rate: Rate of the input dropout; also handed to every block.
  """

  def __init__(self, *, num_layers, d_model, num_heads, dff, dropout_rate=0.1):
    super(Encoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    layer_args = dict(d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.enc_layers = [EncoderLayer(**layer_args) for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    """Apply dropout to `x`, then run it through each encoder block in order."""
    x = self.dropout(x)
    for layer in self.enc_layers:
      x = layer(x)
    return x




#decoderlayer

In [9]:
# import tensorflow as tf

# from attention import  CrossAttention, CausalSelfAttention, GlobalSelfAttention
# from ff import FeedForward
# from pe import PositionalEncoder

class DecoderLayer(tf.keras.layers.Layer):
  """One decoder block: self-attention, cross-attention over `context`, feed-forward.

  Args:
    d_model: Model width; used as `key_dim` of both attentions and as the
      feed-forward output width.
    num_heads: Number of attention heads.
    dff: Hidden width of the feed-forward block.
    dropout_rate: Dropout rate used by the attentions and the feed-forward block.
  """

  def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
    super(DecoderLayer, self).__init__()

    self.causal_self_attention = CausalSelfAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)

    self.cross_attention = CrossAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)

    # Pass dropout_rate on: previously it was dropped here, so the feed-forward
    # block always used FeedForward's own default regardless of the caller.
    self.ffn = FeedForward(d_model, dff, dropout_rate)

  def call(self, x, context):
    """Return the block output for `x`, attending over `context`.

    The cross-attention scores of the most recent call are copied to
    `self.last_attn_scores`.
    """
    x = self.causal_self_attention(x=x)
    x = self.cross_attention(x=x, context=context)

    self.last_attn_scores = self.cross_attention.last_attn_scores

    x = self.ffn(x)
    return x


class Decoder(tf.keras.layers.Layer):
  """Dropout followed by a stack of `num_layers` `DecoderLayer` blocks.

  Args:
    num_layers: Number of decoder blocks.
    d_model: Model width handed to every block.
    num_heads: Attention heads per block.
    dff: Feed-forward hidden width per block.
    dropout_rate: Rate of the input dropout; also handed to every block.
  """

  def __init__(self, *, num_layers, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    layer_args = dict(d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.dec_layers = [DecoderLayer(**layer_args) for _ in range(num_layers)]

    # Filled in by `call` with the last block's cross-attention scores.
    self.last_attn_scores = None

  def call(self, x, context):
    """Apply dropout to `x`, then run each block with the same `context`."""
    x = self.dropout(x)
    for layer in self.dec_layers:
      x = layer(x, context)

    self.last_attn_scores = self.dec_layers[-1].last_attn_scores
    return x



In [10]:
class BaseClassifier_1(tf.keras.models.Model):
  """Branch that encodes the speech input and decodes the text input against it.

  The constructor arguments are stored as attributes. `input_vocab_size` is
  given to `Speech_Sampling` and `target_vocab_size` to `Text_Sampling`.
  Extra `**kwargs` are accepted but not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    self.speech_pre = Speech_Sampling(d_model=d_model, vocab_size=input_vocab_size)
    self.text_pre = Text_Sampling(d_model=d_model, vocab_size=target_vocab_size)

    stack_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.encoder = Encoder(**stack_args)
    self.decoder = Decoder(**stack_args)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Run the branch on `inputs`, a pair unpacked as (speech, text)."""
    speech, text = inputs

    speech = self.speech_pre(speech)
    text = self.text_pre(text)

    # Speech is encoded; the text stream attends over it in the decoder.
    memory = self.encoder(speech)
    return self.decoder(text, memory)

class BaseClassifier_2(tf.keras.models.Model):
  """Branch that encodes the text input and decodes the speech input against it.

  The constructor arguments are stored as attributes. `input_vocab_size` is
  given to `Speech_Sampling` and `target_vocab_size` to `Text_Sampling`.
  Extra `**kwargs` are accepted but not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    self.speech_pre = Speech_Sampling(d_model=d_model, vocab_size=input_vocab_size)
    self.text_pre = Text_Sampling(d_model=d_model, vocab_size=target_vocab_size)

    stack_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.encoder = Encoder(**stack_args)
    self.decoder = Decoder(**stack_args)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Run the branch on `inputs`, a pair unpacked as (speech, text)."""
    speech, text = inputs

    speech = self.speech_pre(speech)
    text = self.text_pre(text)

    # Text is encoded; the speech stream attends over it in the decoder.
    memory = self.encoder(text)
    return self.decoder(speech, memory)

class Classifier(tf.keras.models.Model):
  """Two-branch classifier: both base branches, an LSTM per branch, fusion, dense logits.

  The constructor arguments are stored as attributes and handed unchanged to
  `BaseClassifier_1` and `BaseClassifier_2`. Extra `**kwargs` are accepted but
  not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    branch_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, input_vocab_size=input_vocab_size, target_vocab_size=target_vocab_size, num_classes=num_classes, dropout_rate=dropout_rate)
    self.BaseClassifier_1 = BaseClassifier_1(**branch_args)
    self.BaseClassifier_2 = BaseClassifier_2(**branch_args)

    lstm_args = dict(units=d_model, activation='tanh', recurrent_activation='sigmoid', use_bias=True, unit_forget_bias=True, dropout=dropout_rate, return_sequences=True, stateful=False)
    self.lstm_1 = tf.keras.layers.LSTM(**lstm_args)
    self.lstm_2 = tf.keras.layers.LSTM(**lstm_args)

    self.h_att = Attentive_Fusion(num_dim=d_model)

    self.final_layer = tf.keras.layers.Dense(num_classes)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Return the output of the final dense layer for `inputs`."""
    first = self.BaseClassifier_1(inputs)
    second = self.BaseClassifier_2(inputs)
    first = self.lstm_1(first)
    second = self.lstm_2(second)
    logits = self.final_layer(self.h_att(second, first))

    # Drop any Keras mask still attached to the output; absence is fine.
    try:
      del logits._keras_mask
    except AttributeError:
      pass

    return logits

  def compute_loss(self, inputs, labels, training=True):
    """Sparse softmax cross-entropy of the model output against `labels`.

    With `training=True` the per-example losses are averaged using weights
    looked up in a fixed two-entry table by label, giving a scalar. Otherwise
    the unreduced per-example losses are returned.
    """
    logits = self(inputs, training=training)
    class_ids = tf.squeeze(tf.cast(labels, dtype=tf.int32), axis=-1)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(class_ids, logits)
    if not training:
      return per_example

    # The table has two entries, so this presumably expects labels 0/1 — TODO confirm.
    bincounts = tf.constant([7333, 2453])
    sample_weight = tf.cast(tf.gather(bincounts, class_ids), dtype=tf.float32)
    weighted_total = tf.math.reduce_sum(per_example*sample_weight)
    return weighted_total/(tf.math.reduce_sum(sample_weight) + tf.keras.backend.epsilon())


In [11]:
import tensorflow as tf



class BaseClassifier_1(tf.keras.models.Model):
  """Branch that encodes the speech input and decodes the text input against it.

  The constructor arguments are stored as attributes. `input_vocab_size` is
  given to `Speech_Sampling` and `target_vocab_size` to `Text_Sampling`.
  Extra `**kwargs` are accepted but not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    self.speech_pre = Speech_Sampling(d_model=d_model, vocab_size=input_vocab_size)
    self.text_pre = Text_Sampling(d_model=d_model, vocab_size=target_vocab_size)

    stack_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.encoder = Encoder(**stack_args)
    self.decoder = Decoder(**stack_args)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Run the branch on `inputs`, a pair unpacked as (speech, text)."""
    speech, text = inputs

    speech = self.speech_pre(speech)
    text = self.text_pre(text)

    # Speech is encoded; the text stream attends over it in the decoder.
    memory = self.encoder(speech)
    return self.decoder(text, memory)

class BaseClassifier_2(tf.keras.models.Model):
  """Branch that encodes the text input and decodes the speech input against it.

  The constructor arguments are stored as attributes. `input_vocab_size` is
  given to `Speech_Sampling` and `target_vocab_size` to `Text_Sampling`.
  Extra `**kwargs` are accepted but not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    self.speech_pre = Speech_Sampling(d_model=d_model, vocab_size=input_vocab_size)
    self.text_pre = Text_Sampling(d_model=d_model, vocab_size=target_vocab_size)

    stack_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
    self.encoder = Encoder(**stack_args)
    self.decoder = Decoder(**stack_args)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Run the branch on `inputs`, a pair unpacked as (speech, text)."""
    speech, text = inputs

    speech = self.speech_pre(speech)
    text = self.text_pre(text)

    # Text is encoded; the speech stream attends over it in the decoder.
    memory = self.encoder(text)
    return self.decoder(speech, memory)

class Classifier(tf.keras.models.Model):
  """Two-branch classifier: both base branches, an LSTM per branch, fusion, dense logits.

  The constructor arguments are stored as attributes and handed unchanged to
  `BaseClassifier_1` and `BaseClassifier_2`. Extra `**kwargs` are accepted but
  not forwarded to the base class.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate, **kwargs):
    super().__init__()

    self.num_layers = num_layers
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.input_vocab_size = input_vocab_size
    self.target_vocab_size = target_vocab_size
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate

    branch_args = dict(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, input_vocab_size=input_vocab_size, target_vocab_size=target_vocab_size, num_classes=num_classes, dropout_rate=dropout_rate)
    self.BaseClassifier_1 = BaseClassifier_1(**branch_args)
    self.BaseClassifier_2 = BaseClassifier_2(**branch_args)

    # NOTE(review): both LSTMs use activation='relu' rather than the layer's
    # default 'tanh'; the recorded run at the end of this file reports
    # `train_loss: nan` — worth checking whether the two are related.
    lstm_args = dict(units=d_model, activation='relu', recurrent_activation='sigmoid', use_bias=True, unit_forget_bias=True, dropout=dropout_rate, return_sequences=True, stateful=False)
    self.lstm_1 = tf.keras.layers.LSTM(**lstm_args)
    self.lstm_2 = tf.keras.layers.LSTM(**lstm_args)

    self.h_att = Attentive_Fusion(num_dim=d_model)

    self.final_layer = tf.keras.layers.Dense(num_classes)

  def get_config(self):
    """Return a copy of the base-class config (constructor arguments are not added)."""
    return super().get_config().copy()

  def call(self, inputs):
    """Return the output of the final dense layer for `inputs`."""
    first = self.BaseClassifier_1(inputs)
    second = self.BaseClassifier_2(inputs)
    first = self.lstm_1(first)
    second = self.lstm_2(second)
    logits = self.final_layer(self.h_att(second, first))

    # Drop any Keras mask still attached to the output; absence is fine.
    try:
      del logits._keras_mask
    except AttributeError:
      pass

    return logits

  def compute_loss(self, inputs, labels, training=True):
    """Sparse softmax cross-entropy of the model output against `labels`.

    With `training=True` the per-example losses are averaged using weights
    looked up in a fixed six-entry table by label, giving a scalar. Otherwise
    the unreduced per-example losses are returned.
    """
    logits = self(inputs, training=training)
    class_ids = tf.squeeze(tf.cast(labels, dtype=tf.int32), axis=-1)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(class_ids, logits)
    if not training:
      return per_example

    # Alternative table kept from the original: [144, 175, 127, 165, 100, 199]
    bincounts = tf.constant([106, 145, 94, 136, 82,165])
    sample_weight = tf.cast(tf.gather(bincounts, class_ids), dtype=tf.float32)
    weighted_total = tf.math.reduce_sum(per_example*sample_weight)
    return weighted_total/(tf.math.reduce_sum(sample_weight) + tf.keras.backend.epsilon())

#Training Code

In [14]:
import tensorflow as tf
import numpy as np
from keras.activations import get
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_fscore_support, f1_score

#from model_config import Classifier
#from lr import LearningRateSchedule

#import tensorflow_addons as tfa
# NOTE(review): the two lines below describe a binary Hate/Not-Hate mapping, which
# does not match the six names in Emo_Dict — presumably left over from an
# earlier task; confirm and remove.
#0 -> Not Hate
#1 -> Hate
# Label names indexed by integer class id; the evaluation loops use this list to
# turn label ids and argmax predictions into names.
Emo_Dict = ['Unknown', 'Angry', 'Happy', 'Disgust', 'Neutral', 'Sad']
print('Tensorflow version: ', tf.__version__)


# Training-loop settings.
EPOCHS = 100
batch_size = 4

# Model hyper-parameters. `ws` is passed to LearningRateSchedule as warmup_steps.
ws = 2048
d_model = 128  #128 --> 256
dff = 64
dropout_rate = 0.2

num_heads = 4
num_layers = 4
num_classes = 6

input_vocab_size = 128
target_vocab_size = 64014




# LRFileName is only referenced from commented-out logging code in the training loops.
LRFileName = "LRlogs.txt"
checkpoint_path = "/content/drive/MyDrive/MscProject/CheckPoint"






##with tpu_strategy.scope():
model = Classifier(num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, num_classes, dropout_rate)
# NOTE(review): `decay` is a legacy optimizer argument; recent Keras releases are
# understood to warn and ignore it — confirm against the installed version.
optimizer = tf.keras.optimizers.Adam(LearningRateSchedule(d_model=d_model, warmup_steps=ws), beta_1=0.9, beta_2=0.98, epsilon=1e-6, decay=0.1)
#optimizer = tf.keras.optimizers.experimental.AdamW(LearningRateSchedule(d_model=d_model, warmup_steps=ws), weight_decay=0.1, beta_1=0.9, beta_2=0.98, epsilon=0.000001)
# Running mean of the training loss; updated by train_step.
train_loss = tf.keras.metrics.Mean(name='train_loss')
# Checkpoint both model and optimizer; only the most recent checkpoint is kept.
ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)
# Resume from the latest checkpoint in checkpoint_path when one exists.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored; Model was trained for {} steps.'.format(ckpt.optimizer.iterations.numpy()))
else:
    print('Training from scratch!')

@tf.function()
def train_step(inp, tar):
  """Run one optimisation step on a batch and record its loss in `train_loss`.

  Uses the module-level `model`, `optimizer` and `train_loss` objects.
  """
  with tf.GradientTape() as tape:
    loss = model.compute_loss(inp, tar)
  # Read the variable list only after the forward pass has run.
  variables = model.trainable_variables
  grads = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(grads, variables))
  train_loss.update_state(loss)

# @tf.function()
# def distributed_train_step(src_data, tar_data):
#   tpu_strategy.run(train_step, args=(src_data, tar_data,))  # Mainly used on TPUs: a TPU has 8 partitions, so the step has to be split across them.



def _ds_parser(proto):
  """Parse one serialized example into `((audio, text), target)`.

  Reads three variable-length features: 'source_en' (float32), reshaped to
  128 rows; 'source_dec' (int64), flattened; and 'target' (int64), flattened.
  """
  schema = {
      'source_en': tf.io.VarLenFeature(tf.float32),
      'source_dec': tf.io.VarLenFeature(tf.int64),
      'target' : tf.io.VarLenFeature(tf.int64)
      }
  parsed = tf.io.parse_single_example(proto, schema)

  def dense(key):
    # VarLenFeature yields sparse tensors; convert before reshaping.
    return tf.sparse.to_dense(parsed[key])

  audio = tf.reshape(dense('source_en'), [128, -1])
  text = tf.reshape(dense('source_dec'), [-1,])
  label = tf.reshape(dense('target'), [-1,])
  return (audio, text), label


def datasetLoader(dataSetPath, batch_size, is_training=True):
  """Build a padded, batched and prefetched dataset from TFRecord files.

  Args:
    dataSetPath: File pattern(s) passed to `tf.data.Dataset.list_files`.
    batch_size: Number of examples per batch.
    is_training: When true, examples are shuffled (buffer of 728, reshuffled
      on every iteration) before batching.

  Returns:
    A dataset of `((audio, text), target)` batches padded to
    `(([128, 99], [500]), [1])`; the last partial batch is kept.
  """
  record_files = tf.data.Dataset.list_files(dataSetPath)
  records = tf.data.TFRecordDataset(record_files).map(_ds_parser, num_parallel_calls=tf.data.AUTOTUNE)

  if is_training:
    records = records.shuffle(728, reshuffle_each_iteration=True)

  # Training and evaluation use the same padding and keep the remainder.
  padded_shapes = (([128, 99], [500,]), [1,])
  batches = records.padded_batch(batch_size, padded_shapes, drop_remainder=False)

  return batches.prefetch(buffer_size=tf.data.AUTOTUNE)


# Input pipelines: training uses `batch_size` with shuffling; validation and test
# use batches of 32 without shuffling.
# NOTE(review): valid_dataset and test_dataset are both built from
# record-4.tfrecord, so the validation and test scores are computed on the same
# data — confirm whether a separate test file was intended.
train_dataset = datasetLoader([r"E:\Project\Transformer\tfrecord\Splittfrecord\record-3.tfrecord"], batch_size)
valid_dataset = datasetLoader([r"E:\Project\Transformer\tfrecord\Splittfrecord\record-4.tfrecord"], (32), is_training=False)
test_dataset = datasetLoader([r"E:\Project\Transformer\tfrecord\Splittfrecord\record-4.tfrecord"], (32), is_training=False)



Tensorflow version:  2.16.1




Training from scratch!


In [None]:
# Training loop with early stopping on the validation macro F-score.
# A checkpoint is saved only when the validation score reaches or beats
# `best_f_score_valid`, which starts at a preset value rather than at zero.
best_loss = float('inf')
best_f_score_valid = 0.9145
best_f_score_test = 0.9065
target_score_valid = 0.921
target_score_test = 0.934
early_stop_count = 0
early_stop_at = 15
#patience=3
for epoch in range(EPOCHS):
  # Stop once the validation score has failed to improve `early_stop_at` epochs in a row.
  if early_stop_count == early_stop_at:
    break

  print('Epoch: {}/{}'.format(epoch+1, EPOCHS))
  pbar = tf.keras.utils.Progbar(int(728/(batch_size)), width=30, interval=1)
  for step, (inp, tar) in enumerate(train_dataset):
    train_step(inp, tar)
    pbar.add(1, values=[("train_loss", train_loss.result()) ])
  #with open(LRFileName, "a") as file:
    #file.write('{:.8f}\n'.format(float(optimizer.lr(optimizer.iterations.numpy()))))

  # Validation pass: collect label names and predicted names.
  y_true = []
  y_pred = []
  print('Evaluating Validation Dataset')
  for inp, tar in valid_dataset:
    for x in tar.numpy():
      for y in x:
        y_true.append(Emo_Dict[y])

    for z in model.predict(inp):
      y_pred.append(Emo_Dict[np.argmax(z)])

  mfscore_valid = f1_score(y_true, y_pred, labels = Emo_Dict, average='macro')     #calculate macro f1 score for validation

  # Test pass: same procedure on the test dataset.
  y_true = []
  y_pred = []
  print('Evaluating Test Dataset')
  for inp, tar in test_dataset:
    for x in tar.numpy():
      for y in x:
        y_true.append(Emo_Dict[y])

    for z in model.predict(inp):
      y_pred.append(Emo_Dict[np.argmax(z)])

  mfscore_test = f1_score(y_true, y_pred, labels = Emo_Dict, average='macro')    #calculate macro f1 score for test

  print('Train Loss: {}, macro F [Valid/Test]: {}/{}'.format(round(float(train_loss.result()), 4), round(mfscore_valid, 4), round(mfscore_test, 4)))
  if float(mfscore_valid) >= best_f_score_valid:
    print('Macro F-score [Valid]: {} ({}) \nMacro F-score [Test]: {} ({})'.format(float(round(mfscore_valid, 4)), round((mfscore_valid-target_score_valid),4), float(round(mfscore_test, 4)), round((mfscore_test-target_score_test),4)))
    best_f_score_test = float(mfscore_test)
    best_f_score_valid = float(mfscore_valid)
    ckpt_save_path = ckpt_manager.save()
    print('Saving checkpoint for {} step(s) at {}'.format(optimizer.iterations.numpy(), ckpt_save_path))
    early_stop_count = 0
  else:
    print('Macro F-score [Valid] did not improve. \nBest Score [Valid]: {} ({}) [Test]: {} ({})'.format(float(round(best_f_score_valid, 4)), round((best_f_score_valid-target_score_valid),4), float(round(best_f_score_test, 4)), round((best_f_score_test-target_score_test),4) ))
    early_stop_count = early_stop_count + 1
    print('Score did not improve for {} epoch(s). Current Steps: {}'.format(early_stop_count, optimizer.iterations.numpy()))

  # Reset the running loss at the end of every epoch so each epoch reports its
  # own mean. This used to sit after the loop (so it never ran per epoch) and
  # called the legacy `reset_states` name.
  train_loss.reset_state()

  print("------------------------------------")
    

Epoch: 1/100
[1m 45/182[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━━━━━━━━━━━[0m [1m23:52[0m 10s/step - train_loss: 2.0818Evaluating Validation Dataset
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 84s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 23s/step
Evaluating Test Dataset
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━

KeyboardInterrupt: 

In [21]:
# Training loop with early stopping on the validation macro F-score.
# A checkpoint is saved only when the validation score reaches or beats
# `best_f_score_valid`, which starts at a preset value rather than at zero.
best_loss = float('inf')
best_f_score_valid = 0.9145
best_f_score_test = 0.9065
target_score_valid = 0.921
target_score_test = 0.934
early_stop_count = 0
early_stop_at = 5

for epoch in range(EPOCHS):
  # Stop once the validation score has failed to improve `early_stop_at` epochs in a row.
  if early_stop_count == early_stop_at:
    break

  print('Epoch: {}/{}'.format(epoch+1, EPOCHS))
  pbar = tf.keras.utils.Progbar(int(728/(batch_size)), width=30, interval=1)
  for step, (inp, tar) in enumerate(train_dataset):
    train_step(inp, tar)
    pbar.add(1, values=[("train_loss", train_loss.result()) ])
  # Learning-rate logging (disabled):
  #with open(LRFileName, "a") as file:
    #file.write('{:.8f}\n'.format(float(optimizer.lr(optimizer.iterations.numpy()))))

  # Validation pass: collect label names and predicted names.
  y_true = []
  y_pred = []
  print('Evaluating Validation Dataset')
  for inp, tar in valid_dataset:
    for x in tar.numpy():
      for y in x:
        y_true.append(Emo_Dict[y])

    for z in model.predict(inp):
      y_pred.append(Emo_Dict[np.argmax(z)])

  mfscore_valid = f1_score(y_true, y_pred, labels = Emo_Dict, average='macro')

  # Test pass: same procedure on the test dataset.
  y_true = []
  y_pred = []
  print('Evaluating Test Dataset')
  for inp, tar in test_dataset:
    for x in tar.numpy():
      for y in x:
        y_true.append(Emo_Dict[y])

    for z in model.predict(inp):
      y_pred.append(Emo_Dict[np.argmax(z)])

  mfscore_test = f1_score(y_true, y_pred, labels = Emo_Dict, average='macro')

  print('Train Loss: {}, macro F [Valid/Test]: {}/{}'.format(round(float(train_loss.result()), 4), round(mfscore_valid, 4), round(mfscore_test, 4)))

  improved = float(mfscore_valid) >= best_f_score_valid
  if improved:
    print('Macro F-score [Valid]: {} ({}) \nMacro F-score [Test]: {} ({})'.format(float(round(mfscore_valid, 4)), round((mfscore_valid-target_score_valid),4), float(round(mfscore_test, 4)), round((mfscore_test-target_score_test),4)))
    best_f_score_test = float(mfscore_test)
    best_f_score_valid = float(mfscore_valid)
    ckpt_save_path = ckpt_manager.save()
    print('Saving checkpoint for {} step(s) at {}'.format(optimizer.iterations.numpy(), ckpt_save_path))
    early_stop_count = 0
  else:
    print('Macro F-score [Valid] did not improve. \nBest Score [Valid]: {} ({}) [Test]: {} ({})'.format(float(round(best_f_score_valid, 4)), round((best_f_score_valid-target_score_valid),4), float(round(best_f_score_test, 4)), round((best_f_score_test-target_score_test),4) ))
    early_stop_count += 1
    print('Score did not improve for {} epoch(s). Current Steps: {}'.format(early_stop_count, optimizer.iterations.numpy()))

  # Start the next epoch with a fresh running loss.
  train_loss.reset_state()

  print("------------------------------------")

Epoch: 1/100
[1m  2/182[0m [37m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [1m18:33[0m 6s/step - train_loss: nan

KeyboardInterrupt: 