In [35]:
import numpy as np
import unicodedata
import re
import time
import pickle

from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers
from keras.utils.vis_utils import plot_model

tf.config.run_functions_eagerly(True)

In [36]:
#only if GPU is available
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [37]:
#data
meetings = np.load('../../data/obj/meetings.npz')['arr_0'] # (num_meetings, num_turns, seq_len)
summary = np.load('../../data/obj/summary.npz')['arr_0'] # (num_meetings, summary_len)

turns = np.load('../../data/obj/turns.npz')['arr_0'] # (num_meetings, num_turns)
role_vector = np.load('../../data/obj/role_vector.npz')['arr_0'] # (num_roles, MAX_LENGTH_BIN = 3)

sentence_embeddings = np.load('../../data/obj/sentence_embeddings.npz')['arr_0'] # (num_meetings, num_turns, sentence_embedding_dim)

with open('../../data/obj/tokenizer.pickle', 'rb') as file:
    tokenizer = pickle.load(file)
    
vocabulary_size = len(tokenizer.word_index) + 1
word_embedding_dimension = 100
sentence_embedding_dimension = 100

In [38]:
meetings.shape

(94, 70, 100)

In [39]:
def load_embeddings(file_path) :
    '''
    parameters : file_path - path of file where embeddings are stored (eg: '<path>/glove.6B/glove.6B.100d.txt')
    load the words and their respective embeddings from the GloVe file and set up a dictionary mapping 
    of words and their corresponding embeddings (embeddings will be stored in a numpy array of shape (d, ))
    returns : embedding_dict - dictionary mapping of {word:embedding}
    ''' 
    
    embedding_dict = {}
    file = open(file_path)
    for line in file :
        data = line.split(" ")
        word = data[0]
        embedding = np.asarray(data[1:], dtype='float32')
        embedding_dict[word] = embedding
    file.close()
    return embedding_dict

In [40]:
def set_embedding_matrix(embedding_dict, vocabulary, embedding_dimension) :
    '''
    parameters : embedding dict - dictionary mapping of {word:embedding} 
                 vocabulary - list of words in the training dataset
                 embedding_dimension - dimension of word embeddings used in the model
    initialises the embedding matrix with the ith row corresponding to the embedding of the ith word in the vocabulary
    dimension of the embedding depends on the 
    returns : embedding_matrix of shape (n, d) where n is the number of words in the vocabulary and d is the 
              dimension of the embeddings
    '''
    
    embedding_matrix = np.random.normal(0, 0.1, (len(vocabulary) + 1, embedding_dimension))
    for i, word in enumerate(vocabulary) :
        if word in embedding_dict.keys() :
            word_embedding = embedding_dict[word]
            embedding_matrix[i] = word_embedding
    return embedding_matrix

    

In [41]:
def get_rand_embedding_layer(vocabulary_size, embedding_dimension) :
    '''
    parameters : vocabulary_size - size of vocabulary used
                 embedding_dimension - integer which indicated the dimension of the word embeddings
                 max_length - maximum length of the input to the model(eg : maximum length of an input sentence)
    creates the embedding layer with trainable set to true so that weights cannot be changed during training.
    Weights of the embedding layer follow normal distribution with mean=0, stddev=0.1
    returns : embedding_layer 
    '''
    
    embedding_matrix = np.random.normal(0, 0.1, (vocabulary_size, embedding_dimension))
    embedding_layer = layers.Embedding(input_dim=vocabulary_size,
                                      output_dim=embedding_dimension,
                                      weights=[embedding_matrix],
                                      trainable=True,
                                      name='embedding_rand')
    return embedding_layer


def get_static_embedding_layer(embedding_matrix) :
    '''
    parameters : embedding_matrix - numpy array of shape (n, d) used to set the weights of the embedding layer
                 max_length - maximum length of the input to the model(eg : maximum length of an input sentence)
    creates the embedding layer and sets its weights with trainable set to false 
    so that weights cannot be changed during training
    returns : embedding_layer 
    '''
    
    embedding_layer = layers.Embedding(input_dim=embedding_matrix.shape[0],
                                      output_dim=embedding_matrix.shape[1],
                                      weights=[embedding_matrix],
                                      trainable=False,
                                      name='embedding_static')
    return embedding_layer


def get_dynamic_embedding_layer(embedding_matrix) :
    '''
    parameters : embedding_matrix - numpy array of shape (n, d) used to set the weights of the embedding layer
                 max_length - maximum length of the input to the model(eg : maximum length of an input sentence)
    creates the embedding layer and sets its weights with trainable set to true 
    so that weights can be changed or fine-tuned during training
    returns : embedding_layer 
    '''
    
    embedding_layer = layers.Embedding(input_dim=embedding_matrix.shape[0],
                                      output_dim=embedding_matrix.shape[1],
                                      weights=[embedding_matrix],
                                      trainable=True,
                                      name='embedding_dynamic')
    return embedding_layer

In [42]:
class PositionalEmbedding(layers.Layer) :
    def __init__(self) :
        super(PositionalEmbedding, self).__init__()
        
    def call(self, word_embeddings) :
        '''
        parameters : word_embeddings - tensor of shape (num_turns, seq_len, embed_dim)
        returns : embeddings_with_position - tensor of shape (num_turns, seq_len, embed_dim)
        '''
        positional_embeddings = np.zeros((word_embeddings.shape[1], word_embeddings.shape[2]))
        for i  in range(positional_embeddings.shape[0]) :
            if i % 2 == 0 :
                positional_embeddings[i] = np.array([np.sin(i/(1000 ** (2 * j / positional_embeddings.shape[1]))) for j in range(positional_embeddings.shape[1])])
            else :
                positional_embeddings[i] = np.array([np.cos(i/(1000 ** (2 * j / positional_embeddings.shape[1]))) for j in range(positional_embeddings.shape[1])])
        
        positional_embeddings = np.repeat(positional_embeddings[np.newaxis, :, :], word_embeddings.shape[0], axis=0)
        
        embeddings_with_position = positional_embeddings + word_embeddings
        return embeddings_with_position

In [43]:
class ScaledDotProductAttention(layers.Layer) :
    def __init__(self, is_mask=False) :
        super(ScaledDotProductAttention, self).__init__()
        self.is_mask = is_mask
        
    def call(self, query, key, value) :
        '''
        parameters : query - tensor of shape (num_turns, num_heads, seq_len_q, dim) 
                     key - tensor of shape (num_turns, num_heads, seq_len_k, dim) 
                     value - tensor of shape (num_turns, num_heads, seq_len_v, dim) 
                     **seq_len_k == seq_len_v
        returns : attention - tensor of shape (num_turns, num_heads, seq_len, dim) 
        '''
        # (num_turns, num_heads, seq_len_q, seq_len_k)
        pre_attention = tf.linalg.matmul(query, key, transpose_b=True) / np.sqrt(key.shape[1])

        if self.is_mask is True :
            mask = np.zeros((pre_attention.shape[-2], pre_attention.shape[-1]))
            mask.fill(-1e10)            
            mask = np.triu(mask, k=1)
            pre_attention = tf.math.multiply(pre_attention, mask)
            
        attention_weights = tf.nn.softmax(pre_attention, axis=-1)
        
        # (num_turns, num_heads, seq_len_q, dim)
        attention = tf.linalg.matmul(attention_weights, value)
        
        return attention

In [44]:
class MultiHeadAttention(layers.Layer) :
    def __init__(self, embedding_dimension, num_heads, is_mask=False) :
        super(MultiHeadAttention, self).__init__()
        
        self.num_heads = num_heads
        self.embedding_dimension = embedding_dimension
        self.dim = self.embedding_dimension // self.num_heads
        self.is_mask = is_mask
        
        assert(self.dim * self.num_heads == self.embedding_dimension), "embedding_dimension should be divisible by num_heads."

        self.query_layer = layers.Dense(self.embedding_dimension)
        self.key_layer = layers.Dense(self.embedding_dimension)
        self.value_layer = layers.Dense(self.embedding_dimension)
        
        self.scaled_dot_product_attention = ScaledDotProductAttention(is_mask=self.is_mask)
        
        self.linear_layer = layers.Dense(self.embedding_dimension)
    
    
    def split_heads(self, input_tensor) :
        '''
        parameters : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
        returns : input_tensor - resize tensor of shape (num_turns, num_heads, seq_len, dim)
        '''
        input_tensor = tf.reshape(input_tensor, (input_tensor.shape[0], -1, self.num_heads, self.dim))
        return tf.transpose(input_tensor, [0,2,1,3])
        
        
    def call(self, query, key, value) :
        '''
        parameters : query - tensor of shape (num_turns, seq_len, embedding_dimension)
                     key - tensor of shape (num_turns, seq_len, embedding_dimension)
                     value - tensor of shape (num_turns, seq_len, embedding_dimension)
        returns : res - tensor of shape (num_turns, seq_len, embedding_dimension)
        '''

        query = self.query_layer(query)
        key = self.key_layer(key)
        value = self.value_layer(value)
        
        query = self.split_heads(query)
        key = self.split_heads(key)
        value = self.split_heads(value)
        
        attention = self.scaled_dot_product_attention(query, key, value) # (num_turns, num_heads, seq_len, dim)
        
        attention = tf.transpose(attention, perm=[0, 2, 1, 3]) # (num_turns, seq_len, num_heads, dim)
        concat_attention = tf.reshape(attention, (attention.shape[0], -1, self.embedding_dimension)) # (num_turns, seq_len, embedding_dimension)
        
        res = self.linear_layer(concat_attention) # (num_turns, seq_len, embedding_dimension)
        return res

In [45]:
class AddandNorm(layers.Layer) :
    def __init__(self) :
        super(AddandNorm, self).__init__()
        self.layer_norm = layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, input_tensor, skip_connection) :
        '''
        parameters : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
                     skip_connection - tensor of shape (num_turns, seq_len, embedding_dimension)
        returns : res - normalized tensor of shape (num_turns, seq_len, embedding_dimension)
        '''
        res = input_tensor + skip_connection
        res = self.layer_norm(res)
        return res

In [46]:
class FeedForward(layers.Layer) :
    def __init__(self, hidden_dim, output_dim) :
        super(FeedForward, self).__init__()
        self.layer_1 = layers.Dense(hidden_dim, activation='relu')
        self.layer_2 = layers.Dense(output_dim)
        
    def call(self, input_tensor) :
        '''
        parameters : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
        returns : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
        '''
        res = self.layer_1(input_tensor)
        res = self.layer_2(res)
        return res

In [47]:
def concat_role_vector(x, role_vector, turn_seq) :
    '''
    parameters : x - tensor of shape (1, num_turns, embed_dim)
                 role_vector - tensor of shape (num_roles, role_vector_size)
                 turn_seq - np array of shape (num_turns, ) representing the sequence of turns in a meeting
    returns : concat_vector - concatenated vector of sentence embedding and role_vector for each turn
                              of shape (1, num_turns, embed_dim + role_vector_size)
    '''


#     unpacked_turn_seq = tf.unstack(turn_seq)
#     unpacked_turn_seq = turn_seq.numpy()
    
    role = tf.expand_dims(tf.convert_to_tensor([role_vector[j] for j in turn_seq]), 0)
    turn_with_role = tf.concat([x, role], axis=2) # (1, num_turns, embed_dim + role_vector_size)
    
    return turn_with_role

# x = tf.random.uniform((1, 10, 512))
# role_vector = tf.random.uniform((2, 32))
# turn_seq = [0,1,1,0,1,0,1,0,1,0]

# print(concat_role_vector(x, role_vector, turn_seq).shape)         

In [48]:
def get_clusters(num_clusters, data) :
    '''
    parameters : num_clusters - number of clusters for K-means clustering
                 data - tensor of shape (num_turns, sent_embed_dim + role_vector_size)
    returns : closest_data - np array of shape (num_clusters, ) containing indices of top 'num_cluster' sentences
    '''
    
    num_clusters = num_clusters

    m_km = KMeans(n_clusters=num_clusters, random_state=42)
    m_km = m_km.fit(data.numpy())
    m_clusters = m_km.labels_.tolist()

    centers = np.array(m_km.cluster_centers_)

    closest_data = []
    for i in range(num_clusters):
        center_vec = centers[i]
        data_idx_within_i_cluster = [ idx for idx, clu_num in enumerate(m_clusters) if clu_num == i ]

        one_cluster_tf_matrix = np.zeros( (  len(data_idx_within_i_cluster) , centers.shape[1] ) )
        for row_num, data_idx in enumerate(data_idx_within_i_cluster):
            one_row = data[data_idx]
            one_cluster_tf_matrix[row_num] = one_row

        closest, _ = pairwise_distances_argmin_min(center_vec.reshape(1, -1), one_cluster_tf_matrix)
        closest_idx_in_one_cluster_tf_matrix = closest[0]
        closest_data_row_num = data_idx_within_i_cluster[closest_idx_in_one_cluster_tf_matrix]

        closest_data.append(closest_data_row_num)
    return np.sort(np.array(closest_data))

input_tensor = tf.random.uniform((1, 70, 132), dtype=tf.float64, minval=0, maxval=200)
print(get_clusters(5, input_tensor[0]))

[33 34 64 66 67]


In [49]:
class EncoderBlock(layers.Layer) :
    def __init__(self, embedding_dimension, num_heads) :
        super(EncoderBlock, self).__init__()
        self.multi_head_attention = MultiHeadAttention(embedding_dimension, num_heads)
        self.add_and_norm_1 = AddandNorm()
        self.feed_forward = FeedForward(200, embedding_dimension)
        self.add_and_norm_2 = AddandNorm()
        
    def call(self, input_tensor) :
        '''
        parameters : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
        returns : input_tensor - tensor of shape (num_turns, seq_len, embedding_dimension)
        '''
        res = self.multi_head_attention(input_tensor, input_tensor, input_tensor)
        res_skip = self.add_and_norm_1(res, input_tensor)
        res = self.feed_forward(res_skip)
        res = self.add_and_norm_2(res, res_skip)
        return res

In [50]:
class DecoderBlock(layers.Layer) :
    def __init__(self, embedding_dimension, num_heads) :
        super(DecoderBlock, self).__init__()
        self.masked_multi_head_attention = MultiHeadAttention(embedding_dimension, num_heads, is_mask=True)
        self.add_and_norm_1 = AddandNorm()
        
        self.multi_head_attention_1 = MultiHeadAttention(embedding_dimension, num_heads)
        self.add_and_norm_2 = AddandNorm()
        
        self.multi_head_attention_2 = MultiHeadAttention(embedding_dimension, num_heads)
        self.add_and_norm_3 = AddandNorm()
        
        self.feed_forward = FeedForward(200, embedding_dimension)
        self.add_and_norm_4 = AddandNorm()
        
    def call(self, input_tensor, sentence_level_encoder_output, turn_level_encoder_output) :
        res = self.masked_multi_head_attention(input_tensor, input_tensor, input_tensor)
        res_skip = self.add_and_norm_1(res, input_tensor)
        
        res = self.multi_head_attention_1(res_skip, sentence_level_encoder_output, sentence_level_encoder_output)
        res_skip = self.add_and_norm_2(res, res_skip)
        
        res = self.multi_head_attention_2(res_skip, turn_level_encoder_output, turn_level_encoder_output)
        res_skip = self.add_and_norm_3(res, res_skip)
        
        res = self.feed_forward(res_skip)
        res = self.add_and_norm_4(res, res_skip)
        return res

In [51]:
class Encoder(layers.Layer) :
    def __init__(self, 
                 num_blocks, 
                 embedding_dimension, 
                 num_heads) :
        
        super(Encoder, self).__init__()
        self.num_blocks = num_blocks
        
        self.encoder_blocks = [EncoderBlock(embedding_dimension, num_heads) for _ in range(num_blocks)]
        
    def call(self, input_tensor) :
        '''
        parameters : input_tensor : tensor of shape (num_turns, seq_len, embed_dim) / (num_turns, 1, embed_dim + role_vector_size)
        returns : x - tensor of shape (num_turns, seq_len, embed_dim) / (num_turns, 1, embed_dim + role_vector_size)
        '''
        x = input_tensor
        
        for i in range(self.num_blocks) :
            x = self.encoder_blocks[i](x) # (num_turns, seq_len, embed_dim) 
            
        return x

In [52]:
sample_encoder = Encoder(num_blocks=2, 
                         embedding_dimension=100, 
                         num_heads=10)

input_tensor = tf.random.uniform((10, 5, 100), dtype=tf.float64, minval=0, maxval=200)

output_tensor = sample_encoder(input_tensor)

output_tensor.shape  # (num_turns, seq_len, embed_dim) 

TensorShape([10, 5, 100])

In [53]:
class Decoder(layers.Layer) :
    def __init__(self, 
                 num_blocks, 
                 embedding_dimension, 
                 num_heads) :
        
        super(Decoder, self).__init__()
        self.num_blocks = num_blocks
        
        self.decoder_blocks = [DecoderBlock(embedding_dimension, num_heads) for _ in range(num_blocks)]
        
    def call(self, input_tensor, word_level_encoder_output, turn_level_encoder_output) :
        '''
        parameters : input_tensor : tensor of shape (batch_size=1, target_seq_len, embed_dim)
        returns : x - tensor of shape (batch_size=1, target_seq_len, embed_dim) 
        '''
        x = input_tensor
        for i in range(self.num_blocks) :
            x = self.decoder_blocks[i](x, word_level_encoder_output, turn_level_encoder_output)
            
        return x

In [54]:
sample_decoder = Decoder(num_blocks=2, 
                         embedding_dimension=100, 
                         num_heads=10)

input_tensor = tf.random.uniform((1, 7, 100), dtype=tf.float64, minval=0, maxval=200)
word_level_tensor = tf.random.uniform((10, 5, 100), dtype=tf.float64, minval=0, maxval=200)
turn_level_tensor = tf.random.uniform((10, 1, 132), dtype=tf.float64, minval=0, maxval=200)

output_tensor = sample_decoder(input_tensor, word_level_tensor, turn_level_tensor)

output_tensor.shape  # (batch_size=1, target_seq_len, embed_dim) 

TensorShape([10, 7, 100])

In [55]:
class MTNet(tf.keras.Model) :
    def __init__(self, 
                 num_blocks, 
                 word_embedding_dimension,
                 sentence_embedding_dimension,
                 num_heads_word, 
                 num_heads_dec, 
                 vocabulary_size,
                 embedding_matrix,
                 role_vector_size, 
                 init_role_vector, 
                 num_clusters=50,
                 mode='static') :
        
        super(MTNet, self).__init__()
        
        self.init_role_vector = init_role_vector
        self.num_clusters = num_clusters
        
        self.embedding_layer = None
        if mode == 'static' :
            self.embedding_layer = get_static_embedding_layer(embedding_matrix)
        elif mode == 'dynamic' :
            self.embedding_layer = get_dynamic_embedding_layer(embedding_matrix)
        elif mode == 'rand' :
            self.embedding_layer = get_rand_embedding_layer(vocabulary_size, word_embedding_dimension) 
            
        self.positional_embedding_layer = PositionalEmbedding()
                                                            
        
        self.encoder = Encoder(num_blocks, 
                                          sentence_embedding_dimension + role_vector_size, 
                                          num_heads_word)
        
        self.role_vector = layers.Dense(role_vector_size)
        
        self.decoder = Decoder(num_blocks, 
                               word_embedding_dimension, 
                               num_heads_dec)
        
        self.fully_connected_layer = layers.Dense(vocabulary_size)
        
    def call(self, sentence_embedding, input_tensor, target_tensor, turn_seq) :

        sentence_embedding_input = self.positional_embedding_layer(sentence_embedding) # (1, num_turns, sent_embed_dim)
        
        input_role_vector = self.role_vector(self.init_role_vector) # (num_roles, role_vector_size)

        # (1, num_turns, sent_embed_dim + role_vector_size)
        x1_concat = concat_role_vector(sentence_embedding_input, input_role_vector, turn_seq) 
        
        x1 = self.encoder(x1_concat) # (1, num_turns, sent_embed_dim + role_vector_size)

        #for a single meeting or batch
        closest_points_index = get_clusters(self.num_clusters, x1[0]) # (num_turns)
        
        clustered_turns = np.zeros((self.num_clusters, input_tensor.shape[-1])) # (num_clusters, seq_len)

        for index, cluster_point in enumerate(closest_points_index) :
            clustered_turns[index] = input_tensor[0][cluster_point]
        
        x2 = self.embedding_layer(clustered_turns) # (num_clusters, seq_len, word_embed_dim)
        x2 = tf.reshape(x2, [1, -1, x2.shape[-1]]) # (1, num_clusters * seq_len, word_embed_dim)
        
        target_x = self.embedding_layer(target_tensor) # (batch_size=1, target_seq_len, word_embed_dim)
        target_x = self.positional_embedding_layer(target_x) # (batch_size=1, target_seq_len, word_embed_dim)
        
        x = self.decoder(target_x, x1, x2) # (batch_size=1, target_seq_len, embed_dim)
        x = self.fully_connected_layer(x)
        
        return x

In [56]:
trial_role_vector = tf.random.uniform((2, 2))
turn_seq = np.array([0,1,1,0,1,0,1,0,1,0])
temp_embedding_matrix = np.random.normal(0, 0.1, (vocabulary_size, word_embedding_dimension))

sample_mtnet = MTNet(num_blocks=2, 
                     word_embedding_dimension=100, 
                     sentence_embedding_dimension=100,
                     num_heads_word=11, 
                     num_heads_dec=10, 
                     vocabulary_size=vocabulary_size,
                     role_vector_size=32, 
                     init_role_vector=trial_role_vector, 
                     embedding_matrix=temp_embedding_matrix,
                     num_clusters=5,
                     mode='static')

sentence_embedding = tf.random.uniform((1, 10, 100), dtype=tf.float64, minval=0, maxval=200)
temp_input = tf.random.uniform((1, 10, 5), dtype=tf.int64, minval=0, maxval=200)
temp_target = tf.random.uniform((1, 3), dtype=tf.int64, minval=0, maxval=200)

fn_out = sample_mtnet(sentence_embedding, temp_input, temp_target, turn_seq)

fn_out.shape  # (batch_size, tar_seq_len, vocab_size)

TensorShape([1, 3, 5061])

In [57]:
dataset = tf.data.Dataset.from_tensor_slices((sentence_embeddings, meetings, summary))
dataset = dataset.batch(1)

# meetings1, summary1= next(iter(dataset))
# sentence_embeddings.shape, meetings1.shape, summary1.shape

In [58]:
# load Glove embeddings(100 dimensional) and convert it into a dictionary with mapping {word:embedding}

file_path = '../../../GloVe/glove.6B/glove.6B.100d.txt'
embedding_dict = load_embeddings(file_path)

In [59]:
vocabulary = tokenizer.word_index.keys()
embedding_matrix = set_embedding_matrix(embedding_dict, vocabulary, 100)

In [60]:
optimizer = tf.keras.optimizers.Adam(0.1, beta_1=0.9, beta_2=0.98,epsilon=1e-9)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

In [61]:
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_sum(loss_)/tf.reduce_sum(mask)


def accuracy_function(real, pred):
    accuracies = tf.equal(real, tf.argmax(pred, axis=2))

    mask = tf.math.logical_not(tf.math.equal(real, 0))
    accuracies = tf.math.logical_and(mask, accuracies)

    accuracies = tf.cast(accuracies, dtype=tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    return tf.reduce_sum(accuracies)/tf.reduce_sum(mask)

In [62]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

In [63]:
mtnet = MTNet(num_blocks=2, 
             word_embedding_dimension=100,
             sentence_embedding_dimension =100,
             num_heads_word=11, 
             num_heads_dec=10, 
             vocabulary_size=vocabulary_size,  
             embedding_matrix=embedding_matrix,
             role_vector_size=32, 
             init_role_vector=role_vector,
             num_clusters=5,
             mode='static')

In [67]:
EPOCHS = 5

# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.

train_step_signature = [
    tf.TensorSpec(shape=(1, 70, 100), dtype=tf.float32),
    tf.TensorSpec(shape=(1, 70, 100), dtype=tf.int64),
    tf.TensorSpec(shape=(1, 100), dtype=tf.int64),
    tf.TensorSpec(shape=(70), dtype=tf.int64),
]

@tf.autograph.experimental.do_not_convert
@tf.function(input_signature=train_step_signature)
def train_step(sentence_embeddings, input_tensor, target_tensor, turn_seq):
        
    target_inp = target_tensor[:, :-1]
    target_real = target_tensor[:, 1:]


    with tf.GradientTape() as tape:
        predictions = mtnet(sentence_embeddings, input_tensor, target_inp, turn_seq)
        loss = loss_function(target_real, predictions)

    gradients = tape.gradient(loss, mtnet.trainable_variables)
    optimizer.apply_gradients(zip(gradients, mtnet.trainable_variables))

    train_loss(loss)
    train_accuracy(accuracy_function(target_real, predictions))

In [68]:
# test input
# sentence_embeddings = np.array(tf.random.uniform((1, 10, 100), dtype=tf.float32, minval=0, maxval=200))
# input_tensor = np.array(tf.random.uniform((1, 10, 9), dtype=tf.int64, minval=0, maxval=200))
# target_tensor = np.array(tf.random.uniform((1, 6), dtype=tf.int64, minval=0, maxval=200))
# input_role_vector = np.array(tf.random.uniform((2, 2)))
# turn_seq = [0,1,1,0,1,0,1,0,1,0]

In [69]:
for epoch in range(EPOCHS):
    start = time.time()

    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (sentence_embeddings1, meetings1, summary1)) in enumerate(dataset):

        train_step(sentence_embeddings1, meetings1, summary1, turns[batch])
        
        if batch % 5 == 0 : 
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')


    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

    print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')

Epoch 1 Batch 0 Loss 3.6331 Accuracy 0.0909
Epoch 1 Batch 5 Loss 3.6331 Accuracy 0.0909
Epoch 1 Batch 10 Loss 3.6331 Accuracy 0.0909
Epoch 1 Batch 15 Loss 3.6331 Accuracy 0.0909
Epoch 1 Batch 20 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 25 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 30 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 35 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 40 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 45 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 50 Loss 3.6330 Accuracy 0.0909
Epoch 1 Batch 55 Loss 3.6331 Accuracy 0.0909
Epoch 1 Batch 60 Loss 3.6343 Accuracy 0.0909
Epoch 1 Batch 65 Loss 3.6349 Accuracy 0.0909
Epoch 1 Batch 70 Loss 3.6352 Accuracy 0.0909
Epoch 1 Batch 75 Loss 3.6353 Accuracy 0.0909
Epoch 1 Batch 80 Loss 3.6356 Accuracy 0.0909
Epoch 1 Batch 85 Loss 3.6357 Accuracy 0.0909
Epoch 1 Batch 90 Loss 3.6357 Accuracy 0.0909
Epoch 1 Loss 3.6357 Accuracy 0.0909
Time taken for 1 epoch: 13.60 secs

Epoch 2 Batch 0 Loss 3.6355 Accuracy 0.0909
Epoch 2 Batch 5 Loss 3.6344 Acc