In [1]:
%tensorflow_version 1.1x
from datetime import datetime
%load_ext tensorboard

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.1x`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


In [0]:
import numpy as np
import pandas as pd
import pickle
import os, sys
from collections import Counter, defaultdict
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D, Dropout, Activation
from tensorflow.keras.layers import Embedding
from functools import cmp_to_key
import pickle

x = pickle.load(open("/content/drive/My Drive/MCA_Project/data/pickles/data_{}.p".format("emotion"),"rb"))
revs, W, word_idx_map, vocab, _, label_index = x[0], x[1], x[2], x[3], x[4], x[5]

def get_word_indices(data_x):
  length = len(data_x.split())
  return np.array([word_idx_map[word] for word in data_x.split()] + [0]*(50-length))[:50]

def comp_id(x, y):
  xd = int(x[:x.find('_')])
  xu = int(x[x.find('_')+1:])

  yd = int(y[:y.find('_')])
  yu = int(y[y.find('_')+1:])

  if xd != yd:
    return xd - yd
  else:
    return xu - yu


def preprocess():

  train_data, val_data, test_data = {},{},{}

  counts_train = np.zeros((5,1))
  counts_test = np.zeros((5,1))
  counts_val = np.zeros((5,1))

  for i in range(len(revs)):

    utterance_id = revs[i]['dialog']+"_"+revs[i]['utterance']
    
    sentence_word_indices = get_word_indices(revs[i]['text'])
    
    label = label_index[revs[i]['y']]

    if label != 0 and label != 3 and label != 4 and label != 6:
      continue

    if label == 0:
      label = 0
    elif label == 3:
      label = 1
    elif label == 4:
      label = 2
    else:
      label = 3 

    if revs[i]['split']=="train" and counts_train[label] > 1000:
      continue

    if revs[i]['split']=="train":
        train_data[utterance_id]=(sentence_word_indices,label)
        counts_train[label] += 1
    elif revs[i]['split']=="val":
        val_data[utterance_id]=(sentence_word_indices,label)
        counts_val[label] += 1
    elif revs[i]['split']=="test":
        test_data[utterance_id]=(sentence_word_indices,label)
        counts_test[label] += 1

  dialogs = []
  utrs = -1
  d_cur = -1

  t_d = {}
  t_map = {}
  sorted_tr_keys = sorted(train_data.keys(), key=cmp_to_key(comp_id))

  for i in sorted_tr_keys:
    d = i[:i.find('_')]
    u = i[i.find('_') + 1:]
    ouid = d + '_' + u

    if d not in dialogs:
      d_cur += 1
      utrs = 0
      dialogs.append(d)
    else:
      utrs += 1

    df = d_cur
    uf = utrs

    uid = str(df) +'_' + str(uf)
    t_d[uid] = train_data[i]

    t_map[uid] = ouid

  print(t_map)
  dialogs = []
  utrs = -1
  d_cur = -1

  v_d = {}
  v_map = {}
  sorted_val_keys = sorted(val_data.keys(), key=cmp_to_key(comp_id))

  for i in sorted_val_keys:
    d = i[:i.find('_')]
    u = i[i.find('_') + 1:]
    ouid = d + '_' + u

    if d not in dialogs:
      d_cur += 1
      utrs = 0
      dialogs.append(d)
    else:
      utrs += 1

    df = d_cur
    uf = utrs

    uid = str(df) +'_' + str(uf)
    v_d[uid] = val_data[i]
    v_map[uid] = ouid

  dialogs = []
  utrs = -1
  d_cur = -1

  ts_d = {}
  ts_map = {}
  sorted_ts_keys = sorted(test_data.keys(), key=cmp_to_key(comp_id))

  for i in sorted_ts_keys:
    d = i[:i.find('_')]
    u = i[i.find('_') + 1:]
    ouid = d + '_' + u

    if d not in dialogs:
      d_cur += 1
      utrs = 0
      dialogs.append(d)
    else:
      utrs += 1

    df = d_cur
    uf = utrs

    uid = str(df) +'_' + str(uf)
    ts_d[uid] = test_data[i]
    ts_map[uid] = ouid
  
  return t_d, v_d, ts_d, t_map, v_map, ts_map


#preprocess()

In [0]:

max_length=50 # Maximum length of the sentence

class Dataloader:
    
    def __init__(self, mode=None):

        try:
            assert(mode is not None)
        except AssertionError as e:
            print("Set mode as 'Sentiment' or 'Emotion'")
            exit()

        self.MODE = mode # Sentiment or Emotion classification mode
        self.max_l = max_length

        """
            Loading the dataset: 
                - revs is a dictionary with keys/value: 
                    - text: original sentence
                    - split: train/val/test :: denotes the which split the tuple belongs to
                    - y: label of the sentence
                    - dialog: ID of the dialog the utterance belongs to
                    - utterance: utterance number of the dialog ID
                    - num_words: number of words in the utterance
                - W: glove embedding matrix
                - vocab: the vocabulary of the dataset
                - word_idx_map: mapping of each word from vocab to its index in W
                - label_index: mapping of each label (emotion or sentiment) to its assigned index, eg. label_index['neutral']=0
        """
        x = pickle.load(open("/content/drive/My Drive/MCA_Project/data/pickles/data_{}.p".format(self.MODE.lower()),"rb"))
        self.revs, self.W, self.word_idx_map, self.vocab, _, label_index = x[0], x[1], x[2], x[3], x[4], x[5]
        
        self.num_classes = 4
        print("Labels used for this classification: ", label_index)

        self.train_data, self.val_data, self.test_data, self.tr_map, self.v_map, self.ts_map = preprocess()

        # Creating dialogue:[utterance_1, utterance_2, ...] ids
        self.train_dialogue_ids = self.get_dialogue_ids(self.train_data.keys())
        self.val_dialogue_ids = self.get_dialogue_ids(self.val_data.keys())
        self.test_dialogue_ids = self.get_dialogue_ids(self.test_data.keys())

        # Max utternance in a dialog in the dataset
        self.max_utts = self.get_max_utts(self.train_dialogue_ids, self.val_dialogue_ids, self.test_dialogue_ids)

    def get_dialogue_ids(self, keys):
        ids=defaultdict(list)
        for key in keys:
            ids[key.split("_")[0]].append(int(key.split("_")[1]))
        for ID, utts in ids.items():
            ids[ID]=[str(utt) for utt in sorted(utts)]
        return ids

    def get_max_utts(self, train_ids, val_ids, test_ids):
        max_utts_train = max([len(train_ids[vid]) for vid in train_ids.keys()])
        max_utts_val = max([len(val_ids[vid]) for vid in val_ids.keys()])
        max_utts_test = max([len(test_ids[vid]) for vid in test_ids.keys()])
        return np.max([max_utts_train, max_utts_val, max_utts_test])

    def get_one_hot(self, label):
        label_arr = [0]*self.num_classes
        label_arr[label]=1
        return label_arr[:]

    def get_dialogue_text_embs(self):
        key = list(self.train_data.keys())[0]
        
        pad = [0]*len(self.train_data[key][0])

        def get_emb(dialogue_id, local_data):
            dialogue_text = []
            for vid in dialogue_id.keys():
                local_text = []
                for utt in dialogue_id[vid]:
                    local_text.append(local_data[vid+"_"+str(utt)][0][:])
                for _ in range(self.max_utts-len(local_text)):
                    local_text.append(pad[:])
                dialogue_text.append(local_text[:self.max_utts])
            return np.array(dialogue_text)

        self.train_dialogue_features = get_emb(self.train_dialogue_ids, self.train_data)
        self.val_dialogue_features = get_emb(self.val_dialogue_ids, self.val_data)
        self.test_dialogue_features = get_emb(self.test_dialogue_ids, self.test_data)

    def get_dialogue_labels(self):

        def get_labels(ids, data):
            dialogue_label=[]

            for vid, utts in ids.items():
                local_labels=[]
                for utt in utts:
                    local_labels.append(self.get_one_hot(data[vid+"_"+str(utt)][1]))
                for _ in range(self.max_utts-len(local_labels)):
                    local_labels.append(self.get_one_hot(1)) # Dummy label
                dialogue_label.append(local_labels[:self.max_utts])
            return np.array(dialogue_label)

        self.train_dialogue_label=get_labels(self.train_dialogue_ids, self.train_data)
        self.val_dialogue_label=get_labels(self.val_dialogue_ids, self.val_data)
        self.test_dialogue_label=get_labels(self.test_dialogue_ids, self.test_data)

    def get_dialogue_labels_audio(self):

        def get_labels(ids, data, map):
            dialogue_label=[]

            for vid, utts in ids.items():
                local_labels=[]
                for utt in utts:
                    print(vid+"_"+str(utt), map[vid+"_"+str(utt)])
                    local_labels.append(self.get_one_hot(data[map[vid+"_"+str(utt)]][1]))
                for _ in range(self.max_utts-len(local_labels)):
                    local_labels.append(self.get_one_hot(1)) # Dummy label
                dialogue_label.append(local_labels[:self.max_utts])
            return np.array(dialogue_label)

        self.train_dialogue_label=get_labels(self.train_dialogue_ids, self.train_data, self.tr_map)
        self.val_dialogue_label=get_labels(self.val_dialogue_ids, self.val_data, self.v_map)
        self.test_dialogue_label=get_labels(self.test_dialogue_ids, self.test_data, self.ts_map)

        
    def get_dialogue_lengths(self):

        self.train_dialogue_length, self.val_dialogue_length, self.test_dialogue_length=[], [], []
        for vid, utts in self.train_dialogue_ids.items():
            self.train_dialogue_length.append(len(utts))
        for vid, utts in self.val_dialogue_ids.items():
            self.val_dialogue_length.append(len(utts))
        for vid, utts in self.test_dialogue_ids.items():
            self.test_dialogue_length.append(len(utts))

    def get_masks(self):

        self.train_mask = np.zeros((len(self.train_dialogue_length), self.max_utts), dtype='float')
        for i in range(len(self.train_dialogue_length)):
            self.train_mask[i,:self.train_dialogue_length[i]]=1.0
        self.val_mask = np.zeros((len(self.val_dialogue_length), self.max_utts), dtype='float')
        for i in range(len(self.val_dialogue_length)):
            self.val_mask[i,:self.val_dialogue_length[i]]=1.0
        self.test_mask = np.zeros((len(self.test_dialogue_length), self.max_utts), dtype='float')
        for i in range(len(self.test_dialogue_length)):
            self.test_mask[i,:self.test_dialogue_length[i]]=1.0
        
    def load_text_data(self, ):

        self.get_dialogue_text_embs()
        self.get_dialogue_lengths()
        self.get_dialogue_labels()
        self.get_masks()

    def load_audio_data(self, ):

        AUDIO_PATH = "/content/drive/My Drive/MCA_Project/data/pickles/audio_embeddings_feature_selection_{}.pkl".format(self.MODE.lower())
        self.train_audio_emb, self.val_audio_emb, self.test_audio_emb = pickle.load(open(AUDIO_PATH,"rb"))

        self.get_dialogue_audio_embs()
        self.get_dialogue_lengths()
        #self.get_dialogue_labels_audio()
        self.get_dialogue_labels()
        self.get_masks()

    def get_dialogue_audio_embs(self):
        key = list(self.train_audio_emb.keys())[0]
        pad = [0]*len(self.train_audio_emb[key])

        def get_emb(dialogue_id, audio_emb, map):
            dialogue_audio=[]
            for vid in dialogue_id.keys():
                local_audio=[]
                for utt in dialogue_id[vid]:
                    try:
                        local_audio.append(audio_emb[map[vid+"_"+str(utt)]][:])
                    except:
                        print("oops")
                        print(vid+"_"+str(utt))
                        local_audio.append(pad[:])
                for _ in range(self.max_utts-len(local_audio)):
                    local_audio.append(pad[:])
                dialogue_audio.append(local_audio[:self.max_utts])
            return np.array(dialogue_audio)

        self.train_dialogue_features = get_emb(self.train_dialogue_ids, self.train_audio_emb, self.tr_map)
        self.val_dialogue_features = get_emb(self.val_dialogue_ids, self.val_audio_emb, self.v_map)
        self.test_dialogue_features = get_emb(self.test_dialogue_ids, self.test_audio_emb, self.ts_map)


    



In [0]:
import argparse
from tensorflow.keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D, Lambda, LSTM, TimeDistributed, Masking, Bidirectional
from tensorflow.keras.layers import Reshape, Flatten, Dropout, Concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import os, pickle
import numpy as np
import keras
import tensorflow as tf
#from keras_multi_head import MultiHeadAttention
#from keras_multi_head import MultiHead
#import tensorflow.estimator.MultiHead
'''
import argparse
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D, Lambda, LSTM, TimeDistributed, Masking, Bidirectional, concatenate
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
from keras.models import Model, load_model
import keras.backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import os, pickle
import numpy as np
import keras
import tensorflow as tf'''


class Network1:

	def __init__(self):
		self.classification_mode = "emotion"
		self.modality = "text"
		
    #self.PATH = "/content/drive/My Drive/Colab Notebooks/data/models/{}_weights_{}.hdf5".format("text",self.classification_mode.lower())
		#self.OUTPUT_PATH = "/content/drive/My Drive/Colab Notebooks/data/pickles/{}_{}.pkl".format("text",self.classification_mode.lower())
		print("Model initiated for {} classification".format(self.classification_mode))

	def load_data(self,m):
    
		print('Loading data')
    
		self.data = Dataloader(mode = self.classification_mode)
    
		if m == "text":
			self.data.load_text_data()
		elif m == "audio":
			self.data.load_audio_data()
		else:
			exit()
    
		self.train_x = self.data.train_dialogue_features
		self.val_x = self.data.val_dialogue_features
		self.test_x = self.data.test_dialogue_features
    
		self.train_y = self.data.train_dialogue_label
		self.val_y = self.data.val_dialogue_label
		self.test_y = self.data.test_dialogue_label
    
		self.train_mask = self.data.train_mask
		self.val_mask = self.data.val_mask
		self.test_mask = self.data.test_mask
    
		self.train_id = self.data.train_dialogue_ids.keys()
		self.val_id = self.data.val_dialogue_ids.keys()
		self.test_id = self.data.test_dialogue_ids.keys()
    
		self.sequence_length = self.train_x.shape[1]

		self.classes = self.train_y.shape[2]
    
		self.epochs = 20
		self.batch_size = 50

		if m == "text":
			self.train_x_text = self.train_x
			self.val_x_text = self.val_x
			self.test_x_text = self.test_x

			self.train_y_text = self.train_y
			self.val_y_text = self.val_y 
			self.test_y_text = self.test_y 
			
			self.train_mask_text = self.train_mask 
			self.val_mask_text = self.val_mask 
			self.test_mask_text = self.test_mask
			
			self.train_id_text = self.train_id 
			self.val_id_text = self.val_id 
			self.test_id_text = self.test_id 

			self.sequence_length_text = self.sequence_length

		if m == "audio":
			self.train_x_audio = self.train_x
			self.val_x_audio = self.val_x
			self.test_x_audio = self.test_x

			self.train_y_audio = self.train_y
			self.val_y_audio = self.val_y 
			self.test_y_audio = self.test_y 
			
			self.train_mask_audio = self.train_mask 
			self.val_mask_audio = self.val_mask 
			self.test_mask_audio = self.test_mask
			
			self.train_id_audio = self.train_id 
			self.val_id_audio = self.val_id 
			self.test_id_audio = self.test_id 
			self.sequence_length_audio = self.sequence_length

	def get_text_lstm(self):
		self.sentence_length = self.train_x.shape[2]
    
		self.embedding_dim = self.data.W.shape[1]
    
		self.vocabulary_size = self.data.W.shape[0]
		
		embedding = Embedding(input_dim=self.vocabulary_size, output_dim=self.embedding_dim, weights=[self.data.W], input_length=self.sentence_length, trainable=False)
    
		def slicer(x, index):
			return x[:,K.constant(index, dtype='int32'),:]
    
		def slicer_output_shape(input_shape):
			shape = list(input_shape)
			assert len(shape) == 3  # batch, seq_len, sent_len
			new_shape = (shape[0], shape[2])
			return new_shape

		def reshaper(x):
			return K.expand_dims(x, axis=3)
    
		def flattener(x):
			x = K.reshape(x, [-1,x.shape[1]*x.shape[2]])
			return x

		def flattener_output_shape(input_shape):
			shape = list(input_shape)
			new_shape = (shape[0], shape[2]*shape[1])
			return new_shape

		inputs = Input(shape=(self.sequence_length, self.sentence_length), dtype='int32')
		
		
		output = []
		for ind in range(self.sequence_length):
			local_input = Lambda(slicer, output_shape=slicer_output_shape, arguments={"index":ind})(inputs) # Batch, word_indices

			emb_output = embedding(local_input)
			reshape = Lambda(reshaper)(emb_output)

			flatten = Lambda(flattener, output_shape=flattener_output_shape,)(reshape)

			output.append(flatten)

		def stack(x):
			return K.stack(x, axis=1)
      
		outputs = Lambda(stack)(output)
		masked = Masking(mask_value =0)(outputs)
		
		lstm = Bidirectional(LSTM(200, activation='relu', return_sequences = True, dropout=0.3), name = 'lstm_t')(masked)
		self.text_lstm_layer = lstm
		print("TEXT LSTM ", lstm)
		at_layer = HanAttention()
		at_layer.build(lstm.shape)
		attn_scores = at_layer.call([lstm])
		concat_output2 = Concatenate(axis=-1, name='concat_layer')([attn_scores,lstm])
		lstm = Bidirectional(LSTM(200, activation='relu', return_sequences = True, dropout=0.3), name="utter_t")(concat_output2)
		output = TimeDistributed(Dense(self.classes,activation='softmax',kernel_initializer='uniform'))(lstm)

		print("TEXT OUTPUT ", output)
		model = Model(inputs, output)

		#model.summary()

		self.text_lstm =  model

		return lstm, inputs

	def get_audio_lstm(self):

		self.embedding_dim = self.train_x.shape[2]

		print("Creating Model...")
		
		inputs = Input(shape=(self.sequence_length, self.embedding_dim), dtype='float32')
		masked = Masking(mask_value =0)(inputs)
		lstm = Bidirectional(LSTM(200, activation='tanh', return_sequences = True, dropout=0.4), name='lstm_a')(masked)
		self.audio_lstm_layer = lstm

		at_layer = HanAttention()
		at_layer.build(lstm.shape)
		attn_scores = at_layer.call([lstm])
		concat_output2 = Concatenate(axis=-1, name='concat_layer')([attn_scores,lstm])
	
		lstm = Bidirectional(LSTM(200, activation='tanh', return_sequences = True, dropout=0.4), name="utter_a")(concat_output2)
		output = TimeDistributed(Dense(self.classes,activation='softmax',kernel_initializer='uniform'))(lstm)

		model = Model(inputs, output)

		self.audio_lstm = model

		return lstm, inputs

	def get_final_model(self, tl, al, ti, ai):
		#attn_out = tensorflow.keras.layers.Attention()([tl, al])
	
		'''#concat_output = Concatenate(axis=-1, name='concat_layer')([tl, attn_out])
		concat_output = Concatenate(axis=-1, name='concat_layer')([tl, al])
		batch_size = tf.shape(concat_output)[0]
		W_3d = tf.tile(tf.expand_dims(self.W, axis=0), tf.stack([batch_size, 1, 1]))
	  #[batch_size, steps, features]
		input_projection = tf.matmul(concat_output,inputs, W_3d)
		
		input_projection = tf.tanh(input_projection)
	
		len_right = 27
		#print("PPPPPPPPPP ", len_right)
		len_left = 27
		#print("WWWWWWWWWW ", len_left)
		#tensor_left = tf.expand_dims(al, axis=2)
		#tensor_right = tf.expand_dims(tl, axis=1)
		#tensor_left = tf.tile(tensor_left, [1, 1, len_right, 1])
		#tensor_right = tf.tile(tensor_right, [1, len_left, 1, 2])
		#tensor_merged = tf.concat([tl, al], axis=-1)
		#tensor_merged = Concatenate(axis=-1, name='concat_layer')([tensor_right, tensor_left])
		#middle_output = TimeDistributed(Dense(self.classes, activation='tanh'))(tensor_merged)
		#middle_output = Dense(self.classes,activation = 'tanh')(tensor_merged)
		#attn_scores = TimeDistributed(Dense(1))(middle_output)
		attn_scores = Dense(1)(input_projection)
		attn_scores = tf.squeeze(attn_scores, axis=3)
		exp_attn_scores = tf.exp(attn_scores -
                             tf.reduce_max(attn_scores, axis=-1, keepdims=True))
		exp_sum = tf.reduce_sum(exp_attn_scores, axis=-1, keepdims=True)
		attention_weights = exp_attn_scores / exp_sum
		
		print("WEIGHTS ", attention_weights)

		concat_output = tf.matmul(attention_weights, tl)'''

		'''#concat_output = Concatenate(axis=-1, name='concat_layer')([tl, al])
		attention_layer = MyLayer()([tl,al])
		##attn_sum = 0
		#for g in range(attn_scores.shape[0]):
		#	attn_sum = attn_sum + np.exp(attn_scores[g])
		#attention_layer = tf.squeeze(attn_scores, axis=3)
		attention_layer.build()([tl,al])
		attn_scores = attention_layer.call()([tl,al])
		print("ATTENTION LAYER" , attn_scores)
		exp_attn_scores = tf.exp(attn_scores -
                             tf.reduce_max(attn_scores, axis=-1, keepdims=True))
		exp_sum = tf.reduce_sum(exp_attn_scores, axis=-1, keepdims=True)
		attention_weights = exp_attn_scores / exp_sum
		
		print("WEIGHTS ", attention_weights)

		concat_output = tf.matmul(attention_weights, tl)
	 
		attention_weights = exp_attn_scores / attn_sum
		
		print("WEIGHTS ", attention_weights)

		concat_output = tf.matmul(attention_weights, tl)



		#att_out=attention()(concat_output)
		#concat_output = Concatenate(axis=-1, name='concat_layer')([tl, attn_out])'''

		'''print("text_length ", tl.shape)
		print("audio_length ", al.shape)
	
		#u_weight = np.ones((tl.shape))
		#v_weight = np.ones((al.shape))
	
		#print("u_weight ", u_weight)

		al_tl = tl+al

		#print("al_tl_collective_length ", al_tl_collective.shape)
		
		alpha = TimeDistributed(Dense(self.classes,activation='tanh'))(al_tl)
		attn_scores = TimeDistributed(Dense(1))(alpha)
		
		exp_attn_scores = tf.exp(attn_scores -
                             tf.reduce_max(attn_scores, axis=-1, keepdims=True))
		exp_sum = tf.reduce_sum(exp_attn_scores, axis=-1, keepdims=True)
		attention_weights = exp_attn_scores / exp_sum
		print("WEIGHTS ", attention_weights)

		concat_output = tf.matmul(attention_weights, tl)
		
		print(tl.shape)
		print(concat_output.shape)
	
		concat_output2 = Concatenate(axis=-1, name='concat_layer')([tl, concat_output])

		print("concat2.shape ",concat_output2.shape)'''

		at_layer = HanAttention()
		at_layer.build(tl.shape)
		attn_scores = at_layer.call([tl,al])
	
		print("AT_OUTPUT ", attn_scores)
		
		#multi_head = tf.estimator.MultiHead(ti)(attn_scores)
		
		#exp_attn_scores = tf.exp(attn_scores -
    #                         tf.reduce_max(attn_scores, axis=-1, keepdims=True))
		#exp_sum = tf.reduce_sum(exp_attn_scores, axis=-1, keepdims=True)
		#attention_weights = exp_attn_scores / exp_sum
		#print("WEIGHTS ", attention_weights)

		#concat_output = tf.matmul(attention_weights, tl)
		
		#print(multi_head.shape)
		#print(tl.shape)
	
		#multi_head = tf.reshape(multi_head,tl.shape)
		concat_output2 = Concatenate(axis=-1, name='concat_layer')([attn_scores,tl])

		#multi_head = MultiHead( tf.keras.layers.LSTM(units=32), layer_num=5, name='Multi-Head')(concat_output2)
		lstm = Bidirectional(LSTM(200, activation='tanh', return_sequences = True, dropout=0.4), name='lstm_f')(concat_output2)
		output = TimeDistributed(Dense(self.classes,activation='softmax',kernel_initializer='uniform'))(lstm)

		print("ALL OUTPUT ", output)
		self.merged_model = Model([ti, ai], output)
		
	def train_lstm(self, m):
		if m == 'text':
			model = self.text_lstm
		elif m == 'audio':
			model = self.audio_lstm
		
		model.compile(optimizer='adam', loss='categorical_crossentropy', sample_weight_mode='temporal')
		early_stopping = EarlyStopping(monitor='val_loss', patience=20)

		model.fit(self.train_x, self.train_y,
		                epochs=self.epochs,
		                batch_size=self.batch_size,
		                sample_weight=self.train_mask,
		                shuffle=True, 
		                callbacks=[early_stopping],
		                validation_data=(self.val_x, self.val_y, self.val_mask))

		#logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
		#tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

		#summary = model.fit(self.train_x, self.train_y,
		#                epochs=self.epochs,
		#                batch_size=self.batch_size,
		#                sample_weight=self.train_mask,
		#                shuffle=True, 
		#                callbacks=[early_stopping, tensorboard_callback],
		#                validation_data=(self.val_x, self.val_y, self.val_mask))
		#filename = '/content/drive/My Drive/mca/MCA_Project/MELD_Dataset/audio_att_model2.sav'
		#pickle.dump(open(filename, 'wb'),model)
		
		#output5 = open('/content/drive/My Drive/Audio_Model_Attention'+ '.pickle', 'wb')
		#pickle.dump(model, output5)
	
		self.test_model(m)
		return model
	
	def train_network(self):
		model = self.merged_model
		#print("HELLOOO")
		#multi_head = MultiHeadAttention( head_num=5, name='Multi-Head' )(lstm)
		model.compile(optimizer='adam', loss='categorical_crossentropy', sample_weight_mode='temporal')
		early_stopping = EarlyStopping(monitor='loss', patience=10)
	
		
		logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
		tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
		
		#model.summary()
		print(self.train_x_text.shape)
		print(self.train_x_audio.shape)
		print(self.train_y.shape)
		print(self.epochs)
		print(self.batch_size)
		model.fit([self.train_x_text, self.train_x_audio], self.train_y, epochs=self.epochs,batch_size=self.batch_size)
	
		return model


	def test_model(self, m):
		if m == 'text':
			model = self.text_lstm
			#intermediate_layer_model = Model(input=model.input, output=model.get_layer("lstm_t").output)
		elif m == 'audio':
			model = self.audio_lstm
			#intermediate_layer_model = Model(input=model.input, output=model.get_layer("lstm_a").output)
		elif m == 'merged':
			model = self.merged_model
			#intermediate_layer_model = Model(input=model.input, output=model.get_layer("lstm_f").output)

		'''
		intermediate_output_train = intermediate_layer_model.predict(self.train_x)
		intermediate_output_val = intermediate_layer_model.predict(self.val_x)
		intermediate_output_test = intermediate_layer_model.predict(self.test_x)

		train_emb, val_emb, test_emb = {}, {}, {}
		for idx, ID in enumerate(self.train_id):
		    train_emb[ID] = intermediate_output_train[idx]
		for idx, ID in enumerate(self.val_id):
		    val_emb[ID] = intermediate_output_val[idx]
		for idx, ID in enumerate(self.test_id):
		    test_emb[ID] = intermediate_output_test[idx]
		'''

		filename2 = '/content/drive/My Drive/mca/MCA_Project/Final_files/test_x_text_text.pkl'
		outfile2 = open(filename2, 'wb')
		np.save(outfile2, self.test_x)
	
		filename5 = '/content/drive/My Drive/mca/MCA_Project/Final_files/test_y_text.pkl'
		outfile5 = open(filename5, 'wb')
		np.save(outfile5, self.test_y)

		filename6 = '/content/drive/My Drive/mca/MCA_Project/Final_files/test_mask_text.pkl'
		outfile6 = open(filename6, 'wb')
		np.save(outfile6, self.test_mask)


		calc_test_result(model.predict(self.test_x), self.test_y, self.test_mask)
		
def calc_test_result(pred_label, test_label, test_mask):

		true_label=[]
		predicted_label=[]

		for i in range(pred_label.shape[0]):
			for j in range(pred_label.shape[1]):
				if test_mask[i,j]==1:
					true_label.append(np.argmax(test_label[i,j] ))
					predicted_label.append(np.argmax(pred_label[i,j] ))
		print("Confusion Matrix :")
		print(confusion_matrix(true_label, predicted_label))
		print("Classification Report :")
		print(classification_report(true_label, predicted_label, digits=4))
		print('Weighted FScore: \n ', precision_recall_fscore_support(true_label, predicted_label, average='weighted'))



In [0]:
!pip install tensorflow.estimator.MultiHead

[31mERROR: Could not find a version that satisfies the requirement tensorflow.estimator.MultiHead (from versions: none)[0m
[31mERROR: No matching distribution found for tensorflow.estimator.MultiHead[0m


In [0]:
from tensorflow.keras.layers import Layer

class HanAttention(Layer):
  """
  Refer to [Hierarchical Attention Networks for Document Classification]
    (https://www.cs.cmu.edu/~hovy/papers/16HLT-hierarchical-attention-networks.pdf)
    wrap `with tf.variable_scope(name, reuse=tf.AUTO_REUSE):`
  Input shape: (Batch size, steps, features)
  Output shape: (Batch size, features)
  """

  def __init__(self,
               W_regularizer=None,
               u_regularizer=None,
               b_regularizer=None,
               W_constraint=None,
               u_constraint=None,
               b_constraint=None,
               use_bias=True,
               **kwargs):

    super().__init__(**kwargs)
    self.supports_masking = True
    self.init = tf.keras.initializers.get('glorot_uniform')

    self.W_regularizer = tf.keras.regularizers.get(W_regularizer)
    self.u_regularizer = tf.keras.regularizers.get(u_regularizer)
    self.b_regularizer = tf.keras.regularizers.get(b_regularizer)

    self.W_constraint = tf.keras.constraints.get(W_constraint)
    self.u_constraint = tf.keras.constraints.get(u_constraint)
    self.b_constraint = tf.keras.constraints.get(b_constraint)

    self.use_bias = use_bias

  def build(self, input_shape):
    # pylint: disable=attribute-defined-outside-init
    #assert len(input_shape) == 3

    self.W = self.add_weight(
        name='{}_W'.format(self.name),
        shape=(
            int(input_shape[-1]),
            int(input_shape[-1]),
        ),
        initializer=self.init,
        regularizer=self.W_regularizer,
        constraint=self.W_constraint)

    if self.use_bias:
      self.b = self.add_weight(
          name='{}_b'.format(self.name),
          shape=(int(input_shape[-1]),),
          initializer='zero',
          regularizer=self.b_regularizer,
          constraint=self.b_constraint)

    self.attention_context_vector = self.add_weight(
        name='{}_att_context_v'.format(self.name),
        shape=(int(input_shape[-1]),),
        initializer=self.init,
        regularizer=self.u_regularizer,
        constraint=self.u_constraint)
    self.built = True

  # pylint: disable=missing-docstring, no-self-use
  def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
    # do not pass the mask to the next layers
    return None

  
  def call(self, inputs, training=None, mask=None):
    batch_size = tf.shape(inputs)[1]
    W_3d = tf.tile(tf.expand_dims(self.W, axis=0), tf.stack([batch_size, 1, 1]))
    # [batch_size, steps, features]
    input_projection = tf.matmul(inputs, W_3d)

    if self.use_bias:
      input_projection += self.b

    input_projection = tf.tanh(input_projection)

    # [batch_size, steps, 1]
    similaritys = tf.reduce_sum(
        tf.multiply(input_projection, self.attention_context_vector),
        axis=2,
        keep_dims=True)

    # [batch_size, steps, 1]
    if mask is not None:
      attention_weights = masked_softmax(similaritys, mask, axis=1)
    else:
      attention_weights = tf.nn.softmax(similaritys, axis=1)

    # [batch_size, features]
    attention_output = tf.reduce_sum(
        tf.multiply(inputs, attention_weights), axis=0)
    return attention_output

  # pylint: disable=no-self-use

  def compute_output_shape(self, input_shape):
    """compute output shape"""
    return input_shape[0], input_shape[-1]

In [18]:
N = Network1()

"""FOR TEXT"""
N.load_data("text")
tl, ti = N.get_text_lstm()
M = N.train_lstm("text")

"""FOR AUDIO"""
#N.load_data("audio")
#al, ai = N.get_audio_lstm()
#M = N.train_lstm("audio")

 
"""BIMODAL"""
N.load_data("text")
tl, ti = N.get_text_lstm()

#N.load_data("audio")
#al, ai = N.get_audio_lstm()

#N.get_final_model(tl, al, ti, ai)

#M = N.train_network()

#model3 = N.merged_model

#calc_test_result(model3.predict([N.test_x_text, N.test_x_audio]), N.test_y, N.test_mask)



"""filename2 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_x_text.pkl'
outfile2 = open(filename2, 'wb')
np.save(outfile2, N.test_x_text)

filename3 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_x_audio.pkl'
outfile3 = open(filename3, 'wb')
np.save(outfile3, N.test_x_audio)

filename5 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_y.pkl'
outfile5 = open(filename5, 'wb')
np.save(outfile5, N.test_y)

filename6 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_mask.pkl'
outfile6 = open(filename6, 'wb')
np.save(outfile6, N.test_mask)"""





Model initiated for emotion classification
Loading data
Labels used for this classification:  {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5, 'anger': 6}
{'0_0': '0_0', '0_1': '0_1', '0_2': '0_2', '0_3': '0_3', '0_4': '0_5', '0_5': '0_6', '0_6': '0_7', '0_7': '0_8', '0_8': '0_9', '0_9': '0_11', '0_10': '0_13', '1_0': '1_1', '1_1': '1_4', '1_2': '1_5', '1_3': '1_8', '2_0': '2_0', '2_1': '2_1', '2_2': '2_2', '2_3': '2_3', '2_4': '2_5', '2_5': '2_7', '2_6': '2_8', '2_7': '2_9', '2_8': '2_10', '2_9': '2_11', '2_10': '2_12', '3_0': '3_3', '3_1': '3_4', '3_2': '3_5', '3_3': '3_8', '3_4': '3_9', '4_0': '4_1', '4_1': '4_3', '4_2': '4_4', '4_3': '4_5', '4_4': '4_6', '4_5': '4_7', '4_6': '4_8', '4_7': '4_9', '4_8': '4_10', '4_9': '4_11', '4_10': '4_12', '4_11': '4_13', '4_12': '4_14', '5_0': '5_0', '5_1': '5_1', '5_2': '5_2', '6_0': '6_0', '6_1': '6_2', '6_2': '6_3', '6_3': '6_4', '6_4': '6_5', '6_5': '6_6', '6_6': '6_7', '6_7': '6_9', '6_8': '6_10', '6_9': '6_11',

"filename2 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_x_text.pkl'\noutfile2 = open(filename2, 'wb')\nnp.save(outfile2, N.test_x_text)\n\nfilename3 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_x_audio.pkl'\noutfile3 = open(filename3, 'wb')\nnp.save(outfile3, N.test_x_audio)\n\nfilename5 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_y.pkl'\noutfile5 = open(filename5, 'wb')\nnp.save(outfile5, N.test_y)\n\nfilename6 = '/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_test_mask.pkl'\noutfile6 = open(filename6, 'wb')\nnp.save(outfile6, N.test_mask)"

In [14]:
M.save("/content/drive/My Drive/mca/MCA_Project/Testing5.tf")

ValueError: ignored

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from tensorflow.keras.models import load_model
model = load_model('/content/drive/My Drive/mca/MCA_Project/Final_files/Bimodal_AttentionLayer_Model.h5')

In [0]:
calc_test_result(model.predict(N.test_x), N.test_y, N.test_mask)

In [0]:
fn_dict = '/content/drive/My Drive/mca/MCA_Project/MELD_Dataset/Audio_AttentionLayer_Model.sav'
outfile = open(fn_dict, 'wb')
np.save(outfile,N.audio_lstm)

In [0]:
import tensorflow
print(tensorflow.__version__)

In [0]:
pip install keras-multi-head

In [0]:
%tensorboard --logdir logs/scalars