In [2]:
# Mount Google Drive into this Colab runtime at /content/drive; the cells below
# read and write their data and model files under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
import argparse
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D, Lambda, LSTM, TimeDistributed, Masking, Bidirectional
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
from keras.models import Model, load_model
import keras.backend as K
from sklearn.model_selection import train_test_split#
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import os, pickle, sys
import numpy as np
from collections import Counter, defaultdict
import pandas as pd

Using TensorFlow backend.


In [5]:
# Number of word indices kept per utterance: Dataloader pads (with 0) or
# truncates every sentence to exactly this many entries.
max_length=50 

class Dataloader:
    """Load the pickled text data and arrange it dialogue by dialogue.

    The pickle is expected to hold a sequence whose items 0-3 and 5 are, in
    order: the utterance records, the matrix stored as ``W`` (bc_LSTM uses it
    as embedding weights), the word -> index map, the vocabulary and the
    label -> class-index map. Item 4 is ignored.

    After construction the instance exposes, for each of the train/val/test
    splits, a ``*_data`` dict keyed by ``"<dialog>_<utterance>"`` and a
    ``*_dialogue_ids`` dict mapping a dialogue id to its sorted utterance
    ids. ``load_text_data`` then builds the padded feature, label, length and
    mask arrays.
    """

    def __init__(self, mode=None):
        """Read the pickle for ``mode`` and index its utterances by split.

        Args:
            mode: classification mode, e.g. 'Sentiment' or 'Emotion'. Its
                lower-cased form selects the pickle file to load.

        Raises:
            ValueError: if ``mode`` is None.
        """
        # An explicit check instead of `assert` + `exit()`: asserts vanish
        # under `python -O`, and exit() would terminate the interpreter.
        if mode is None:
            raise ValueError("Set mode as 'Sentiment' or 'Emotion'")

        self.MODE = mode
        self.max_l = max_length

        pickle_path = "/content/drive/My Drive/MELD-master/data/pickles/data_{}.p".format(self.MODE.lower())
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_path, "rb") as handle:
            x = pickle.load(handle)
        revs, self.W, self.word_idx_map, self.vocab, _, label_index = x[0], x[1], x[2], x[3], x[4], x[5]
        self.num_classes = len(label_index)
        print("Labels used for this classification: ", label_index)

        # Bucket every utterance into its split; records with any other
        # 'split' value are ignored.
        self.train_data, self.val_data, self.test_data = {}, {}, {}
        split_buckets = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
        for rev in revs:
            bucket = split_buckets.get(rev['split'])
            if bucket is None:
                continue
            utterance_id = rev['dialog'] + "_" + rev['utterance']
            bucket[utterance_id] = (self.get_word_indices(rev['text']), label_index[rev['y']])

        self.train_dialogue_ids = self.get_dialogue_ids(self.train_data.keys())
        self.val_dialogue_ids = self.get_dialogue_ids(self.val_data.keys())
        self.test_dialogue_ids = self.get_dialogue_ids(self.test_data.keys())

        # Longest dialogue over all splits: every dialogue is padded to it.
        self.max_utts = self.get_max_utts(self.train_dialogue_ids, self.val_dialogue_ids, self.test_dialogue_ids)

    def get_word_indices(self, data_x):
        """Map a whitespace-separated sentence to exactly ``max_l`` word indices.

        Shorter sentences are right-padded with 0, longer ones are truncated.
        A word missing from ``word_idx_map`` raises KeyError.
        """
        words = data_x.split()
        indices = [self.word_idx_map[word] for word in words]
        # A negative pad count yields an empty list, so long sentences are
        # only affected by the final slice.
        padding = [0] * (self.max_l - len(words))
        return np.array(indices + padding)[:self.max_l]

    def get_dialogue_ids(self, keys):
        """Group ``"<dialog>_<utterance>"`` keys by dialogue.

        Returns:
            defaultdict mapping each dialogue id to its utterance ids, sorted
            numerically and converted back to strings.
        """
        ids = defaultdict(list)
        for key in keys:
            parts = key.split("_")
            ids[parts[0]].append(int(parts[1]))
        for ID, utts in ids.items():
            ids[ID] = [str(utt) for utt in sorted(utts)]
        return ids

    def get_max_utts(self, train_ids, val_ids, test_ids):
        """Return the largest number of utterances in any dialogue of any split.

        An empty split contributes 0 instead of raising.
        """
        return np.max([
            max((len(utts) for utts in split.values()), default=0)
            for split in (train_ids, val_ids, test_ids)
        ])

    def get_one_hot(self, label):
        """Return a fresh one-hot list of length ``num_classes`` for ``label``."""
        label_arr = [0] * self.num_classes
        label_arr[label] = 1
        return label_arr

    def get_dialogue_text_embs(self):
        """Build ``*_dialogue_features``: per dialogue, ``max_utts`` rows of word indices.

        Dialogues with fewer utterances are padded with all-zero rows.
        """
        # Every stored sentence has exactly max_l entries (see
        # get_word_indices), so the pad row has the same width.
        pad = [0] * self.max_l

        def get_emb(dialogue_id, local_data):
            dialogue_text = []
            for vid, utts in dialogue_id.items():
                local_text = [local_data[vid + "_" + str(utt)][0][:] for utt in utts]
                local_text.extend(pad[:] for _ in range(self.max_utts - len(local_text)))
                dialogue_text.append(local_text[:self.max_utts])
            return np.array(dialogue_text)

        self.train_dialogue_features = get_emb(self.train_dialogue_ids, self.train_data)
        self.val_dialogue_features = get_emb(self.val_dialogue_ids, self.val_data)
        self.test_dialogue_features = get_emb(self.test_dialogue_ids, self.test_data)

    def get_dialogue_labels(self):
        """Build ``*_dialogue_label``: per dialogue, ``max_utts`` one-hot label rows.

        Padding positions receive a dummy one-hot for class 1; the masks from
        ``get_masks`` mark which positions are real.
        """

        def get_labels(ids, data):
            dialogue_label = []
            for vid, utts in ids.items():
                local_labels = [self.get_one_hot(data[vid + "_" + str(utt)][1]) for utt in utts]
                for _ in range(self.max_utts - len(local_labels)):
                    local_labels.append(self.get_one_hot(1))  # Dummy label
                dialogue_label.append(local_labels[:self.max_utts])
            return np.array(dialogue_label)

        self.train_dialogue_label = get_labels(self.train_dialogue_ids, self.train_data)
        self.val_dialogue_label = get_labels(self.val_dialogue_ids, self.val_data)
        self.test_dialogue_label = get_labels(self.test_dialogue_ids, self.test_data)

    def get_dialogue_lengths(self):
        """Record the number of real utterances of every dialogue, per split."""
        self.train_dialogue_length = [len(utts) for utts in self.train_dialogue_ids.values()]
        self.val_dialogue_length = [len(utts) for utts in self.val_dialogue_ids.values()]
        self.test_dialogue_length = [len(utts) for utts in self.test_dialogue_ids.values()]

    def _build_mask(self, lengths):
        """Return a (len(lengths), max_utts) float array with 1.0 on real utterances."""
        mask = np.zeros((len(lengths), self.max_utts), dtype='float')
        for row, length in enumerate(lengths):
            mask[row, :length] = 1.0
        return mask

    def get_masks(self):
        """Build ``*_mask`` from the dialogue lengths (requires get_dialogue_lengths)."""
        self.train_mask = self._build_mask(self.train_dialogue_length)
        self.val_mask = self._build_mask(self.val_dialogue_length)
        self.test_mask = self._build_mask(self.test_dialogue_length)

    def load_text_data(self, ):
        """Populate features, lengths, labels and masks for all three splits."""
        self.get_dialogue_text_embs()
        self.get_dialogue_lengths()
        self.get_dialogue_labels()
        self.get_masks()


    

In [6]:
class bc_LSTM:
	"""Dialogue-level utterance classifier: a per-utterance text CNN feeding two bidirectional LSTMs.

	Typical use is ``load_data()`` followed by ``train_model()``, which also
	evaluates on the test split through ``test_model()``.
	"""

	def __init__(self, args):
		"""Store the run configuration and derive checkpoint / output paths.

		Args:
			args: object with string attributes ``classify`` (classification
				mode) and ``modality``; both are embedded in the file names.
		"""
		self.classification_mode = args.classify
		self.modality = args.modality
		self.PATH = "/content/drive/My Drive/MELD-master/data/models/{}_weights_{}.hdf5".format(args.modality,self.classification_mode.lower())
		self.OUTPUT_PATH = "/content/drive/My Drive/MELD-master/data/pickles/{}_{}.pkl".format(args.modality,self.classification_mode.lower())
		print("Model initiated for {} classification".format(self.classification_mode))


	def load_data(self,):
		"""Create the Dataloader and copy its arrays, masks and ids onto this object."""

		print('Loading data')
		self.data = Dataloader(mode = self.classification_mode)

		self.data.load_text_data()

		self.train_x = self.data.train_dialogue_features
		self.val_x = self.data.val_dialogue_features
		self.test_x = self.data.test_dialogue_features

		self.train_y = self.data.train_dialogue_label
		self.val_y = self.data.val_dialogue_label
		self.test_y = self.data.test_dialogue_label

		self.train_mask = self.data.train_mask
		self.val_mask = self.data.val_mask
		self.test_mask = self.data.test_mask

		self.train_id = self.data.train_dialogue_ids.keys()
		self.val_id = self.data.val_dialogue_ids.keys()
		self.test_id = self.data.test_dialogue_ids.keys()

		# Axis 1 of the features is the (padded) utterance position within a
		# dialogue; axis 2 of the labels is the one-hot class axis.
		self.sequence_length = self.train_x.shape[1]
		self.classes = self.train_y.shape[2]


	def calc_test_result(self, pred_label, test_label, test_mask):
		"""Print confusion matrix, classification report and weighted P/R/F.

		Only positions where ``test_mask`` equals 1 are scored; the class of a
		position is the argmax over its last axis.

		Args:
			pred_label: per-position class scores, indexed [dialogue, utterance, class].
			test_label: one-hot ground truth with the same layout.
			test_mask: [dialogue, utterance] array, 1 on real utterances.
		"""
		# Boolean indexing walks the array in row-major order, i.e. the same
		# order as a nested loop over dialogues then utterances.
		keep = np.asarray(test_mask) == 1
		true_label = np.argmax(test_label, axis=-1)[keep]
		predicted_label = np.argmax(pred_label, axis=-1)[keep]

		print("Confusion Matrix :")
		print(confusion_matrix(true_label, predicted_label))
		print("Classification Report :")
		print(classification_report(true_label, predicted_label, digits=4))
		print('Weighted FScore: \n ', precision_recall_fscore_support(true_label, predicted_label, average='weighted'))


	def get_text_model(self):
		"""Build the (uncompiled) Keras model and set the training hyperparameters.

		For every utterance position the word indices go through a frozen
		embedding, one convolution + max-pool per filter size, a shared
		100-unit tanh dense layer and dropout. The per-utterance vectors are
		stacked along axis 1, masked, passed through two bidirectional LSTMs
		(the second is named "utter") and a time-distributed softmax.

		Requires ``load_data()`` to have been called.

		Returns:
			keras Model mapping int32 word indices indexed
			[dialogue, utterance, word] to per-utterance class probabilities.
		"""

		# Modality specific hyperparameters
		self.epochs = 100
		self.batch_size = 50

		# Modality specific parameters
		self.embedding_dim = self.data.W.shape[1]
		self.vocabulary_size = self.data.W.shape[0]
		self.filter_sizes = [3,4,5]
		self.num_filters = 512

		print("Creating Model...")

		sentence_length = self.train_x.shape[2]

		# All weighted layers are created once, outside the per-utterance
		# loop, so every utterance position shares the same parameters.
		embedding = Embedding(input_dim=self.vocabulary_size, output_dim=self.embedding_dim, weights=[self.data.W], input_length=sentence_length, trainable=False)
		convs = [
			Conv2D(self.num_filters, kernel_size=(size, self.embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')
			for size in self.filter_sizes
		]
		# Each pool window spans the whole 'valid' convolution output, so it
		# reduces to a single value per filter.
		pools = [
			MaxPool2D(pool_size=(sentence_length - size + 1, 1), strides=(1,1), padding='valid')
			for size in self.filter_sizes
		]
		dense_func = Dense(100, activation='tanh', name="dense")

		def slicer(x, index):
			# Select the utterance at position `index` of every dialogue.
			return x[:,K.constant(index, dtype='int32'),:]

		def slicer_output_shape(input_shape):
			shape = list(input_shape)
			assert len(shape) == 3  # batch, seq_len, sent_len
			new_shape = (shape[0], shape[2])
			return new_shape

		def reshaper(x):
			# Add a trailing channel axis for Conv2D.
			return K.expand_dims(x, axis=3)

		def flattener(x):
			x = K.reshape(x, [-1, x.shape[1]*x.shape[3]])
			return x

		def flattener_output_shape(input_shape):
			# Mirrors flattener: axis 1 holds one entry per filter size.
			shape = list(input_shape)
			new_shape = (shape[0], shape[1]*shape[3])
			return new_shape

		inputs = Input(shape=(self.sequence_length, sentence_length), dtype='int32')
		cnn_output = []
		for ind in range(self.sequence_length):

			local_input = Lambda(slicer, output_shape=slicer_output_shape, arguments={"index":ind})(inputs) # Batch, word_indices

			#cnn-sent
			emb_output = embedding(local_input)
			reshape = Lambda(reshaper)(emb_output)
			pooled = [pool(conv(reshape)) for conv, pool in zip(convs, pools)]
			concatenated_tensor = Concatenate(axis=1)(pooled)
			flatten = Lambda(flattener, output_shape=flattener_output_shape,)(concatenated_tensor)
			dense_output = dense_func(flatten)
			dropout = Dropout(0.5)(dense_output)
			cnn_output.append(dropout)

		def stack(x):
			return K.stack(x, axis=1)
		cnn_outputs = Lambda(stack)(cnn_output)

		masked = Masking(mask_value =0)(cnn_outputs)
		lstm = Bidirectional(LSTM(300, activation='relu', return_sequences = True, dropout=0.3))(masked)
		# Named so test_model() can fetch this layer's output as features.
		lstm = Bidirectional(LSTM(300, activation='relu', return_sequences = True, dropout=0.3), name="utter")(lstm)
		output = TimeDistributed(Dense(self.classes,activation='softmax'))(lstm)

		model = Model(inputs, output)
		return model


	def train_model(self):
		"""Train with early stopping, checkpoint the best model, then evaluate.

		The utterance masks are passed as temporal sample weights, so padded
		positions carry zero weight in the loss.

		Raises:
			ValueError: if ``self.modality`` is not "text", the only
				modality implemented here.
		"""
		# Fail early with a clear message rather than hitting an unbound
		# `model` further down.
		if self.modality != "text":
			raise ValueError("Unsupported modality '{}': only 'text' is implemented".format(self.modality))

		checkpoint = ModelCheckpoint(self.PATH, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

		model = self.get_text_model()
		model.compile(optimizer='adadelta', loss='categorical_crossentropy', sample_weight_mode='temporal')

		early_stopping = EarlyStopping(monitor='val_loss', patience=10)
		model.fit(self.train_x, self.train_y,
				epochs=self.epochs,
				batch_size=self.batch_size,
				sample_weight=self.train_mask,
				shuffle=True,
				callbacks=[early_stopping, checkpoint],
				validation_data=(self.val_x, self.val_y, self.val_mask))

		self.test_model()


	def test_model(self):
		"""Load the checkpoint, dump "utter"-layer features and print test metrics.

		The features of every dialogue in all three splits are pickled to
		``OUTPUT_PATH`` as ``[train, val, test]`` dicts keyed by dialogue id.
		"""

		model = load_model(self.PATH)
		intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer("utter").output)

		def embeddings_by_id(ids, features):
			# Row idx of the prediction belongs to the idx-th dialogue id.
			outputs = intermediate_layer_model.predict(features)
			return {ID: outputs[idx] for idx, ID in enumerate(ids)}

		train_emb = embeddings_by_id(self.train_id, self.train_x)
		val_emb = embeddings_by_id(self.val_id, self.val_x)
		test_emb = embeddings_by_id(self.test_id, self.test_x)

		# `with` guarantees the file is flushed and closed.
		with open(self.OUTPUT_PATH, "wb") as handle:
			pickle.dump([train_emb, val_emb, test_emb], handle)

		self.calc_test_result(model.predict(self.test_x), self.test_y, self.test_mask)
		

In [7]:
if __name__ == "__main__":

	# Run configuration, expressed as command-line style flags.
	parser = argparse.ArgumentParser()
	parser.add_argument("-classify", help="Set the classification to be 'Emotion' or 'Sentiment'", required=True)
	parser.add_argument("-modality", help="Set the modality to be 'text' or 'audio' or 'bimodal'", required=True)

	# The argument list is given explicitly, so sys.argv is not consulted.
	args = parser.parse_args(args=['-classify', 'Emotion', '-modality', 'text'])

	# Normalise case: both values are interpolated into file names.
	args.classify = args.classify.title()
	args.modality = args.modality.lower()

	# Make sure the output folders exist; exist_ok avoids a check-then-create race.
	for directory in ["/content/drive/My Drive/MELD-master/data/pickles", "/content/drive/My Drive/MELD-master/data/models"]:
		os.makedirs(directory, exist_ok=True)

	model = bc_LSTM(args)
	model.load_data()
 


Model initiated for Emotion classification
Loading data
Labels used for this classification:  {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5, 'anger': 6}


In [6]:
# Build, compile and fit the model. train_model() ends by calling
# test_model(), so the test-set report is printed at the end of this cell.
model.train_model()

Creating Model...
Train on 1038 samples, validate on 114 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.48811, saving model to /content/drive/My Drive/MELD-master/data/models/text_weights_emotion.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.48811 to 0.48430, saving model to /content/drive/My Drive/MELD-master/data/models/text_weights_emotion.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.48430 to 0.48306, saving model to /content/drive/My Drive/MELD-master/data/models/text_weights_emotion.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 0.48306 to 0.47972, saving model to /content/drive/My Drive/MELD-master/data/models/text_weights_emotion.hdf5
Epoch 5/100

Epoch 00005: val_loss improved from 0.47972 to 0.47836, saving model to /content/drive/My Drive/MELD-master/data/models/text_weights_emotion.hdf5
Epoch 6/100

Epoch 00006: val_loss improved from 0.47836 to 0.47695, saving model to /content/drive/My Drive/MELD-master/data/models/text_weight



Confusion Matrix :
[[1129   48    0   13   56    0   10]
 [  75  141    0    1   52    0   12]
 [  28    5    0    0    9    0    8]
 [ 141    9    0   14   23    0   21]
 [ 143   33    0    5  203    0   18]
 [  42    9    0    4    3    0   10]
 [ 126   59    0   10   75    0   75]]
Classification Report :
              precision    recall  f1-score   support

           0     0.6704    0.8989    0.7680      1256
           1     0.4638    0.5018    0.4821       281
           2     0.0000    0.0000    0.0000        50
           3     0.2979    0.0673    0.1098       208
           4     0.4822    0.5050    0.4933       402
           5     0.0000    0.0000    0.0000        68
           6     0.4870    0.2174    0.3006       345

    accuracy                         0.5985      2610
   macro avg     0.3430    0.3129    0.3077      2610
weighted avg     0.5349    0.5985    0.5460      2610

Weighted FScore: 
  (0.5349443107685684, 0.5984674329501916, 0.5459611896110692, None)


  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Reload the saved checkpoint and evaluate it again. train_model() already
# calls test_model(), so this cell repeats that evaluation.
model.test_model()



Confusion Matrix :
[[1129   48    0   13   56    0   10]
 [  75  141    0    1   52    0   12]
 [  28    5    0    0    9    0    8]
 [ 141    9    0   14   23    0   21]
 [ 143   33    0    5  203    0   18]
 [  42    9    0    4    3    0   10]
 [ 126   59    0   10   75    0   75]]
Classification Report :
              precision    recall  f1-score   support

           0     0.6704    0.8989    0.7680      1256
           1     0.4638    0.5018    0.4821       281
           2     0.0000    0.0000    0.0000        50
           3     0.2979    0.0673    0.1098       208
           4     0.4822    0.5050    0.4933       402
           5     0.0000    0.0000    0.0000        68
           6     0.4870    0.2174    0.3006       345

    accuracy                         0.5985      2610
   macro avg     0.3430    0.3129    0.3077      2610
weighted avg     0.5349    0.5985    0.5460      2610

Weighted FScore: 
  (0.5349443107685684, 0.5984674329501916, 0.5459611896110692, None)


  _warn_prf(average, modifier, msg_start, len(result))
