<a href="https://colab.research.google.com/github/anitayadav3/EmotionRecognitionInConversation/blob/master/Final_Run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pickle

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


**Combining the pretrained ConceptNet Features **

In [5]:
with open('/content/processed_data1.pkl', 'rb') as f:
    processed_data1 = pickle.load(f)
with open('/content/processed_data2.pkl', 'rb') as f:
    processed_data2 = pickle.load(f)
with open('/content/processed_data3.pkl', 'rb') as f:
    processed_data3 = pickle.load(f)

In [6]:
processed_data = processed_data1 + processed_data2 + processed_data3

In [7]:
with open('/content/test_processed_data1.pkl', 'rb') as f:
    test_processed_data1 = pickle.load(f)
with open('/content/test_processed_data2.pkl', 'rb') as f:
    test_processed_data2 = pickle.load(f)
with open('/content/test_processed_data3.pkl', 'rb') as f:
    test_processed_data3 = pickle.load(f)

In [8]:
test_processed_data = test_processed_data1 + test_processed_data2 + test_processed_data3

In [9]:
with open('/content/gdrive/My Drive/iemocap/train/labels.pkl', 'rb') as f:
    labels = pickle.load(f)
with open('/content/gdrive/My Drive/iemocap/test/labels.pkl', 'rb') as f:
    test_labels = pickle.load(f)

In [10]:
def preprocessing(labels):
  processed_label=[]
  for i in labels:
    for j in i:
      processed_label.append(j)
  return processed_label

In [11]:
processed_label = preprocessing(labels)
test_processed_label = preprocessing(test_labels)

**BERT FE and Model Creation**

In [12]:
#Imports for Model
import tensorflow_hub as hub
import numpy as np
from keras.utils.np_utils import to_categorical 

In [13]:
!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py
!pip install sentencepiece
import tokenization

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/14/67/e42bd1181472c95c8cda79305df848264f2a7f62740995a46945d9797b67/sentencepiece-0.1.95-cp36-cp36m-manylinux2014_x86_64.whl (1.2MB)
[K     |▎                               | 10kB 23.7MB/s eta 0:00:01[K     |▌                               | 20kB 30.3MB/s eta 0:00:01[K     |▉                               | 30kB 23.8MB/s eta 0:00:01[K     |█                               | 40kB 27.6MB/s eta 0:00:01[K     |█▍                              | 51kB 28.1MB/s eta 0:00:01[K     |█▋                              | 61kB 30.4MB/s eta 0:00:01[K     |██                              | 71kB 20.3MB/s eta 0:00:01[K     |██▏                             | 81kB 21.5MB/s eta 0:00:01[K     |██▌                             | 92kB 20.2MB/s eta 0:00:01[K     |██▊                             | 102kB 19.8MB/s eta 0:00:01[K     |███                             | 112kB 19.8MB/s eta 0:00:01[K     |███▎        

In [14]:
def bert_encode(texts, tokenizer, max_len=512):
    all_tokens = []
    all_masks = []
    all_segments = []
    
    for text in texts:
        text = tokenizer.tokenize(text)
            
        text = text[:max_len-2]
        input_sequence = ["[CLS]"] + text + ["[SEP]"]
        pad_len = max_len - len(input_sequence)
        
        tokens = tokenizer.convert_tokens_to_ids(input_sequence)
        tokens += [0] * pad_len
        pad_masks = [1] * len(input_sequence) + [0] * pad_len
        segment_ids = [0] * max_len
        
        all_tokens.append(tokens)
        all_masks.append(pad_masks)
        all_segments.append(segment_ids)
    
    return np.array(all_tokens), np.array(all_masks), np.array(all_segments)

In [15]:
def build_model(bert_layer, max_len=512):
    input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    input_mask = Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
    segment_ids = Input(shape=(max_len,), dtype=tf.int32, name="segment_ids")

    pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])
    clf_output = pooled_output
    clf_output1=tf.reshape(clf_output,[1,1,1024])
    gru_output1 = GRU(100, input_shape=(1024,), return_sequences=True)(clf_output1)
    dp_output1 = Dropout(0.2)(gru_output1)
    gru_output2 = GRU(100, return_sequences=True)(dp_output1)
    dp_output2 = Dropout(0.2)(gru_output2)
    gru_output3 = GRU(100)(dp_output2)
    dp_output3 = Dropout(0.2)(gru_output3)
    out = Dense(6, activation='softmax')(dp_output3)
    
    model = Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)
    model.compile(Adam(lr=2e-6), loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

In [16]:
%%time
module_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/1"
bert_layer = hub.KerasLayer(module_url, trainable=True)

CPU times: user 20.9 s, sys: 4.33 s, total: 25.2 s
Wall time: 30.5 s


**Processing the text for passing to BERT pretrained model**

In [17]:
processed_data=np.asarray(processed_data)
test_processed_data=np.asarray(test_processed_data)
Y=to_categorical(processed_label, num_classes=6)
test_Y=to_categorical(test_processed_label, num_classes=6)

In [18]:
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)

In [19]:
train_input = bert_encode(processed_data, tokenizer, max_len=458)
test_input = bert_encode(test_processed_data, tokenizer, max_len=458)
train_labels = Y
test_labels = test_Y

**Creating the Model**

In [25]:
#Import for model
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, GRU, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [21]:
model = build_model(bert_layer, max_len=458)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_word_ids (InputLayer)     [(None, 458)]        0                                            
__________________________________________________________________________________________________
input_mask (InputLayer)         [(None, 458)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 458)]        0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        [(None, 1024), (None 335141889   input_word_ids[0][0]             
                                                                 input_mask[0][0]             

In [22]:
train_history = model.fit(
    train_input, train_labels,
    epochs=6,
    batch_size=1
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [27]:
y_pred=model.predict(test_input, batch_size=1)

In [28]:
y_pred1=np.argmax(y_pred,axis=1)
test_processed_label=np.asarray(test_processed_label)
print("Accuracy : " + str(accuracy_score(test_processed_label, y_pred1)))
print("Weighted F1-score : " + str(f1_score(test_processed_label, y_pred1, average='weighted')))

Accuracy : 0.4824399260628466
Weighted F1-score : 0.4567251926053884
