<a href="https://colab.research.google.com/github/anitayadav3/EmotionRecognitionInConversation/blob/master/Run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Required Imports**

In [20]:
import pickle
import re
import string

import nltk
import numpy as np
import requests
from keras.utils.np_utils import to_categorical
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


**Loading the training and test data**

In [2]:
# Mount Google Drive at /content/gdrive; the dataset pickles are read from
# paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
# Single root for every dataset file, so the Drive folder is spelled one way
# only (the paths previously mixed 'My Drive' and 'MyDrive').
DATASET_DIR = '/content/gdrive/MyDrive/Dataset/iemocap'


def _load_pickle(relative_path):
    """Return the object stored in the pickle at DATASET_DIR/relative_path.

    NOTE(security): pickle.load can execute arbitrary code while
    deserialising, so this must only be pointed at trusted files.
    """
    with open(DATASET_DIR + '/' + relative_path, 'rb') as f:
        return pickle.load(f)


data = _load_pickle('train/sentences.pkl')
labels = _load_pickle('train/labels.pkl')
test_data = _load_pickle('test/sentences.pkl')
test_labels = _load_pickle('test/labels.pkl')
train_convlen = _load_pickle('train/conversation_length.pkl')
test_convlen = _load_pickle('test/conversation_length.pkl')

**Preprocessing the Training Data**

Functions to reorder the sentences of each conversation and to flatten the tokenised data into a list of sentences with the padding markers removed

In [4]:
def arrange_sentences(data,data2,data3):
  """Regroup each conversation so even-offset items precede odd-offset ones.

  ``data2`` and ``data3`` are consumed front to back in chunks whose sizes
  are given by ``data``. Within every chunk the items at offsets 0, 2, 4, ...
  are emitted first, followed by the items at offsets 1, 3, 5, ...

  Args:
    data: iterable of chunk sizes, one per conversation.
    data2: flat sequence of sentences.
    data3: flat sequence of labels, indexed in step with ``data2``.

  Returns:
    Tuple ``(sentences, labels)`` holding the two reordered lists.
  """
  reordered_sentences=[]
  reordered_labels=[]
  cursor=0
  for chunk_size in data:
    positions=range(cursor,cursor+chunk_size)
    # Even offsets first, then the odd offsets the first pass skipped.
    for position in list(positions[0::2])+list(positions[1::2]):
      reordered_sentences.append(data2[position])
      reordered_labels.append(data3[position])
    cursor+=len(positions)
  return reordered_sentences,reordered_labels

def preprocessing(data,labels):
  """Flatten nested token data and labels, dropping padding markers.

  Args:
    data: nested sequence indexed as ``data[i][j][k]``; every ``data[i][j]``
      is one token sequence.
    labels: sequence of label sequences.

  Returns:
    Tuple ``(processed_data, processed_label)``: one token list per
    ``data[i][j]`` with the ``'<eos>'`` and ``'<pad>'`` entries removed, and
    a flat list of all labels in their original order.
  """
  processed_data=[
      [token for token in utterance if token != '<eos>' and token != '<pad>']
      for group in data
      for utterance in group
  ]
  processed_label=[label for group_labels in labels for label in group_labels]
  return processed_data,processed_label

In [5]:
# Strip the padding markers, rebuild every token list as one space-separated
# string, then reorder the sentences conversation by conversation.
token_lists,processed_label = preprocessing(data,labels)
processed_data = [' '.join(tokens) for tokens in token_lists]
processed_data,processed_label=arrange_sentences(train_convlen,processed_data,processed_label)

**Adding ConceptNet Features**

In [8]:
def ConceptNet_Sentences(data,labels,timeout=10):
  """Augment every sentence with two ConceptNet-based variants.

  For each input sentence the output holds the sentence itself followed by
  two variants. Variant ``rank`` (1, then 2) is built by replacing each
  non-stopword token with the term found at index ``rank`` of the
  ``related`` list that ConceptNet returns for that token. A token whose
  lookup fails, or whose list has no entry at that index, is left as is.
  All three entries carry the label of the source sentence.

  Args:
    data: sequence of sentence strings.
    labels: sequence of labels, indexed in step with ``data``.
    timeout: seconds to wait for each ConceptNet HTTP request.

  Returns:
    Tuple ``(conceptnet_data, conceptnet_labels)`` of two lists, each three
    times as long as ``data``.
  """
  conceptnet_data=[]
  conceptnet_labels=[]
  strip_punctuation=str.maketrans('', '', string.punctuation)
  # The stopword list does not depend on the sentence, so it is evaluated once
  # (on first use) instead of once per token.
  # NOTE(review): stopwords.words() is called without a language argument, so
  # the filter is not restricted to English — confirm this is intended.
  stop_words=None
  # token -> 'related' list of a successful lookup. The list is the same for
  # both variants and for every repetition of the word, so fetch it once.
  related_cache={}
  for index,current_sentence in enumerate(data):
    label=labels[index]
    conceptnet_data.append(current_sentence)
    conceptnet_labels.append(label)
    if stop_words is None:
      stop_words=set(stopwords.words())
    cleaned_sentence=current_sentence.translate(strip_punctuation)
    tokens=[word for word in nltk.word_tokenize(cleaned_sentence) if word not in stop_words]
    for rank in range(1,3):
      variant=current_sentence
      for token in tokens:
        related=related_cache.get(token)
        if related is None:
          try:
            reply=requests.get('http://api.conceptnet.io/related/c/en/' + token + '?filter=/c/en', timeout=timeout).json()
            related=reply['related']
          except (requests.exceptions.RequestException, ValueError, LookupError, TypeError):
            # Network failure, non-JSON body or unexpected payload: skip this
            # token. Failures are not cached, so the next use retries.
            continue
          related_cache[token]=related
        try:
          response=related[rank]['@id']
        except (LookupError, TypeError):
          # Fewer than rank+1 related terms, or a malformed entry.
          continue
        # '/c/en/term' -> ' c en term' -> 'term'
        response=re.sub(r'[^\w]', ' ', response)
        response=response[6:]
        # NOTE(review): str.replace substitutes every substring occurrence, so
        # a token that is part of a longer word is replaced inside it too.
        variant=variant.replace(token,response)
      conceptnet_data.append(variant)
      conceptnet_labels.append(label)
  return conceptnet_data,conceptnet_labels

In [None]:
# Expand every sentence into three entries: the original plus two ConceptNet
# variants, each with the original's label.
# NOTE: not idempotent — the result overwrites its own input, so re-running
# this cell augments the already-augmented data again.
processed_data,processed_label = ConceptNet_Sentences(processed_data,processed_label)

**Adding Contextual Info from VADER**

In [11]:
!pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

Collecting vaderSentiment
[?25l  Downloading https://files.pythonhosted.org/packages/76/fc/310e16254683c1ed35eeb97386986d6c00bc29df17ce280aed64d55537e9/vaderSentiment-3.3.2-py2.py3-none-any.whl (125kB)
[K     |██▋                             | 10kB 23.2MB/s eta 0:00:01[K     |█████▏                          | 20kB 30.6MB/s eta 0:00:01[K     |███████▉                        | 30kB 25.8MB/s eta 0:00:01[K     |██████████▍                     | 40kB 29.0MB/s eta 0:00:01[K     |█████████████                   | 51kB 26.8MB/s eta 0:00:01[K     |███████████████▋                | 61kB 29.5MB/s eta 0:00:01[K     |██████████████████▏             | 71kB 18.7MB/s eta 0:00:01[K     |████████████████████▉           | 81kB 19.7MB/s eta 0:00:01[K     |███████████████████████▍        | 92kB 18.3MB/s eta 0:00:01[K     |██████████████████████████      | 102kB 18.1MB/s eta 0:00:01[K     |████████████████████████████▋   | 112kB 18.1MB/s eta 0:00:01[K     |██████████████████████████

Function to add VADER features

In [14]:
def add_Vader_Features(sentences):
  """Append sentiment tag words to each sentence.

  Every sentence is scored with the module-level ``analyzer``. For each of
  the 'neg', 'pos', 'neu' and 'compound' scores that is greater than 0.2,
  the matching tag word ('negative', 'positive', 'neutral', 'compound') is
  appended to the sentence, space-separated and in that order.

  Args:
    sentences: iterable of sentence strings.

  Returns:
    List with one tagged string per input sentence.
  """
  score_tags=(
      ('neg','negative'),
      ('pos','positive'),
      ('neu','neutral'),
      ('compound','compound'),
  )
  vader_features=[]
  for sentence in sentences:
    scores=analyzer.polarity_scores(sentence)
    tags=[tag for key,tag in score_tags if scores[key] > 0.2]
    vader_features.append(' '.join([sentence]+tags))
  return vader_features

In [15]:
# Append the VADER sentiment tag words to every sentence.
# NOTE: overwrites its own input; re-running this cell tags the already-tagged
# sentences a second time.
processed_data = add_Vader_Features(processed_data)

Converting the data into a form that can be passed to the model

In [21]:
# Turn the list of sentence strings into a NumPy array.
processed_data=np.asarray(processed_data)
# One-hot encode the labels. num_classes=6 presumably matches the number of
# emotion classes in the dataset — TODO confirm.
Y=to_categorical(processed_label, num_classes=6)