In [19]:
# build RNN model, Deep Belief Network model and compare their performance

import pandas as pd
import numpy as np

# Read in the data from the csv file
emotionData = pd.read_csv("./emotion_data_v1.csv")

emotionData.head()

Unnamed: 0,text,emotion
0,lovely,3
1,ok sound goood hehe,3
2,egg holder make ash wood shop link egg holder ...,3
3,buy book review help get amazon new release su...,4
4,eeek come im soo excite see thursday,1


In [20]:
def convertToEmotion(emot):
    if emot == 0:
        return "neutral"
    elif emot == 1:
        return "happy"
    elif emot == 2:
        return "sad"
    elif emot == 3:
        return "love"
    elif emot == 4:
        return "anger"
    elif emot == 5:
        return "fear"
    else:
        return "unknown"

In [21]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Prepare the data
texts = emotionData.iloc[:, 0].values
emotions = emotionData.iloc[:, 1].values
texts = [str(text) for text in texts]


In [22]:
# build the tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# convert the texts to sequences
sequences = tokenizer.texts_to_sequences(texts)

# pad the sequences
maxlen = 100
padded_sequences = pad_sequences(sequences, maxlen=maxlen)


In [23]:
#  emotions are encoded as integers from 0 to 5
# convert to one-hot encoding
emotions = to_categorical(emotions)

# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, emotions, test_size=0.2, random_state=42)

# build the model
model = Sequential()
model.add(Embedding(len(tokenizer.word_index)+1, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

# train the model
model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.2)

# evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %f' % (accuracy*100))
print('Loss: %f' % (loss*100))




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 65.603852
Loss: 166.123366


In [None]:
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

In [24]:
#  build the Deep Belief Network model this sucks and takes forever
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
import numpy as np

rbm = BernoulliRBM(random_state=0, verbose=True, n_iter=40)
rbm.learning_rate = 0.06

y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

logistic = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

# Training RBM-Logistic Pipeline
classifier.fit(X_train, y_train)

# Evaluation
y_pred = classifier.predict(X_test)

print()
print("Classification report: \n%s\n"
      % (metrics.classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))





[BernoulliRBM] Iteration 1, pseudo-likelihood = -685392331.63, time = 4.03s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -1372598534.66, time = 7.06s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -2059804729.74, time = 7.10s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -2747010928.69, time = 7.07s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -3434217132.71, time = 7.19s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -4121423331.43, time = 7.30s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -4808629534.34, time = 7.48s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -5495835731.53, time = 7.09s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -6183041933.78, time = 7.00s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -6870248133.38, time = 6.97s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -7557454335.85, time = 6.99s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -8244660534.58, time = 6.94s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -8931866734.40, time = 7.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
