In [68]:
if 'google.colab' in str(get_ipython()):
  from google.colab import drive
  drive.mount('/content/gdrive', force_remount=True)
  data_path = 'gdrive/MyDrive/cds-project/data/emotions-nlp/' # Replace with path to datasets in your g drive
  glove_path = 'gdrive/MyDrive/cds-project/glove.6B.100d.txt'
else:
  data_path = '../data/emotions-nlp/' # Replace with path to datasets on local machine
  glove_path = '../glove_model/glove.6B.100d.txt'
print("Reading dataset from path " + data_path)

Mounted at /content/gdrive
Reading dataset from path gdrive/MyDrive/cds-project/data/emotions-nlp/


In [69]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [70]:
import pandas as pd

# Get data
df_train = pd.read_csv(data_path + 'processed_train.csv')
df_test = pd.read_csv(data_path + 'processed_test.csv')
df_val = pd.read_csv(data_path + 'processed_val.csv')

df_train.head(10)

Unnamed: 0,text,emotion
0,didnt feel humiliated,sadness
1,go feeling hopeless damned hopeful around some...,sadness
2,im grabbing minute post feel greedy wrong,anger
3,ever feeling nostalgic fireplace know still pr...,love
4,feeling grouchy,anger
5,ive feeling little burdened lately wasnt sure,sadness
6,ive taking milligrams times recommended amount...,surprise
7,feel confused life teenager jaded year old man,fear
8,petronas years feel petronas performed well ma...,joy
9,feel romantic,love


In [71]:
# Get x and y columns
X_train = df_train['text']
y_train = df_train['emotion']

X_test = df_test['text']
y_test = df_test['emotion']

# Logistic Regression Model

In [72]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Logistic Regression Pipeline
lr_pipe = Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression(solver='liblinear'))])

# Train and fit data
lr_pipe.fit(X_train, y_train)
lr_y_pred = lr_pipe.predict(X_test)
lr_y_pred_probs = lr_pipe.predict_log_proba(X_test)

# Naive Bayes Model

In [73]:
cv = CountVectorizer()

X_all = pd.concat([X_train, X_test])
X_all_mat = cv.fit_transform(X_all)

In [74]:
from sklearn.model_selection import train_test_split

test_size = X_test.count()

X_train_mat, X_test_mat = train_test_split(X_all_mat, test_size=test_size, shuffle=False)

In [75]:
from sklearn.naive_bayes import MultinomialNB

nv_model = MultinomialNB()
nv_model.fit(X_train_mat, y_train)

nv_y_pred = nv_model.predict(X_test_mat)
nv_y_pred_probs = nv_model.predict_log_proba(X_test_mat)

# Evaluate models

In [76]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, log_loss

def evaluate_model(true, pred, pred_probs, classes):
    conf = confusion_matrix(true, pred, labels=classes)
    print("Confusion Matrix:")
    print(conf)

    acc_score = accuracy_score(true, pred)
    ce_loss = log_loss(true, pred_probs)
    print('\nAccuracy: {:.3f}'.format(acc_score))
    print('Cross entropy loss: {:.3f}'.format(ce_loss))

    print('\nClassification Report\n')
    print(classification_report(true, pred))

In [79]:
# Evaluate Logistic Regression Model
classes = list(df_test['emotion'].unique())

print("Evaluating Logistic Regress Model\n")
evaluate_model(y_test.values, lr_y_pred, lr_y_pred_probs, classes)

Evaluating Logistic Regress Model

Confusion Matrix:
[[542  14   7  14   2   2]
 [  5 652   4   4  26   4]
 [ 15   4 189   9   1   6]
 [ 17   9   9 238   2   0]
 [  1  35   0   3 119   1]
 [  2  12  13   1   0  38]]

Accuracy: 0.889
Cross entropy loss: 1.792

Classification Report

              precision    recall  f1-score   support

       anger       0.88      0.87      0.88       275
        fear       0.85      0.84      0.85       224
         joy       0.90      0.94      0.92       695
        love       0.79      0.75      0.77       159
     sadness       0.93      0.93      0.93       581
    surprise       0.75      0.58      0.65        66

    accuracy                           0.89      2000
   macro avg       0.85      0.82      0.83      2000
weighted avg       0.89      0.89      0.89      2000



In [80]:
# Evaluate Naive Bayes Model
print("Evaluating Naive Bayes Model\n")
evaluate_model(y_test.values, nv_y_pred, nv_y_pred_probs, classes)

Evaluating Naive Bayes Model

Confusion Matrix:
[[539  31   3   6   1   1]
 [ 16 664   5   1   8   1]
 [ 45  28 144   7   0   0]
 [ 55  30   7 182   0   1]
 [ 25  73   1   4  56   0]
 [ 15  29  14   1   0   7]]

Accuracy: 0.796
Cross entropy loss: 1.792

Classification Report

              precision    recall  f1-score   support

       anger       0.91      0.66      0.76       275
        fear       0.83      0.64      0.72       224
         joy       0.78      0.96      0.86       695
        love       0.86      0.35      0.50       159
     sadness       0.78      0.93      0.84       581
    surprise       0.70      0.11      0.18        66

    accuracy                           0.80      2000
   macro avg       0.81      0.61      0.65      2000
weighted avg       0.80      0.80      0.78      2000

