In [1]:
import pandas as pd
import numpy as np

# Read in the data from the csv file
# (relative path: the CSV is resolved against the notebook's working directory)
emotionData = pd.read_csv("./emotion_data_v1.csv")

# Display the first rows as a quick check of the loaded columns
emotionData.head()

Unnamed: 0,text,emotion
0,lovely,3
1,ok sound goood hehe,3
2,egg holder make ash wood shop link egg holder ...,3
3,buy book review help get amazon new release su...,4
4,eeek come im soo excite see thursday,1


In [2]:
def convertToEmotion(emot):
    """Translate a numeric emotion label into its human-readable name.

    Labels 0 through 5 map, in order, to "neutral", "happy", "sad", "love",
    "anger" and "fear". Any value that compares equal to none of those codes
    yields "unknown".
    """
    # Position in the tuple is the numeric code for that emotion.
    names = ("neutral", "happy", "sad", "love", "anger", "fear")
    for code, name in enumerate(names):
        if emot == code:
            return name
    return "unknown"

In [3]:
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Prepare the data
# Select the feature and label columns by name instead of by position, so an
# extra leading column (e.g. an "Unnamed: 0" index column written by
# DataFrame.to_csv) cannot silently shift which column is used as text/label.
texts = emotionData["text"].values
emotions = emotionData["emotion"].values




In [4]:
# Coerce every entry to str (presumably to guard against non-string cells
# such as missing values — confirm against the dataset)
texts = list(map(str, texts))

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    emotions,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vectorizer on the training split only, then apply the same
# fitted vectorizer to the test split
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)



In [5]:
# Build the SVM model
# SVC() is constructed with no arguments (library defaults) and fitted on the
# TF-IDF features of the training split.
svm_model = SVC()
svm_model.fit(X_train_vectorized, y_train)

In [6]:
# Evaluate the model
# Predict labels for the held-out test features, then print the per-class
# precision / recall / F1 report against the true test labels.
y_pred = svm_model.predict(X_test_vectorized)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.49      0.26      0.34      2099
           1       0.66      0.82      0.74      5718
           2       0.63      0.73      0.68      4108
           3       0.80      0.59      0.68      2086
           4       0.92      0.70      0.79      1940
           5       0.84      0.72      0.78       452

    accuracy                           0.68     16403
   macro avg       0.72      0.64      0.67     16403
weighted avg       0.69      0.68      0.67     16403



In [7]:
# Accuracy is the fraction of test predictions that match the true labels.
# Reuse y_pred from the evaluation cell instead of calling svm_model.score(),
# which would run a second full prediction pass over the test set.
print("Accuracy: ", float(np.mean(y_pred == y_test)))

Accuracy:  0.6819484240687679


In [8]:
# predict the emotion of a new text
# The vectorizer expects an iterable of documents, hence the one-element list.
# NOTE(review): the training rows shown by head() look pre-processed
# (e.g. "eeek come im soo excite see thursday"), while this sentence is
# vectorized as-is — confirm whether the same preprocessing should be applied.
new_text = ["i feel today is a good day"]
new_text_vectorized = vectorizer.transform(new_text)
new_pred = svm_model.predict(new_text_vectorized)

# predict() returns one label per input document; map the single label to its name
emotion = convertToEmotion(new_pred[0])

print(emotion)

happy


In [9]:
# save all the models
import pickle

# Persist the fitted classifier and vectorizer. The `with` blocks guarantee
# each file is flushed and closed once the dump finishes (the bare open(...)
# calls previously left the handles for the garbage collector to close).
with open("svm_model.sav", 'wb') as model_file:
    pickle.dump(svm_model, model_file)
with open("vectorizer.sav", 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

In [16]:
# Load the saved model and vectorizer, then predict the emotion of a sample text

import pickle

# Reload the persisted classifier and vectorizer. The `with` blocks close each
# file handle as soon as the object has been read.
# NOTE: pickle.load can execute arbitrary code; only load .sav files that come
# from a trusted source (such as the ones written by this notebook).
with open("svm_model.sav", 'rb') as model_file:
    svm_model = pickle.load(model_file)
with open("vectorizer.sav", 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

def convertToEmotion(emot):
    """Translate a numeric emotion label into its human-readable name.

    Labels 0 through 5 map, in order, to "neutral", "happy", "sad", "love",
    "anger" and "fear". Any value that compares equal to none of those codes
    yields "unknown".

    NOTE(review): this repeats the definition from an earlier cell of this
    notebook; presumably it is kept so this cell can run on its own.
    """
    # Position in the tuple is the numeric code for that emotion.
    names = ("neutral", "happy", "sad", "love", "anger", "fear")
    for code, name in enumerate(names):
        if emot == code:
            return name
    return "unknown"
    
def predictEmotion(text):
    """Return the emotion name predicted for a single piece of text.

    The text is wrapped in a one-element list, transformed with the
    module-level ``vectorizer``, classified with the module-level
    ``svm_model``, and the first predicted label is mapped to its name via
    ``convertToEmotion``.
    """
    features = vectorizer.transform([text])
    predicted_label = svm_model.predict(features)[0]
    return convertToEmotion(predicted_label)

# Classify a hard-coded sample sentence and print the predicted emotion name
text = "i am afraid of the dark"
emotion = predictEmotion(text)
print("The emotion of your text is: ", emotion)


The emotion of your text is:  fear


: 