In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk import WordNetLemmatizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
# Read the train and test splits from CSV files in the working directory
# (train first, then test).
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv")
)
# Preview the first rows of the training frame as the cell output.
train_data.head()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [3]:
# Display the (rows, columns) dimensions of the test frame.
test_data.shape

(2000, 2)

In [4]:
# Integer code assigned to each emotion label; a label's position in this
# tuple is its code (joy -> 0, ..., surprise -> 5).
emotion_mapping = {
    label: code
    for code, label in enumerate(
        ('joy', 'sadness', 'anger', 'fear', 'love', 'surprise')
    )
}

In [5]:
# Number of training rows per emotion label (class balance check).
train_data.value_counts(subset='Emotion')

Emotion
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64

In [6]:
# Encode the string labels as integers with an explicit element-wise lookup.
# `DataFrame.replace(..., inplace=True)` relied on pandas silently downcasting
# the object column to int, which is deprecated and emits a FutureWarning.
def _encode_emotion(label):
    """Return the integer code for `label`; values not in the mapping pass through.

    Passing unknown values through keeps this cell safe to re-run once the
    column already holds integer codes.
    """
    return emotion_mapping.get(label, label)


train_data["Emotion"] = train_data["Emotion"].map(_encode_emotion)
test_data["Emotion"] = test_data["Emotion"].map(_encode_emotion)

  train_data.replace({"Emotion": emotion_mapping}, inplace=True)
  test_data.replace({"Emotion": emotion_mapping}, inplace=True)


In [7]:
# Shared Porter stemmer instance, used by `stemming` below.
port_stem = PorterStemmer()

In [8]:
# Load the English stop-word list once. Calling `stopwords.words('english')`
# for every token re-evaluates the list each time and then scans it linearly;
# a frozenset gives constant-time membership tests with identical results.
_STEMMING_STOP_WORDS = frozenset(stopwords.words('english'))


def stemming(content):
    """Normalize `content` and reduce each remaining word to its Porter stem.

    Steps: replace every non-letter character with a space, lowercase, split
    on whitespace, drop English stop words, stem each token with `port_stem`,
    and join the stems with single spaces.

    Parameters
    ----------
    content : str
        Raw text to normalize.

    Returns
    -------
    str
        Space-separated stemmed tokens (empty string if nothing remains).
    """
    tokens = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(
        port_stem.stem(token)
        for token in tokens
        if token not in _STEMMING_STOP_WORDS
    )

lemmatizer = WordNetLemmatizer()

# Load the English stop-word list once. The previous version called
# `stopwords.words('english')` for every token, re-evaluating the list and
# scanning it linearly each time; a frozenset lookup returns the same answer.
_ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess(text):
    """Clean `text` for vectorization: strip symbols, lowercase, drop stop words, lemmatize.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        Space-separated lemmatized tokens (empty string if nothing remains).
    """
    # Delete (rather than space out) anything that is not a letter or whitespace,
    # so "didn't" becomes "didnt".
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    kept_lemmas = [
        lemmatizer.lemmatize(token)
        for token in letters_only.split()
        if token not in _ENGLISH_STOP_WORDS
    ]
    return ' '.join(kept_lemmas)

In [9]:
# Run every training sentence through `preprocess`, overwriting the raw text column.
train_data['Text'] = train_data['Text'].map(preprocess)

In [10]:
# Pull the cleaned text and the encoded labels out of the frame as NumPy arrays.
text_train, emotion_train = (
    train_data[column].to_numpy() for column in ('Text', 'Emotion')
)

In [11]:
# Clean the test text with the same `preprocess` function used for training,
# then extract text and labels as NumPy arrays.
test_data['Text'] = test_data['Text'].map(preprocess)
text_test, emotion_test = (
    test_data[column].to_numpy() for column in ('Text', 'Emotion')
)

In [12]:
# TF-IDF features with a vocabulary capped at 2000 terms.
vectorizer = TfidfVectorizer(stop_words='english', max_features=2000)

# Fit on the training text only, then reuse the fitted vocabulary for the test
# text. Both matrices are converted to dense arrays. The tuple is evaluated
# left to right, so fitting happens before the test transform.
text_train, text_test = (
    vectorizer.fit_transform(text_train).toarray(),
    vectorizer.transform(text_test).toarray(),
)

In [13]:
# Candidate hyper-parameters: a linear-kernel SVM at two regularization strengths.
param_grid = {
    'C': [1, 10],
    'kernel': ['linear'],
}
svm = SVC()

# 3-fold cross-validated search using all available cores; `fit` returns the
# fitted search object itself.
model = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
).fit(text_train, emotion_train)

# Estimator refit with the best-scoring parameter combination.
best_model = model.best_estimator_

In [14]:
# Accuracy on the data the model was fitted on (an optimistic estimate).
train_prediction = best_model.predict(text_train)
# scikit-learn metrics take (y_true, y_pred); the arguments were previously
# swapped. Accuracy is symmetric, so the value is unchanged, but the order now
# matches the API and the confusion-matrix/report cell.
train_acc = accuracy_score(emotion_train, train_prediction)
print("Training Accuracy:", train_acc * 100)

Training Accuracy: 92.70625


In [15]:
# Accuracy on the held-out test split.
test_prediction = best_model.predict(text_test)
# scikit-learn metrics take (y_true, y_pred); the arguments were previously
# swapped. Accuracy is symmetric, so the value is unchanged, but the order now
# matches the API and the confusion-matrix/report cell.
test_acc = accuracy_score(emotion_test, test_prediction)
print("Testing Accuracy:", test_acc * 100)

Testing Accuracy: 87.8


In [16]:
# Per-class breakdown of test performance; rows of the confusion matrix are
# true labels and columns are predictions, both in integer-code order.
test_confusion = confusion_matrix(emotion_test, test_prediction)
test_report = classification_report(emotion_test, test_prediction)

print("Confusion Matrix:\n", test_confusion)
print("Classification Report:\n", test_report)

Confusion Matrix:
 [[649   8   4   6  23   5]
 [ 27 522  18   9   2   3]
 [ 15  14 237   6   2   1]
 [  4  11  10 194   1   4]
 [ 41   1   0   0 115   2]
 [ 11   2   0  14   0  39]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.93      0.90       695
           1       0.94      0.90      0.92       581
           2       0.88      0.86      0.87       275
           3       0.85      0.87      0.86       224
           4       0.80      0.72      0.76       159
           5       0.72      0.59      0.65        66

    accuracy                           0.88      2000
   macro avg       0.84      0.81      0.83      2000
weighted avg       0.88      0.88      0.88      2000



## **Prediction System**
---

In [17]:
# Invert the label encoding (code -> emotion name) once, instead of rebuilding
# the same dictionary on every loop iteration.
emotion_labels = {v: k for k, v in emotion_mapping.items()}

# Interactive loop: read a sentence, clean and vectorize it exactly like the
# training data, and print the predicted emotion. Type 'exit' to stop.
while True:
    # Strip surrounding whitespace so inputs such as " exit " still quit.
    text_input = input("Enter a review (or type 'exit' to quit): ").strip()
    if text_input.lower() == 'exit':
        break
    if not text_input:
        # Blank input contains no vocabulary terms, so any prediction for it
        # would be meaningless; ask again instead.
        continue

    features = vectorizer.transform([preprocess(text_input)]).toarray()
    prediction = best_model.predict(features)

    # Map prediction back to emotion label
    print("Prediction:", emotion_labels[int(prediction[0])])

Prediction: sadness
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: sadness
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: sadness
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
Prediction: joy
