In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk import WordNetLemmatizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the training and test splits from CSV files in the working directory.
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv")
)

# Preview the first rows of the training split.
train_data.head()

In [None]:
# Show the (rows, columns) dimensions of the test split.
test_data.shape

In [None]:
# Integer code assigned to each emotion label (label -> code).
emotion_mapping = dict(joy=0, sadness=1, anger=2, fear=3, love=4, surprise=5)

In [None]:
# Count how many training rows carry each value of the 'Emotion' column.
train_data.value_counts('Emotion')

In [None]:
# Encode the emotion labels with the integer codes from `emotion_mapping`.
#
# The column is assigned explicitly instead of using `replace(..., inplace=True)`.
# The cell stays safe to re-run: values that are already integer codes do not match
# any (string) key of the mapping and are left untouched by `replace`.
#
# The explicit cast pins the label dtype to int64 rather than relying on pandas
# silently downcasting the object column after `replace` (that implicit downcast is
# deprecated in recent pandas releases). It also fails immediately if a label that is
# missing from `emotion_mapping`, or a missing value, is still present.
train_data["Emotion"] = train_data["Emotion"].replace(emotion_mapping).astype("int64")
test_data["Emotion"] = test_data["Emotion"].replace(emotion_mapping).astype("int64")

In [None]:
# Porter stemmer instance used by the `stemming` helper.
port_stem = PorterStemmer()

In [None]:
def stemming(content):
    """Clean a text string and reduce its words to Porter stems.

    Every character that is not an ASCII letter is replaced by a space, the text is
    lower-cased and split on whitespace, English stop words are dropped, and each
    remaining word is stemmed with the module-level ``port_stem``.

    Parameters
    ----------
    content : str
        Raw text to clean.

    Returns
    -------
    str
        The stemmed words joined by single spaces ('' when no word remains).
    """
    # Build the stop-word set once per call. The original evaluated
    # stopwords.words('english') inside the comprehension, i.e. once per word,
    # and tested membership against the returned list each time.
    stop_words = set(stopwords.words('english'))
    words = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(port_stem.stem(word) for word in words if word not in stop_words)

lemmatizer = WordNetLemmatizer()

# English stop words, looked up once and stored as a set for constant-time membership
# tests. The original called stopwords.words('english') for every word of every
# document, which dominated the cost of `.apply(preprocess)`.
# NOTE: this needs the NLTK 'stopwords' data (and the lemmatizer needs 'wordnet');
# no nltk.download call is visible in this notebook, so they must already be installed.
NLTK_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess(text):
    """Normalise a text string for vectorisation.

    Characters other than ASCII letters and whitespace are removed, the text is
    lower-cased and split on whitespace, English stop words are dropped, and each
    remaining word is lemmatised with the module-level ``lemmatizer``.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The lemmatised words joined by single spaces ('' when no word remains).
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    words = letters_only.lower().split()
    return ' '.join(
        lemmatizer.lemmatize(word) for word in words if word not in NLTK_STOP_WORDS
    )

In [None]:
# Clean every training text with `preprocess`, overwriting the 'Text' column.
train_data['Text'] = train_data['Text'].apply(preprocess)

In [None]:
# Pull the cleaned texts and the emotion labels out of the training frame as arrays.
text_train, emotion_train = (
    train_data[column].values for column in ('Text', 'Emotion')
)

In [None]:
# Apply the same cleaning to the test texts (overwriting the 'Text' column), then
# extract the texts and the emotion labels as arrays.
test_data['Text'] = test_data['Text'].apply(preprocess)
text_test, emotion_test = (
    test_data[column].values for column in ('Text', 'Emotion')
)

In [None]:
# TF-IDF vectoriser with the vocabulary capped at 2000 terms and scikit-learn's
# built-in English stop-word list.
vectorizer = TfidfVectorizer(max_features=2000,stop_words='english')
# Fit the vocabulary on the training texts only, then reuse it for the test texts;
# both sparse results are converted to dense arrays.
# NOTE(review): these lines overwrite text_train/text_test (cleaned strings -> dense
# feature arrays), so re-running this cell on its own would pass the arrays back into
# the vectoriser. Re-run the cells that define text_train/text_test first.
text_train = vectorizer.fit_transform(text_train).toarray()
text_test = vectorizer.transform(text_test).toarray()

In [None]:
# Hyper-parameter candidates: a linear-kernel SVM with two values of C.
param_grid = dict(C=[1, 10], kernel=['linear'])
svm = SVC()

# 3-fold cross-validated grid search with n_jobs=-1. fit() returns the fitted search
# object, so it is chained directly onto the constructor.
model = GridSearchCV(estimator=svm, param_grid=param_grid, cv=3, n_jobs=-1).fit(
    text_train, emotion_train
)

# Estimator refitted with the best parameter combination found by the search.
best_model = model.best_estimator_

In [None]:
# Accuracy on the same data the model was fitted on.
train_prediction = best_model.predict(text_train)
# accuracy_score is documented as accuracy_score(y_true, y_pred); the arguments were
# previously passed in the opposite order. The score is unchanged, but the call now
# follows the documented signature and the confusion_matrix/classification_report
# calls later in the notebook.
train_acc = accuracy_score(emotion_train, train_prediction)
print("Training Accuracy:", train_acc * 100)

In [None]:
# Accuracy on the held-out test split.
test_prediction = best_model.predict(text_test)
# accuracy_score is documented as accuracy_score(y_true, y_pred); the arguments were
# previously passed in the opposite order. The score is unchanged, but the call now
# follows the documented signature and the confusion_matrix/classification_report
# calls later in the notebook.
test_acc = accuracy_score(emotion_test, test_prediction)
print("Testing Accuracy:", test_acc * 100)

In [None]:
# Per-class evaluation on the test split: true labels first, predictions second.
test_confusion = confusion_matrix(emotion_test, test_prediction)
test_report = classification_report(emotion_test, test_prediction)

print("Confusion Matrix:\n", test_confusion)
print("Classification Report:\n", test_report)

## **Prediction System**
---

In [None]:
# Invert the label -> code mapping once, outside the loop, so predicted codes can be
# translated back to emotion names.
emotion_labels = {code: label for label, code in emotion_mapping.items()}

# Interactive prediction loop: read a text, clean it, vectorise it with the fitted
# TF-IDF vocabulary and print the predicted emotion until the user types 'exit'.
while True:
    text_input = input("Enter a review (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so inputs such as "exit " still end the loop.
    if text_input.strip().lower() == 'exit':
        break

    cleaned_text = preprocess(text_input)
    if not cleaned_text:
        # Nothing survived cleaning (empty input, or only stop words / symbols), so
        # there is nothing meaningful to classify.
        print("No usable words found in the input; please try again.")
        continue

    features = vectorizer.transform([cleaned_text]).toarray()
    prediction = best_model.predict(features)

    # Map prediction back to emotion label
    print("Prediction:", emotion_labels[prediction[0]])