In [19]:
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
nltk.download('stopwords')

# Loading the dataset
dataset = pd.read_csv('nlp_dataset.csv')

[nltk_data] Downloading package stopwords to C:\Users\Windows
[nltk_data]     10\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [20]:
# Displaying the first few rows of the dataset
print("Dataset Head:")
print(dataset.head())


Dataset Head:
                                             Comment Emotion
0  i seriously hate one subject to death but now ...    fear
1                 im so full of life i feel appalled   anger
2  i sit here to write i start to dig out my feel...    fear
3  ive been really angry with r and i feel like a...     joy
4  i feel suspicious if there is no one outside l...    fear


In [21]:
# Preprocessing the Text
# Defining a function to clean and preprocess text

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = text.split()
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    return ' '.join(words)

# Apply preprocessing to the dataset
dataset['cleaned_text'] = dataset['Comment'].apply(preprocess_text)

# Display cleaned text
print("\nCleaned Dataset:")
print(dataset[['Comment', 'cleaned_text']].head())


Cleaned Dataset:
                                             Comment  \
0  i seriously hate one subject to death but now ...   
1                 im so full of life i feel appalled   
2  i sit here to write i start to dig out my feel...   
3  ive been really angry with r and i feel like a...   
4  i feel suspicious if there is no one outside l...   

                                        cleaned_text  
0  seriously hate one subject death feel reluctan...  
1                         im full life feel appalled  
2  sit write start dig feelings think afraid acce...  
3  ive really angry r feel like idiot trusting fi...  
4  feel suspicious one outside like rapture happe...  


In [22]:
# Feature Extraction using TfidfVectorizer
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(dataset['cleaned_text'])  
y = dataset['Emotion']  

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
#  Naive Bayes Classifier
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)

In [25]:
# Support Vector Machine Classifier
svm_model = SVC(kernel='linear') 
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

In [26]:
# Model Comparison and Evaluation

In [27]:
# Naive Bayes Evaluation
print("\nNaive Bayes Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_nb):.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_nb))


Naive Bayes Model Evaluation:
Accuracy: 0.91
Classification Report:
              precision    recall  f1-score   support

       anger       0.88      0.95      0.91       392
        fear       0.92      0.92      0.92       416
         joy       0.94      0.87      0.90       380

    accuracy                           0.91      1188
   macro avg       0.91      0.91      0.91      1188
weighted avg       0.91      0.91      0.91      1188



In [28]:
# SVM Evaluation
print("\nSVM Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm):.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_svm))



SVM Model Evaluation:
Accuracy: 0.94
Classification Report:
              precision    recall  f1-score   support

       anger       0.93      0.95      0.94       392
        fear       0.97      0.91      0.94       416
         joy       0.94      0.97      0.95       380

    accuracy                           0.94      1188
   macro avg       0.94      0.95      0.94      1188
weighted avg       0.95      0.94      0.94      1188

