In [1]:
import pandas as pd
df = pd.read_csv('nlp_dataset.csv')
df

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear
...,...,...
5932,i begun to feel distressed for you,fear
5933,i left feeling annoyed and angry thinking that...,anger
5934,i were to ever get married i d have everything...,joy
5935,i feel reluctant in applying there because i w...,fear


In [2]:
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Text cleaning function
def preprocess_text(text):
    # Lowercase conversion
    text = text.lower()
    # Removing special characters
    text = re.sub(r'\W', ' ', text)
    # Tokenization
    tokens = text.split()
    # Removing stopwords
    filtered_tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(filtered_tokens)

df['cleaned_text'] = df['Comment'].apply(preprocess_text)
df

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rinto\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,Comment,Emotion,cleaned_text
0,i seriously hate one subject to death but now ...,fear,seriously hate one subject death feel reluctan...
1,im so full of life i feel appalled,anger,im full life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,joy,ive really angry r feel like idiot trusting fi...
4,i feel suspicious if there is no one outside l...,fear,feel suspicious one outside like rapture happe...
...,...,...,...
5932,i begun to feel distressed for you,fear,begun feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,left feeling annoyed angry thinking center stu...
5934,i were to ever get married i d have everything...,joy,ever get married everything ready offer got to...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant applying want able find company...


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_text'])
df

Unnamed: 0,Comment,Emotion,cleaned_text
0,i seriously hate one subject to death but now ...,fear,seriously hate one subject death feel reluctan...
1,im so full of life i feel appalled,anger,im full life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,joy,ive really angry r feel like idiot trusting fi...
4,i feel suspicious if there is no one outside l...,fear,feel suspicious one outside like rapture happe...
...,...,...,...
5932,i begun to feel distressed for you,fear,begun feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,left feeling annoyed angry thinking center stu...
5934,i were to ever get married i d have everything...,joy,ever get married everything ready offer got to...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant applying want able find company...


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, df['Emotion'], test_size=0.2)

# Naive Bayes
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)
nb_preds
# SVM
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_preds = svm_model.predict(X_test)
svm_preds

array(['anger', 'fear', 'joy', ..., 'fear', 'anger', 'joy'], dtype=object)

In [6]:
nb_accuracy = accuracy_score(y_test, nb_preds)
nb_f1 = f1_score(y_test, nb_preds, average='weighted')

svm_accuracy = accuracy_score(y_test, svm_preds)
svm_f1 = f1_score(y_test, svm_preds, average='weighted')

print(f"Naive Bayes Accuracy: {nb_accuracy}, F1-score: {nb_f1}")
print(f"SVM Accuracy: {svm_accuracy}, F1-score: {svm_f1}")


Naive Bayes Accuracy: 0.9090909090909091, F1-score: 0.9091540548982993
SVM Accuracy: 0.9242424242424242, F1-score: 0.9241272419135236
