In [30]:
import pandas as pd
from joblib import dump
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC


In [31]:
# Read the labelled posts; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='data/text_data.csv')
# Bare last expression so the notebook renders a preview of the frame.
data

Unnamed: 0,Post,Emotion
0,Just finished watching the sunset. Feeling gra...,Positive
1,Can't believe how rude some people can be. Had...,Negative
2,Excited to announce that I got the job I've be...,Positive
3,Feeling lonely tonight. Missing my friends and...,Negative
4,Enjoying a relaxing day at the beach.,Positive
...,...,...
299,"Chasing rainbows in the storm, finding hope in...",Positive
300,"Trapped in the labyrinth of my thoughts, seeki...",Negative
301,"Embracing the journey of self-discovery, where...",Positive
302,"Haunted by the echoes of the past, yearning to...",Negative


In [32]:
# Normalise the text columns in one pass each.
# - `.str.lower()` replaces the per-row lambda and the duplicated lower-casing of 'Post';
#   unlike the lambda it passes missing values through instead of raising.
# - `regex=False` makes the period removal a literal match on every pandas version
#   (as a regex, '.' would match any character).
data['Post'] = data['Post'].str.lower().str.replace('.', '', regex=False)
data['Emotion'] = data['Emotion'].str.lower()
data

Unnamed: 0,Post,Emotion
0,just finished watching the sunset feeling grat...,positive
1,can't believe how rude some people can be had ...,negative
2,excited to announce that i got the job i've be...,positive
3,feeling lonely tonight missing my friends and ...,negative
4,enjoying a relaxing day at the beach,positive
...,...,...
299,"chasing rainbows in the storm, finding hope in...",positive
300,"trapped in the labyrinth of my thoughts, seeki...",negative
301,"embracing the journey of self-discovery, where...",positive
302,"haunted by the echoes of the past, yearning to...",negative


In [36]:
tfidf_vectorizer = TfidfVectorizer(max_features=1000)  
X = tfidf_vectorizer.fit_transform(data['Post'])
y = data['Emotion']

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
# Multinomial Naive Bayes classifier trained on the TF-IDF training features.
mnb_model = MultinomialNB()
mnb_model.fit(X=X_train, y=y_train)

In [8]:
# Score the Naive Bayes model on the held-out split and print per-class metrics.
y_pred_mnb = mnb_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred_mnb))

              precision    recall  f1-score   support

    negative       0.94      0.91      0.92        33
    positive       0.90      0.93      0.91        28

    accuracy                           0.92        61
   macro avg       0.92      0.92      0.92        61
weighted avg       0.92      0.92      0.92        61



In [9]:
# Support vector classifier with a linear kernel, trained on the same features.
svc_model = SVC(kernel='linear')
svc_model.fit(X=X_train, y=y_train)

In [10]:
# Score the SVC on the held-out split and print per-class metrics.
y_pred_svc = svc_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred_svc))

              precision    recall  f1-score   support

    negative       0.97      0.91      0.94        33
    positive       0.90      0.96      0.93        28

    accuracy                           0.93        61
   macro avg       0.93      0.94      0.93        61
weighted avg       0.94      0.93      0.93        61



In [19]:
# Persist the fitted vectoriser alongside the classifiers: the saved models only accept
# features produced by this exact vocabulary/IDF fit, so they are unusable without it.
# (`dump` is imported in the notebook's top import cell.)
dump(tfidf_vectorizer, "tfidf_vectorizer.joblib")
dump(mnb_model, "mnb_model.joblib")
dump(svc_model, "svc_model.joblib")

['svc_model.joblib']

In [80]:
# Ad-hoc smoke test: classify one hand-written post with the Naive Bayes model.
test_data = {'post' : ["i want to be happy"]}
test_df = pd.DataFrame(test_data)
# Apply the same normalisation the training posts received (lower-case, periods removed)
# so the vectoriser sees text in the form it was fitted on.
test_df['post'] = test_df['post'].str.lower().str.replace('.', '', regex=False)
test_x = tfidf_vectorizer.transform(test_df['post'])
test_pred = mnb_model.predict(test_x)

In [81]:
# Display the predicted label array for the single hand-written post.
# NOTE(review): the recorded output below is 'negative' for "i want to be happy" —
# worth sanity-checking the model on more hand-written examples.
test_pred

array(['negative'], dtype='<U8')