In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import string
import nltk
print('done')

done


In [2]:
df = pd.read_csv('emotiondataset.csv',names=['text','emotion'])

In [3]:
df.head()

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [4]:
df['emotion'].value_counts()

emotion
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64

In [5]:
mapping = {
    "joy": 0,
    "sadness": 1,
    "anger": 2,
    "fear": 3,
    "love": 4,
    "surprise": 5
}
df['emotion'] = df['emotion'].map(mapping)

In [6]:
df.isnull().sum()

text       0
emotion    0
dtype: int64

In [7]:
df.shape

(16000, 2)

In [8]:
df.duplicated().sum()

np.int64(1)

In [9]:
df.drop_duplicates(inplace=True)

In [10]:
df.shape

(15999, 2)

# Converting all text to lowercase

In [11]:
df['text'] = df['text'].apply(lambda x : x.lower())

# Removing punctuation

In [12]:
def remove_pun(txt):
    return txt.translate(str.maketrans('','',string.punctuation))

In [13]:
df['text'] = df['text'].apply(remove_pun)

# Removing numbers

In [14]:
def remove_num(text):
    new = ""
    for i in text:
        if not i.isdigit():
            new = new + i
    return new
df['text'] = df['text'].apply(remove_num)
df['text'] = df['text'].str.strip()

# Removing emojis/ special characters

In [15]:
def remove_emoji(txt):
    new = ""
    for i in txt:
        if i.isascii():
            new = new + i
    return new
df['text'] = df['text'].apply(remove_emoji)

# Removing stop words

In [16]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
stopwords = set(stopwords.words('English'))

In [17]:
def remove_stopwords(txt):
    word = word_tokenize(txt)
    clean = []
    for i in word:
        if not i in stopwords:
            clean.append(i)
    return ' '.join(clean)

In [18]:
df['text'] = df['text'].apply(remove_stopwords)

# Model training

In [19]:
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.model_selection import train_test_split

In [20]:
X_train, X_test, y_train, y_test = train_test_split(df['text'],df['emotion'], test_size=0.33, random_state=42)

In [21]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(10719,) (5280,) (10719,) (5280,)


In [22]:
bow = CountVectorizer()
X_train_bow = bow.fit_transform(X_train)
X_test_bow = bow.transform(X_test)

In [23]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix 

# Naive bayes(MultinomialNB)

In [24]:
model_nb = MultinomialNB()
model_nb.fit(X_train_bow,y_train)
y_pred_nb = model_nb.predict(X_test_bow)
print("Accuracy score",accuracy_score(y_test,y_pred_nb),end='\n'*2)
print("Classification report",classification_report(y_test,y_pred_nb))

Accuracy score 0.7691287878787879

Classification report               precision    recall  f1-score   support

           0       0.73      0.96      0.83      1756
           1       0.76      0.93      0.84      1575
           2       0.89      0.63      0.74       687
           3       0.83      0.57      0.68       599
           4       0.92      0.25      0.39       484
           5       0.93      0.08      0.14       179

    accuracy                           0.77      5280
   macro avg       0.84      0.57      0.60      5280
weighted avg       0.80      0.77      0.74      5280



In [25]:
model_lr = LogisticRegression()
model_lr.fit(X_train_bow,y_train)
y_pred_lr = model_lr.predict(X_test_bow)
print("Accuracy score",accuracy_score(y_test,y_pred_lr),end='\n'*2)
print("Classification report",classification_report(y_test,y_pred_lr))

Accuracy score 0.8865530303030303

Classification report               precision    recall  f1-score   support

           0       0.88      0.94      0.91      1756
           1       0.92      0.92      0.92      1575
           2       0.89      0.85      0.87       687
           3       0.85      0.83      0.84       599
           4       0.85      0.74      0.79       484
           5       0.84      0.73      0.78       179

    accuracy                           0.89      5280
   macro avg       0.87      0.84      0.85      5280
weighted avg       0.89      0.89      0.89      5280



In [26]:
model_rfc = RandomForestClassifier()
model_rfc.fit(X_train_bow,y_train)
y_pred_rfc = model_rfc.predict(X_test_bow)
print("Accuracy score",accuracy_score(y_test,y_pred_rfc),end='\n'*2)
print("Classification report",classification_report(y_test,y_pred_rfc))

Accuracy score 0.8890151515151515

Classification report               precision    recall  f1-score   support

           0       0.90      0.92      0.91      1756
           1       0.93      0.93      0.93      1575
           2       0.86      0.88      0.87       687
           3       0.87      0.82      0.85       599
           4       0.84      0.76      0.80       484
           5       0.74      0.79      0.76       179

    accuracy                           0.89      5280
   macro avg       0.86      0.85      0.85      5280
weighted avg       0.89      0.89      0.89      5280



In [27]:
emotion_map = {
    0: "joy",
    1: "sadness",
    2: "anger",
    3: "fear",
    4: "love",
    5: "surprise"
}
user_input = ["i loved you"]
output = model_rfc.predict(bow.transform(user_input))
predicted_emotion = emotion_map[int(output[0])]
print("Predicted Emotion:", predicted_emotion)

Predicted Emotion: love


In [28]:
user_input = ["i'm scared but i love you"]
output = model_lr.predict(bow.transform(user_input))
print("Predicted Emotion:", output[0])

Predicted Emotion: 3


In [29]:
user_input = ["i'm scared"]
output = model_nb.predict(bow.transform(user_input))
print("Predicted Emotion:", output[0])

Predicted Emotion: 3


In [32]:
import pickle

In [33]:
pickle.dump(model_rfc,open('RFmodel.pkl','wb'))

In [34]:
pickle.dump(bow,open('bow.pkl','wb'))