In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df=pd.read_csv("/content/train.txt",sep=';',header=None,names=['text','emotions'])
df.head()

Unnamed: 0,text,emotions
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [4]:
df.shape

(16000, 2)

**Preprocessing**

In [7]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [8]:
import string
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

def preprocess_text(txt):
    txt = txt.lower()  # lowercase
    txt = txt.translate(str.maketrans('', '', string.punctuation))  # remove punctuation
    txt = ''.join([c for c in txt if not c.isdigit()])  # remove numbers
    txt = ''.join([c for c in txt if c.isascii()])  # remove emojis / non-ascii
    # remove stopwords
    words = txt.split()
    words = [w for w in words if w not in stop_words]
    return ' '.join(words)


In [9]:
df['text']=df['text'].apply(preprocess_text)

In [10]:
df.head()

Unnamed: 0,text,emotions
0,didnt feel humiliated,sadness
1,go feeling hopeless damned hopeful around some...,sadness
2,im grabbing minute post feel greedy wrong,anger
3,ever feeling nostalgic fireplace know still pr...,love
4,feeling grouchy,anger


In [11]:
df['emotions'].unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'joy'],
      dtype=object)

In [13]:
unique=df['emotions'].unique()
emotions_num={}
for i,emo in enumerate(unique,1):
  emotions_num[emo]=i
emotions_num



{'sadness': 1, 'anger': 2, 'love': 3, 'surprise': 4, 'fear': 5, 'joy': 6}

In [14]:
df['emotions']=df['emotions'].map(emotions_num)

In [15]:
df['emotions'].unique()

array([1, 2, 3, 4, 5, 6])

In [16]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(df['text'],df['emotions'],test_size=0.2,random_state=42)

In [17]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((12800,), (3200,), (12800,), (3200,))

In [18]:
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [19]:
bow_vectorizer=CountVectorizer()
X_train_bow=bow_vectorizer.fit_transform(X_train)
X_test_bow=bow_vectorizer.transform(X_test)

In [26]:
nb_model=MultinomialNB()

In [27]:
nb_model.fit(X_train_bow,y_train)
pred_bow=nb_model.predict(X_test_bow)
accuracy_score(y_test,pred_bow)

0.768125

In [29]:
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


nb2_model = MultinomialNB()
nb2_model.fit(X_train_tfidf,y_train)
pred_bow2=nb2_model.predict(X_test_bow)
accuracy_score(y_test,pred_bow2)

0.6753125

In [30]:
from sklearn.linear_model import LogisticRegression
model_los=LogisticRegression(max_iter=1000)
model_los.fit(X_train_bow,y_train)
pred_bow=model_los.predict(X_test_bow)
accuracy_score(y_test,pred_bow)

0.8896875

**New text Prediction**

In [31]:
new_text = "I feel very happy and loved today!"

In [32]:
processed_text=preprocess_text(new_text)
processed_text


'feel happy loved today'

In [33]:
text_bow=bow_vectorizer.transform([processed_text])
text_bow

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 4 stored elements and shape (1, 13361)>

In [34]:
predicted_label = model_los.predict(text_bow)
predicted_label

array([6])

In [35]:
predicted_label=predicted_label[0]
print(predicted_label)

6


In [40]:
emotions_num

{'sadness': 1, 'anger': 2, 'love': 3, 'surprise': 4, 'fear': 5, 'joy': 6}

In [39]:
for k,v in emotions_num.items():
  if(v==predicted_label):
    print("Predicted Emotion : ",k)


Predicted Emotion :  joy


**Predictive system**

In [48]:
def prediction(txt):
  processed_text=preprocess_text(txt)
  text_bow=bow_vectorizer.transform([processed_text])
  pred_label = model_los.predict(text_bow)[0]
  for k,v in emotions_num.items():
    if(v==pred_label):
      print("Predicted Emotion : ",k)

In [49]:
prediction("i didnt feel humiliated")

Predicted Emotion :  sadness


In [53]:
prediction("feeling grouchy")

Predicted Emotion :  anger


In [54]:
prediction("feeling bad")

Predicted Emotion :  sadness


In [55]:
prediction("I feel very happy and loved today!")

Predicted Emotion :  joy




---



---


**Future Work**


---


*   Hyparameter Tuning To increase accuracy
*   Add ui

