In [168]:
import pandas as pd 
import nltk
import numpy as np 
from nltk.corpus import stopwords 
import re
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier 
import pickle
from nltk.stem import WordNetLemmatizer

In [119]:
#!pip install emoji
import emoji

In [120]:
# Load the tweet-emotion dataset from a CSV file in the working directory
# and display the resulting frame.
data=pd.read_csv("tweet_emotions.csv")
data

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...
...,...,...,...
39995,1753918954,neutral,@JohnLloydTaylor
39996,1753919001,love,Happy Mothers Day All my love
39997,1753919005,love,Happy Mother's Day to all the mommies out ther...
39998,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [121]:
# Number of distinct sentiment labels present in the dataset.
data['sentiment'].nunique()

13

In [122]:
# Array of the distinct sentiment labels found in the `sentiment` column.
sentiment_list=data['sentiment'].unique()
sentiment_list

array(['empty', 'sadness', 'enthusiasm', 'neutral', 'worry', 'surprise',
       'love', 'fun', 'hate', 'happiness', 'boredom', 'relief', 'anger'],
      dtype=object)

In [123]:
# Class distribution of the labels. The classes are heavily imbalanced
# (thousands of 'neutral'/'worry' rows versus roughly a hundred 'anger' rows).
data['sentiment'].value_counts()

neutral       8638
worry         8459
happiness     5209
sadness       5165
love          3842
surprise      2187
fun           1776
relief        1526
hate          1323
empty          827
enthusiasm     759
boredom        179
anger          110
Name: sentiment, dtype: int64

In [162]:
# Fetch the NLTK resources used by clean(): the English stop-word list and the
# WordNet data behind the lemmatizer. Packages that are already up to date are
# not downloaded again.
nltk.download('stopwords')
nltk.download('wordnet')
# The recorded run of this cell also fetched 'omw-1.4', which some NLTK
# releases require before WordNetLemmatizer can be used; request it explicitly
# so a fresh environment behaves like the recorded one.
nltk.download('omw-1.4')

STOPWORDS = stopwords.words("english")
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\SiYu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\SiYu\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\SiYu\AppData\Roaming\nltk_data...


In [176]:
# Patterns are raw strings so that \w and \s reach the regex engine untouched
# (the non-raw form triggers invalid-escape-sequence warnings).
_MENTION_RE = re.compile(r"@\w+")        # "@username" mentions
_PUNCTUATION_RE = re.compile(r"[^\w\s]")  # anything that is not a word char or whitespace


def clean(text):
    """Normalise a raw tweet into a space-separated string of lemmatised tokens.

    Processing order: lower-case the text, remove ``@user`` mentions, replace
    punctuation with spaces, drop tokens found in ``STOPWORDS`` and lemmatise
    the remaining tokens with the module-level ``lemmatizer``.

    Parameters
    ----------
    text : str
        Raw tweet text. Non-string values (for example the NaN that pandas
        uses for a missing CSV cell) are treated as an empty tweet.

    Returns
    -------
    str
        The cleaned text. It is empty when nothing survives the filtering
        (e.g. a tweet that consists only of a mention).
    """
    if not isinstance(text, str):
        return ""
    text = _MENTION_RE.sub("", text.lower())
    text = _PUNCTUATION_RE.sub(" ", text)  # Remove punctuations
    # Stop words are matched after lower-casing, so the comparison is
    # effectively case-insensitive.
    tokens = [w for w in text.split() if w not in STOPWORDS]
    return " ".join(lemmatizer.lemmatize(w) for w in tokens)

In [177]:
# Build the model input column by running clean() over every raw tweet.
data['text'] = data['content'].apply(clean)

In [178]:
# Inspect the frame now that the cleaned `text` column has been added.
data

Unnamed: 0,tweet_id,sentiment,content,text
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...,know listenin bad habit earlier started freaki...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...,layin n bed headache ughhhh waitin call
2,1956967696,sadness,Funeral ceremony...gloomy friday...,funeral ceremony gloomy friday
3,1956967789,enthusiasm,wants to hang out with friends SOON!,want hang friend soon
4,1956968416,neutral,@dannycastillo We want to trade with someone w...,want trade someone houston ticket one
...,...,...,...,...
39995,1753918954,neutral,@JohnLloydTaylor,
39996,1753919001,love,Happy Mothers Day All my love,happy mother day love
39997,1753919005,love,Happy Mother's Day to all the mommies out ther...,happy mother day mommy woman man long momma so...
39998,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...,wassup beautiful follow peep new hit single ww...


In [None]:
#https://raw.githubusercontent.com/carpedm20/emoji/master/emoji/unicode_codes/data_dict.py

'''
array(['empty', 'sadness', 'enthusiasm', 'neutral', 'worry', 'surprise',
       'love', 'fun', 'hate', 'happiness', 'boredom', 'relief', 'anger'],
      dtype=object)
'''

# Sentiment label -> emoji shortcode name (without the surrounding colons).
# Insertion order is kept because the preview loop prints in this order.
dic = dict(
    empty='tired_face',
    sadness='crying_face',
    worry='worried_face',
    hate='nauseated_face',
    boredom='flushed_face',
    anger='angry_face_with_horns',
    neutral='neutral_face',
    relief='relieved_face',
    enthusiasm='partying_face',
    surprise='face_with_hand_over_mouth',
    love='smiling_face_with_hearts',
    fun='face_with_tears_of_joy',
    happiness='kissing_face_with_closed_eyes',
)

In [None]:
# Preview the emoji that each sentiment label maps to.
for label, shortcode in dic.items():
    glyph = emoji.emojize(f':{shortcode}:')
    print(label, ":", glyph)

empty : 😫
sadness : 😢
worry : 😟
hate : 🤢
boredom : 😳
anger : 👿
neutral : 😐
relief : 😌
enthusiasm : 🥳
surprise : 🤭
love : 🥰
fun : 😂
happiness : 😚


In [179]:
def sentiment2emoij(sentiment):
    """Return the emoji for a sentiment label.

    The label is looked up in ``dic`` to get its shortcode name, which is then
    wrapped in colons and passed to ``emoji.emojize``. A label that is not a
    key of ``dic`` raises ``KeyError``.
    """
    shortcode = dic[sentiment]
    return emoji.emojize(':{}:'.format(shortcode))

In [180]:
# Attach the emoji that corresponds to each row's sentiment label.
data['emoji'] = data['sentiment'].apply(sentiment2emoij)

In [181]:
# Show only the emoji, the label and the cleaned text side by side.
data[['emoji','sentiment','text']]

Unnamed: 0,emoji,sentiment,text
0,😫,empty,know listenin bad habit earlier started freaki...
1,😢,sadness,layin n bed headache ughhhh waitin call
2,😢,sadness,funeral ceremony gloomy friday
3,🥳,enthusiasm,want hang friend soon
4,😐,neutral,want trade someone houston ticket one
...,...,...,...
39995,😐,neutral,
39996,🥰,love,happy mother day love
39997,🥰,love,happy mother day mommy woman man long momma so...
39998,😚,happiness,wassup beautiful follow peep new hit single ww...


In [182]:
# Independent copy of the three columns of interest, so that later column
# additions do not touch `data`.
preview_columns = ['emoji', 'sentiment', 'text']
temp_data = data[preview_columns].copy()

In [183]:
# Inspect the copied emoji / sentiment / text frame.
temp_data

Unnamed: 0,emoji,sentiment,text
0,😫,empty,know listenin bad habit earlier started freaki...
1,😢,sadness,layin n bed headache ughhhh waitin call
2,😢,sadness,funeral ceremony gloomy friday
3,🥳,enthusiasm,want hang friend soon
4,😐,neutral,want trade someone houston ticket one
...,...,...,...
39995,😐,neutral,
39996,🥰,love,happy mother day love
39997,🥰,love,happy mother day mommy woman man long momma so...
39998,😚,happiness,wassup beautiful follow peep new hit single ww...


In [184]:
# Targets: sentiment labels mapped to integer codes. The fitted encoder is kept
# as `Le` so the codes can be translated back to labels later.
sentiment_labels = np.array(data['sentiment'])
Le = LabelEncoder().fit(sentiment_labels)
y = Le.transform(sentiment_labels)

# Features: the cleaned tweet text.
x = np.array(data['text'])

In [187]:
# Put the integer-encoded label next to the original sentiment for inspection.
temp_data = temp_data.assign(Y_Encoder=y)

In [192]:
# Inspect the frame with the added `Y_Encoder` column.
temp_data

Unnamed: 0,emoji,sentiment,text,Y_Encoder
0,😫,empty,know listenin bad habit earlier started freaki...,2
1,😢,sadness,layin n bed headache ughhhh waitin call,10
2,😢,sadness,funeral ceremony gloomy friday,10
3,🥳,enthusiasm,want hang friend soon,3
4,😐,neutral,want trade someone houston ticket one,8
...,...,...,...,...
39995,😐,neutral,,8
39996,🥰,love,happy mother day love,7
39997,🥰,love,happy mother day mommy woman man long momma so...,7
39998,😚,happiness,wassup beautiful follow peep new hit single ww...,5


In [193]:
# Print the (integer code, sentiment label) pairs learned by the encoder.
# `Le.classes_` holds the labels in code order, so this stays correct for any
# number of classes instead of relying on a hard-coded count of 13.
print(list(enumerate(Le.classes_)))

[(0, 'anger'), (1, 'boredom'), (2, 'empty'), (3, 'enthusiasm'), (4, 'fun'), (5, 'happiness'), (6, 'hate'), (7, 'love'), (8, 'neutral'), (9, 'relief'), (10, 'sadness'), (11, 'surprise'), (12, 'worry')]


In [194]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [195]:
# Sanity check: size of the training feature array.
x_train.shape

(32000,)

In [196]:
# Sanity check: the training targets must have the same length as x_train.
y_train.shape

(32000,)

In [197]:
# TF-IDF features over word 1- to 3-grams, capped at 1000 features.
tf = TfidfVectorizer(
    analyzer='word',
    ngram_range=(1, 3),
    max_features=1000,
)
# The vectorizer is fitted on the training split only; the held-out split is
# transformed with that same fitted vectorizer.
x_tf = tf.fit_transform(x_train)
x_val_tf = tf.transform(x_test)

In [198]:
# Convert the TF-IDF matrices to dense arrays.
# The names are rebound in place, so guard on `.toarray`: without the guard a
# second run of this cell would fail because the dense arrays no longer have
# that method.
if hasattr(x_tf, "toarray"):
    x_tf = x_tf.toarray()
if hasattr(x_val_tf, "toarray"):
    x_val_tf = x_val_tf.toarray()

In [199]:
# Train a random forest on the TF-IDF features.
# The seed is fixed (same value as the train/test split) so that the fitted
# model and the accuracy reported below are reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(x_tf, y_train)

RandomForestClassifier()

In [200]:
#Save model
# Persist the trained classifier. The context manager guarantees that the file
# is flushed and closed even if pickling fails.
# NOTE(review): only the classifier is saved here; predicting on new text also
# needs the fitted vectorizer `tf` and the label encoder `Le`.
with open('model_RFClassifier.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [201]:
#Load the existing model
# pickled_model = pickle.load(open('model_RFClassifier.pkl', 'rb'))

In [202]:
# Predicted integer label codes for the held-out TF-IDF features.
y_pred = model.predict(x_val_tf)

In [203]:
# Mean accuracy on the held-out split. With 13 heavily imbalanced classes a
# single accuracy figure hides per-class behaviour, so read it with care.
model.score(x_val_tf,y_test)

0.3195