In [1]:
import pandas as pd

df = pd.read_csv("Data Sets/tweet_emotions.csv")
df.shape

(40000, 3)

In [2]:
df['sentiment'].value_counts()

neutral       8638
worry         8459
happiness     5209
sadness       5165
love          3842
surprise      2187
fun           1776
relief        1526
hate          1323
empty          827
enthusiasm     759
boredom        179
anger          110
Name: sentiment, dtype: int64

In [3]:
ignored_items_list = [
    'surprise',
    'fun',
    'relief',
    'hate',
    'empty',
    'enthusiasm',
    'boredom',
    'anger'
]

df = df[~df['sentiment'].isin(ignored_items_list)]
df['sentiment'].value_counts()

neutral      8638
worry        8459
happiness    5209
sadness      5165
love         3842
Name: sentiment, dtype: int64

In [4]:
df['sentiment'] = '__label__' + df['sentiment'].astype(str)
df

Unnamed: 0,tweet_id,sentiment,content
1,1956967666,__label__sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,__label__sadness,Funeral ceremony...gloomy friday...
4,1956968416,__label__neutral,@dannycastillo We want to trade with someone w...
5,1956968477,__label__worry,Re-pinging @ghostridah14: why didn't you go to...
6,1956968487,__label__sadness,"I should be sleep, but im not! thinking about ..."
...,...,...,...
39995,1753918954,__label__neutral,@JohnLloydTaylor
39996,1753919001,__label__love,Happy Mothers Day All my love
39997,1753919005,__label__love,Happy Mother's Day to all the mommies out ther...
39998,1753919043,__label__happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [5]:
df.sentiment.unique()

array(['__label__sadness', '__label__neutral', '__label__worry',
       '__label__love', '__label__happiness'], dtype=object)

In [6]:
df['sentiment_description'] = df['sentiment'] + ' ' + df['content']
df

Unnamed: 0,tweet_id,sentiment,content,sentiment_description
1,1956967666,__label__sadness,Layin n bed with a headache ughhhh...waitin o...,__label__sadness Layin n bed with a headache ...
2,1956967696,__label__sadness,Funeral ceremony...gloomy friday...,__label__sadness Funeral ceremony...gloomy fri...
4,1956968416,__label__neutral,@dannycastillo We want to trade with someone w...,__label__neutral @dannycastillo We want to tra...
5,1956968477,__label__worry,Re-pinging @ghostridah14: why didn't you go to...,__label__worry Re-pinging @ghostridah14: why d...
6,1956968487,__label__sadness,"I should be sleep, but im not! thinking about ...","__label__sadness I should be sleep, but im not..."
...,...,...,...,...
39995,1753918954,__label__neutral,@JohnLloydTaylor,__label__neutral @JohnLloydTaylor
39996,1753919001,__label__love,Happy Mothers Day All my love,__label__love Happy Mothers Day All my love
39997,1753919005,__label__love,Happy Mother's Day to all the mommies out ther...,__label__love Happy Mother's Day to all the mo...
39998,1753919043,__label__happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...,__label__happiness @niariley WASSUP BEAUTIFUL!...


In [7]:
import re

def preprocess_text(text):
    text = re.sub(r'[^\w\s\']', ' ', text)
    text = re.sub(r' +', ' ', text)
    return text.strip().lower()

df['sentiment_description'] = df['sentiment_description'].map(preprocess_text)
df

Unnamed: 0,tweet_id,sentiment,content,sentiment_description
1,1956967666,__label__sadness,Layin n bed with a headache ughhhh...waitin o...,__label__sadness layin n bed with a headache u...
2,1956967696,__label__sadness,Funeral ceremony...gloomy friday...,__label__sadness funeral ceremony gloomy friday
4,1956968416,__label__neutral,@dannycastillo We want to trade with someone w...,__label__neutral dannycastillo we want to trad...
5,1956968477,__label__worry,Re-pinging @ghostridah14: why didn't you go to...,__label__worry re pinging ghostridah14 why did...
6,1956968487,__label__sadness,"I should be sleep, but im not! thinking about ...",__label__sadness i should be sleep but im not ...
...,...,...,...,...
39995,1753918954,__label__neutral,@JohnLloydTaylor,__label__neutral johnlloydtaylor
39996,1753919001,__label__love,Happy Mothers Day All my love,__label__love happy mothers day all my love
39997,1753919005,__label__love,Happy Mother's Day to all the mommies out ther...,__label__love happy mother's day to all the mo...
39998,1753919043,__label__happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...,__label__happiness niariley wassup beautiful f...


In [8]:
df['sentiment_description'][1]

'__label__sadness layin n bed with a headache ughhhh waitin on your call'

In [9]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(df, test_size=0.2)
train.shape, test.shape

((25050, 4), (6263, 4))

In [10]:
train.to_csv("Fast Text Data Files/sentiment.train", columns=['sentiment_description'], index=False, header=False)
test.to_csv("Fast Text Data Files/sentiment.test", columns=['sentiment_description'], index=False, header=False)

In [11]:
import fasttext
model = fasttext.train_supervised(input="Fast Text Data Files/sentiment.train")

Read 0M words
Number of words:  35105
Number of labels: 5
Progress: 100.0% words/sec/thread: 1061820 lr:  0.000000 avg.loss:  1.250116 ETA:   0h 0m 0s


In [12]:
model.test("Fast Text Data Files/sentiment.test")

(6263, 0.4418010538080792, 0.4418010538080792)

In [13]:
model.predict("@RobertF3 correct! I ADORE him. I just plucked him up and put him under my arm cuz he was cryin.  All better now! Hahaha")

(('__label__worry',), array([0.26392221]))

In [14]:
model.predict("I missed the bl***y bus!!!!!!!!")

(('__label__sadness',), array([0.81296849]))