# Emotions expressed in short-form text on social media.


In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("text.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [3]:
df.shape

(416809, 3)

In [4]:
df.columns

Index(['Unnamed: 0', 'text', 'label'], dtype='object')

In [5]:
df['label'].value_counts()

1    141067
0    121187
3     57317
4     47712
2     34554
5     14972
Name: label, dtype: int64

In [6]:
df = df.drop(['Unnamed: 0'],axis='columns')
df.shape

(416809, 2)

In [7]:
df.head()

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,4
1,ive enjoyed being able to slouch about relax a...,0
2,i gave up my internship with the dmrg and am f...,4
3,i dont know i feel so lost,0
4,i am a kindergarten teacher and i am thoroughl...,4


In [8]:
df.label.replace(0, "sadness", inplace=True)
df.label.replace(1, "joy", inplace=True)
df.label.replace(2, "love", inplace=True)
df.label.replace(3, "anger", inplace=True)
df.label.replace(4, "fear", inplace=True)
df.label.replace(5, "surprise", inplace=True)
df.head()

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sadness
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sadness
4,i am a kindergarten teacher and i am thoroughl...,fear


# Data Cleaning: Handle NA values

In [9]:
df.isnull().sum()

text     0
label    0
dtype: int64

# Pre-procesing

Remove punctuation
Remove extra space
Make the entire sentence lower case

In [10]:
import re
def preprocess(text):
    text = re.sub(r'[^\w\s\']',' ', text)
    text = re.sub(' +', ' ', text)
    return text.strip().lower() 

In [11]:
df['text'] = df['text'].map(preprocess)
df.head()


Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sadness
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sadness
4,i am a kindergarten teacher and i am thoroughl...,fear


In [12]:
df['label'] = '__label__' + df['label'].astype(str)
df['text'][2]

'i gave up my internship with the dmrg and am feeling distraught'

In [13]:
df['label_text'] = df['label'] + ' ' + df['text']
df.head(3)

Unnamed: 0,text,label,label_text
0,i just feel really helpless and heavy hearted,__label__fear,__label__fear i just feel really helpless and ...
1,ive enjoyed being able to slouch about relax a...,__label__sadness,__label__sadness ive enjoyed being able to slo...
2,i gave up my internship with the dmrg and am f...,__label__fear,__label__fear i gave up my internship with the...


In [14]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.2)

In [15]:
train.shape, test.shape

((333447, 3), (83362, 3))

In [16]:
train.to_csv("emotions.train", columns=["label_text"], index=False, header=False)
test.to_csv("emotions.test", columns=["label_text"], index=False, header=False)

In [17]:

import fasttext

model = fasttext.train_supervised(input="emotions.train")
model.test("emotions.test")

Read 7M words
Number of words:  67869
Number of labels: 6
Progress: 100.0% words/sec/thread: 2386408 lr:  0.000000 avg.loss:  0.300338 ETA:   0h 0m 0s


(83362, 0.8960437609462345, 0.8960437609462345)

In [18]:
model.predict('i feel scared')

(('__label__fear',), array([1.00001001]))

In [19]:
from imblearn.over_sampling import SMOTE

df['label'].value_counts()

__label__joy         141067
__label__sadness     121187
__label__anger        57317
__label__fear         47712
__label__love         34554
__label__surprise     14972
Name: label, dtype: int64

In [20]:

smote=SMOTE("minority")
X,Y=smote.fit_sample(df.text, df.label)

Y.value_counts()

TypeError: __init__() takes 1 positional argument but 2 were given