# Emotions Database Analysis

In [23]:
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
emotions_df = pd.read_csv("../raw_data/emotions_db.csv", sep=";")
emotions_df.head(10)

Unnamed: 0,english_words,portuguese_words,positive,negative,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,score_positive,score_negative
0,aback,surpresa,0,0,0,0,0,0,0,0,0,0,0,0
1,abacus,ábaco,0,0,0,0,0,0,0,0,0,1,1,0
2,abandon,abandono,0,1,0,0,0,1,0,1,0,0,0,3
3,abandoned,abandonado,0,1,1,0,0,1,0,1,0,0,0,4
4,abandonment,abandono,0,1,1,0,0,1,0,1,1,0,1,4
5,abate,diminuir,0,0,0,0,0,0,0,0,0,0,0,0
6,abatement,redução,0,0,0,0,0,0,0,0,0,0,0,0
7,abba,abba,1,0,0,0,0,0,0,0,0,0,1,0
8,abbot,abade,0,0,0,0,0,0,0,0,0,1,1,0
9,abbreviate,abreviar,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
feelings = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust']

In [28]:
list(emotions_df.columns[])

['english_words',
 'portuguese_words',
 'positive',
 'negative',
 'anger',
 'anticipation',
 'disgust',
 'fear',
 'joy',
 'sadness',
 'surprise',
 'trust',
 'score_positive',
 'score_negative']

In [6]:
def count_emotion(emotions_df, emotion):
    count = emotions_df[emotion]
    return count[count == 1].count()

In [9]:
result_positive = count_emotion(emotions_df, "positive")
result_positive

2312

In [10]:
result_negative = count_emotion(emotions_df, "negative")
result_negative

3324

### Cleaning Data

Check if there are duplicated words

In [11]:
len(emotions_df) # Check number of rows before removing duplicates

14182

In [12]:
emotions_df = emotions_df.drop_duplicates() # Remove duplicates
len(emotions_df)# Check new number of rows

14182

Check if there are null values

In [13]:
emotions_df.isnull().sum().sort_values(ascending=False) #NaN count for each column

english_words       1
portuguese_words    0
positive            0
negative            0
anger               0
anticipation        0
disgust             0
fear                0
joy                 0
sadness             0
surprise            0
trust               0
score_positive      0
score_negative      0
dtype: int64

In [16]:
emotions_df.tail(1)

Unnamed: 0,english_words,portuguese_words,positive,negative,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,score_positive,score_negative
14181,True,VERDADE,1,0,0,0,0,0,1,0,0,1,3,0


In [22]:
emotions_df = emotions_df.drop(emotions_df.index[len(emotions_df)-1])
emotions_df.tail(1)

Unnamed: 0,english_words,portuguese_words,positive,negative,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,score_positive,score_negative
14178,zoological,zoológico,0,0,0,0,0,0,0,0,0,0,0,0


## Making some charts

In [None]:
# Pie chart, where the slices will be ordered and plotted counter-clockwise:
labels = 'anger', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust'
sizes = [15, 30, 45, 10]
explode = (0, 0.1, 0, 0)  # only "explode" the 2nd slice (i.e. 'Hogs')

fig1, ax1 = plt.subplots()
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

plt.show()