# Balancing the Dataset

## Installing the `datasets` Library and Loading the Emotion Dataset from Hugging Face

In [1]:
!pip install datasets



In [2]:
from datasets import load_dataset
import pandas as pd
import numpy as np

# Fetch the "emotion" dataset (or reuse the locally cached copy); the returned
# object is indexed by split name ('train', 'test', 'validation') in later cells.
emotion_dataset = load_dataset("emotion")

Using custom data configuration default
Reusing dataset emotion (C:\Users\valsa\.cache\huggingface\datasets\emotion\default\0.0.0\348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Peek at the training split: first record, column names, then the feature schema,
# each printed on its own line.
emotion_train = emotion_dataset['train']
print(
    emotion_train[0],
    emotion_train.column_names,
    emotion_train.features,
    sep="\n",
)

{'text': 'i didnt feel humiliated', 'label': 0}
['text', 'label']
{'text': Value(dtype='string', id=None), 'label': ClassLabel(num_classes=6, names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], names_file=None, id=None)}


In [4]:
# Switch the dataset's output format to pandas, then pull every row of each split.
# df1 holds the training split; test and val hold the held-out splits.
emotion_dataset.set_format(type="pandas")
df1, test, val = (
    emotion_dataset[split_name][:]
    for split_name in ("train", "test", "validation")
)

## Renaming the Columns to Match ISEAR Dataset and Changing Emotion Labels

In [5]:
# Rename the Hugging Face columns ('text' -> 'sentence', 'label' -> 'emotion')
# and put 'emotion' first, 'sentence' second.
df1 = df1.rename(columns={'text': 'sentence', 'label': 'emotion'})[["emotion", "sentence"]]

# Show the distinct label codes present in the training frame.
df1["emotion"].unique()

array([0, 3, 2, 5, 4, 1], dtype=int64)

In [8]:
# Translate the integer label codes into emotion names.
# The mapping is applied to the 'emotion' column only, so the 'sentence' column is
# never scanned or altered. replace() (rather than map()) leaves values that are
# not keys of the mapping untouched, which keeps this cell safe to re-run.
label_names = {0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}
df1 = df1.assign(emotion=df1['emotion'].replace(label_names))

## Loading in ISEAR Dataset

In [9]:
# Raw string literal: the Windows path contains backslashes followed by letters
# (\M, \A, \I) that are not valid escape sequences in a normal string literal.
# The resulting path is character-for-character the same.
df2 = pd.read_csv(r'G:\My Drive\AnjanaValsalan_ECNG 3020\Implementation Files\ISEAR csv\ISEAR.csv')

In [10]:
# Drop every column that contains at least one missing value.
# Only `how` is passed: supplying `thresh` alongside it (even as None) is rejected
# by recent pandas versions. The result is rebound instead of mutating in place.
df2 = df2.dropna(axis=1, how="any")

## Merging the ISEAR DataFrame with the Hugging Face DataFrame

In [11]:
# Stack the Hugging Face rows on top of the ISEAR rows, then renumber the
# index from 0 so the two sources no longer share index labels.
frames = [df1, df2]
df = pd.concat(frames).reset_index(drop=True)

## Deleting the Shame and Surprise Emotions

In [13]:
# Keep every row whose emotion is neither 'shame' nor 'surprise'.
df = df[~df['emotion'].isin(['shame', 'surprise'])]

In [15]:
# List the emotion classes that remain after filtering.
df['emotion'].unique()

array(['sadness', 'anger', 'love', 'fear', 'joy', 'disgust', 'guilt'],
      dtype=object)

In [16]:
# Count the sentences available for each emotion class.
df.groupby('emotion').count()

Unnamed: 0_level_0,sentence
emotion,Unnamed: 1_level_1
anger,3238
disgust,1066
fear,3013
guilt,1050
joy,6454
love,1304
sadness,5748


## Downsampling the Sadness Data

In [17]:
# Down-sample 'sadness': shuffle its rows with a fixed seed and keep the first 1050.
sad_df = df[df['emotion'] == 'sadness']

# Seeded permutation of row positions, so the same rows are picked on every run.
rand = np.random.RandomState(32).permutation(len(sad_df))

sad_array = sad_df.to_numpy()[rand]
sad = sad_array[:1050]
sad_list = sad.tolist()

# Rebuild a frame from the sampled rows; this also gives it a fresh 0-based index.
sad_df = pd.DataFrame(sad_list, columns=['emotion', 'sentence'])

## Downsampling the Anger Data

In [24]:
# Down-sample 'anger': shuffle its rows with a fixed seed and keep the first 1050.
ang_df = df[df['emotion'] == 'anger']

# Seeded permutation of row positions, so the same rows are picked on every run.
rand = np.random.RandomState(32).permutation(len(ang_df))

ang_array = ang_df.to_numpy()[rand]
ang = ang_array[:1050]
ang_list = ang.tolist()

# Rebuild a frame from the sampled rows; this also gives it a fresh 0-based index.
ang_df = pd.DataFrame(ang_list, columns=['emotion', 'sentence'])

## Downsampling the Disgust Data

In [31]:
# Down-sample 'disgust': shuffle its rows with a fixed seed and keep the first 1050.
dis_df = df[df['emotion'] == 'disgust']

# Seeded permutation of row positions, so the same rows are picked on every run.
rand = np.random.RandomState(32).permutation(len(dis_df))

dis_array = dis_df.to_numpy()[rand]
dis = dis_array[:1050]
dis_list = dis.tolist()

# Rebuild a frame from the sampled rows; this also gives it a fresh 0-based index.
dis_df = pd.DataFrame(dis_list, columns=['emotion', 'sentence'])

# Display the sampled frame.
dis_df

Unnamed: 0,emotion,sentence
0,disgust,I felt disgusted when I found out that someone...
1,disgust,The physical appearance of a meal.
2,disgust,Disgust with drunk people.
3,disgust,Being mistreated by my former primary school t...
4,disgust,The male friend of my mother (a widow) moved i...
...,...,...
1045,disgust,When a close relative got quite drunk. It was ...
1046,disgust,When I saw hundreds of snakes twisting over on...
1047,disgust,A colleague of mine asked a lecturer to put hi...
1048,disgust,I think I have hardly had a situation where I ...


## Downsampling the Fear Data

In [39]:
# Down-sample 'fear': shuffle its rows with a fixed seed and keep the first 1050.
fear_df = df[df['emotion'] == 'fear']

# Seeded permutation of row positions, so the same rows are picked on every run.
rand = np.random.RandomState(32).permutation(len(fear_df))

fear_array = fear_df.to_numpy()[rand]
fear = fear_array[:1050]
fear_list = fear.tolist()

# Rebuild a frame from the sampled rows; this also gives it a fresh 0-based index.
fear_df = pd.DataFrame(fear_list, columns=['emotion', 'sentence'])

# Display the sampled frame.
fear_df

Unnamed: 0,emotion,sentence
0,fear,i begun to feel distressed for you
1,fear,i feel like a person who tortured somebody bec...
2,fear,I was driving very fast on a small road and I ...
3,fear,i feel kind of reluctant and depressed when yo...
4,fear,i feel scared that i own it
...,...,...
1045,fear,i think my feelings remix is the result of how...
1046,fear,i was also feeling anxious around some of the ...
1047,fear,"I felt fear once, when the car in which I trav..."
1048,fear,i think that they pop up so automatically beca...


## Downsampling the Guilt Data

In [47]:
# Down-sample 'guilt': shuffle its rows with a fixed seed and keep the first 1050.
guilt_df = df.loc[df['emotion'] == 'guilt']

# The permutation must be sized from the guilt rows themselves. Taking the length
# from another class's frame would index out of range, or silently skip rows,
# whenever the two row counts differ.
rand = np.random.RandomState(32).permutation(len(guilt_df))

guilt_array = guilt_df.to_numpy()[rand]
guilt = guilt_array[:1050]
guilt_list = guilt.tolist()

# Rebuild a frame from the sampled rows; this also gives it a fresh 0-based index.
guilt_df = pd.DataFrame(guilt_list, columns=['emotion', 'sentence'])

# Display the sampled frame.
guilt_df

Unnamed: 0,emotion,sentence
0,guilt,Squeezed the puppy in the door.
1,guilt,I got up late and I could not go to my lessons...
2,guilt,I used to have two boyfriends in the past. The...
3,guilt,When I sided with my mother in a quarrel she h...
4,guilt,One day I was chatting with children at home. ...
...,...,...
1045,guilt,When I was 4-5 years old my mother gave me som...
1046,guilt,When I was in the puberty I smashed the favour...
1047,guilt,"Car accident. I let a 16-year old, incompetent..."
1048,guilt,I felt guilty when one of my secondary teacher...


## Downsampling the Joy Data

In [55]:
# Select the 'joy' rows and shuffle them with a fixed seed.
joy_df = df[df['emotion'] == 'joy']
joy_list = joy_df.to_numpy().tolist()

# Seeded permutation of row positions, so the shuffle is the same on every run.
rand = np.random.RandomState(32).permutation(len(joy_list))
joy_array = joy_df.to_numpy()[rand]