# Import libraries

In [1]:
# drive access
from google.colab import drive
drive.mount('/content/drive')

# third-party libraries
import numpy as np
import pandas as pd

Mounted at /content/drive


# Define reusable functions

In [2]:
def front_assertion(df):
  """Flag rows whose auto-transcription and label start with the same text.

  Compares the first five characters of ``auto_transcription`` and ``label``
  case-insensitively and writes 'Y' (match) or 'N' (mismatch) into a new
  ``front_assertions`` column. ``df`` is modified in place; nothing is
  returned.

  Both values are passed through ``str()`` first, so a non-string value is
  compared by its string form (NaN becomes 'nan'). Punctuation is not
  stripped: the label "Lord, but ..." does not match the transcription
  "LORD BUT ...".
  """
  # Walk the two columns directly rather than df.iterrows(), which builds a
  # Series for every row.
  df['front_assertions'] = [
      'Y' if str(transcript)[:5].lower() == str(label)[:5].lower() else 'N'
      for transcript, label in zip(df['auto_transcription'], df['label'])
  ]

In [3]:
def back_assertion(df):
  """Flag rows whose auto-transcription and label end with the same text.

  Compares the last five characters of ``auto_transcription`` with the last
  five characters of ``label`` (after every '.' has been removed from the
  label), case-insensitively, and writes 'Y' (match) or 'N' (mismatch) into a
  new ``back_assertions`` column. ``df`` is modified in place; nothing is
  returned.

  Both values are passed through ``str()`` first, so a non-string value is
  compared by its string form. Only periods are removed from the label; any
  other punctuation near the end still counts as a difference.
  """
  # Walk the two columns directly rather than df.iterrows(), which builds a
  # Series for every row.
  df['back_assertions'] = [
      'Y' if str(transcript)[-5:].lower() == str(label).replace('.', '')[-5:].lower() else 'N'
      for transcript, label in zip(df['auto_transcription'], df['label'])
  ]

# Clean EmoV_DB main.csv

## Read csv file

In [4]:
emov_df = pd.read_csv('/content/drive/MyDrive/266/Data/Clean_Data/EmoV_DB/main.csv')

emov_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label
0,amused_29-45_0042.wav,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,amused,bea,female,42,How could he explain his possession of the ske...
1,amused_46-56_0046.wav,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,amused,bea,female,46,"The girl faced him, her eyes shining with sudd..."
2,amused_1-15_0005.wav,WILL WE EVER FORGET IT,amused,bea,female,5,Will we ever forget it.
3,amused_281-308_0281.wav,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,amused,bea,female,281,I do not blame you for anything; remember that.
4,amused_225-252_0226.wav,THAT CAME BEFORE MY A V CS,amused,bea,female,226,That came before my A B C's.


In [5]:
emov_df['emotion'].value_counts()

sleepy     1720
neutral    1568
amused     1317
angry      1268
disgust    1019
Name: emotion, dtype: int64

In [6]:
emov_df['actor'].value_counts()

sam      2453
jenie    1790
bea      1786
josh      863
Name: actor, dtype: int64

In [7]:
emov_df['gender'].value_counts()

female    3576
male      3316
Name: gender, dtype: int64

## Find instances of label with digits & replace with spelled out words

In [8]:
emov_df[emov_df['label'].str.contains(r'\d')]['label'].value_counts()

At sea, Wednesday, March 18, 1908.    9
At sea, Monday, March 16, 1908.       9
Name: label, dtype: int64

In [9]:
# Spell out the numbers listed by the previous cell. regex=False makes every
# pattern a literal substring, independent of the pandas version's default.
for digits, words in [('1908', 'nineteen o eight'), ('16', 'sixteenth'), ('18', 'eighteenth')]:
  emov_df['label'] = emov_df['label'].str.replace(digits, words, regex=False)

In [10]:
assert len(emov_df[emov_df['label'].str.contains(r'\d')]) == 0

## Inspect very noisy auto-transcription

In [11]:
front_assertion(emov_df)
back_assertion(emov_df)

emov_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
0,amused_29-45_0042.wav,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,amused,bea,female,42,How could he explain his possession of the ske...,Y,Y
1,amused_46-56_0046.wav,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,amused,bea,female,46,"The girl faced him, her eyes shining with sudd...",Y,Y
2,amused_1-15_0005.wav,WILL WE EVER FORGET IT,amused,bea,female,5,Will we ever forget it.,Y,Y
3,amused_281-308_0281.wav,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,amused,bea,female,281,I do not blame you for anything; remember that.,Y,Y
4,amused_225-252_0226.wav,THAT CAME BEFORE MY A V CS,amused,bea,female,226,That came before my A B C's.,Y,N


In [12]:
emov_failed = emov_df[(emov_df['front_assertions'] == 'N') & (emov_df['back_assertions'] == 'N')]

emov_failed

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
25,amused_85-112_0110.wav,MUCH REPLIED JEAN AS TERSLEY,amused,bea,female,110,"Much, replied Jeanne, as tersely.",N,N
71,amused_253-280_0253.wav,HE WAS THAT LIVED TO FOUND THE FAMILY OF THE P...,amused,bea,female,253,He it was that lived to found the family of th...,N,N
84,amused_1-15_0004.wav,LORD BUT I'M GLAD TO SEE YOU AGAIN VELL,amused,bea,female,4,"Lord, but I'm glad to see you again, Phil.",N,N
92,amused_196-224_0206.wav,HAHA HE SAYS HE BOUGHT HIM OF JACK LEBULL,amused,bea,female,206,He says he bought him of Jacques Le Beau.,N,N
94,amused_1-15_0015.wav,HA HA HA IT'S THE AURORA BOREALICE,amused,bea,female,15,It's the aurora borealis.,N,N
...,...,...,...,...,...,...,...,...,...
6882,sleepiness_393-420_0403.wav,IF YOU ARE CUNNING GAVE HIM POISE HEDTHE SHRAW,sleepy,sam,male,403,His newborn cunning gave him poise and control.,N,N
6884,sleepiness_393-420_0397.wav,SOLL ME THEN AGAIN HE MURMURED ITSTILATLY,sleepy,sam,male,397,"Call me that again, he murmured ecstatically.",N,N
6885,sleepiness_421-448_0439.wav,I'D SEE WHA SAY MARG EIGHTEENT LOTITO LA,sleepy,sam,male,439,"At sea, Wednesday, March eighteenth, nineteen ...",N,N
6887,sleepiness_477-504_0484.wav,NO CERENE,sleepy,sam,male,484,No-sir-ee.,N,N


We can see that Josh's and Sam's sleepy and amused sets have many instances where both the front and back assertions fail. Let's take a closer look at those instances.

In [13]:
# Number of rows failing both checks, per (actor, emotion) pair.
emov_failed_grouped = emov_failed.groupby(['actor','emotion']).size()

emov_failed_grouped

actor  emotion
bea    amused      8
       angry       2
       neutral     2
       sleepy     11
jenie  amused      1
       angry       6
       disgust     2
       neutral     3
       sleepy     10
josh   amused     48
       neutral     3
       sleepy     54
sam    amused     49
       angry       9
       disgust    15
       neutral    13
       sleepy     60
dtype: int64

There are 296 instances where both the front and back assertions failed, around 4% of the total 6.9K instances.

In [14]:
emov_failed_grouped.sum()

296

In [15]:
len(emov_df)

6892

In [16]:
emov_failed_grouped.sum() / len(emov_df)

0.042948345908299476

A good number of the failed instances in Josh's and Sam's amused sets are due to 'speech laugh', which will be handled as part of Step 2 in the pipeline. The auto transcription is noisy but not unrecognizable, so it is okay to leave as is.

In [17]:
emov_failed[(emov_failed['actor'] == 'josh') &  (emov_failed['emotion'] == 'amused')]

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
3581,amused_225_252_0240.wav,AA WASH YOUR HANDS WITH ME,amused,josh,male,240,Wash your hands of me.,N,N
3582,amused_74_84_0079.wav,ISA LORDGANOEL,amused,josh,male,79,It was a large canoe.,N,N
3587,amused_85-112_0099.wav,AM I HAD NY JOY FOUND IT IN HIS VERY,amused,josh,male,99,A maddening joy pounded in his brain.,N,N
3590,amused_27-56_0031.wav,T IFTY HUNDRED YARDS APARK,amused,josh,male,31,They were three hundred yards apart.,N,N
3591,amused_85-112_0089.wav,AN THAT CROWIS PRESIDENCS UPI,amused,josh,male,89,The nightglow was treacherous to shoot by.,N,N
3594,amused_225_252_0247.wav,THE ARIPASER SOMETIMES A DAY,amused,josh,male,247,They ought to pass here some time today.,N,N
3609,amused_1-28_0016.wav,IIS FOR CHURCHILL ARRIVELSTOV YOND THE VIRGO S...,amused,josh,male,16,"There's Fort Churchill, a rifle-shot beyond th...",N,N
3610,amused_85-112_0103.wav,HA HA HA HA THE CAME NO CAMS ON THE BUCK OF TH...,amused,josh,male,103,But there came no promise from the bow of the ...,N,N
3621,amused_281-308_0298.wav,THE CHASCTHET PROMISE OF CONTINUED ACQUAINTANC...,amused,josh,male,298,This tacit promise of continued acquaintance g...,N,N
3622,amused_281-308_0304.wav,NCRATUFATED LASS OF A GLASS A HA,amused,josh,male,304,I graduated last of my class.,N,N


In [18]:
emov_failed[(emov_failed['actor'] == 'sam') &  (emov_failed['emotion'] == 'amused')]

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
4451,amused_113-140_0124.wav,HE WAS GIEF SIMMING SOFTLY OVER SON THE ROTS,amused,sam,male,124,It was Jeanne singing softly over beyond the r...,N,N
4453,amused_85-112_0097.wav,AND YOU CAN ARRANGE YOURSELF COMFORTABLY AMONG...,amused,sam,male,97,Then you can arrange yourself comfortably amon...,N,N
4464,amused_477-504_0501.wav,YOU JOKING ME SIR THE OTHER MANAGED CHARGE TIT...,amused,sam,male,501,"You're joking me, sir, the other managed to ar...",N,N
4465,amused_225-252_0234.wav,WHY THE AVERAGER VIEW IS MORE NAUTIATING THAN ...,amused,sam,male,234,"Why, the average review is more nauseating tha...",N,N
4467,amused_421-448_0424.wav,HOW HE IS LAING IT WAS A DISEASE HAS SYOULD BE...,amused,sam,male,424,"Obviously, it was a disease that could be cont...",N,N
4470,amused_421-448_0422.wav,HALF WAY AROUND THE TRACK ONE TOUH HE GOT INTO...,amused,sam,male,422,Halfway around the track one donkey got into a...,N,N
4497,amused_477-504_0483.wav,PE ON DISPUTE CORY HUTCHENSON AND MARRIED MABE...,amused,sam,male,483,"Beyond dispute, Corry Hutchinson had married M...",N,N
4511,amused_477-504_0478.wav,ALSO SHE HAS FORPIDDEN THOS SMOKING THEIR PATE...,amused,sam,male,478,"Also, she has forbidden them smoking their pip...",N,N
4521,amused_421-448_0423.wav,MAGVE HE WHEN HE RETURNS FROM A TRIP DOHATO LULU,amused,sam,male,423,McVeigh when he returns from a trip to Honolulu.,N,N
4525,amused_225-252_0251.wav,I I MAY MANAGED TO FREIGHT O CARTO BACK HIS WHILE,amused,sam,male,251,I may manage to freight a cargo back as well.,N,N


The sleepy instances are certainly noisier than the others; we can see some instances where the auto transcription is basically unrecognizable because the actor yawns while speaking, especially at the beginning of the sentence. However, the majority of them are still not 'too' noisy, and compared to the ~11.4K total instances (EmoV_DB + Arctic), the very noisy ones are only a small fraction, so we propose to leave them as is.

In [19]:
emov_failed[(emov_failed['actor'] == 'josh') &  (emov_failed['emotion'] == 'sleepy')]

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
4184,sleepiness_169-196_0195.wav,THE STRANGETI HAPPENS,sleepy,josh,male,195,But a strange thing happened.,N,N
4187,sleepiness_29-56_0056.wav,PERS'S LITTLE EYES WERE FIXED ON HIM SREWEDLY,sleepy,josh,male,56,Pearce's little eyes were fixed on him shrewdly.,N,N
4188,sleepiness_29-56_0039.wav,THEE SHIPS TO BE IN WITH IT A WEEK VERY INDY,sleepy,josh,male,39,The ship should be in within a week or ten days.,N,N
4198,sleepiness_197-224_0200.wav,HE LIVED AGAIN AND THE COAB GAUT A WOONCEMORE,sleepy,josh,male,200,"He leapt again, and the club caught him once m...",N,N
4204,sleepiness_253_280_0265.wav,THE SKOVER YE SEEM TO AVMEN NOW SPURROW OOLI,sleepy,josh,male,265,The discovery seemed to have been made on the ...,N,N
4205,sleepiness_253_280_0267.wav,EI HARDING ASKED THE SHANK SAW HER TO FALL,sleepy,josh,male,267,"Eli Harding asked, as Shunk started to follow.",N,N
4207,sleepiness_169-196_0192.wav,HEDID NOT RUSH HIM,sleepy,josh,male,192,He did not rush in.,N,N
4214,sleepiness_281-308_0308.wav,AS IF HER HERRY WER FEVEN KNOWING VACARLY,sleepy,josh,male,308,His infernal chattering worries me even now as...,N,N
4215,sleepiness_85-112_0095.wav,HER BIG GAVOUS TENT WAS THE FIRST THING TO COM...,sleepy,josh,male,95,A big canvas tent was the first thing to come ...,N,N
4219,sleepiness_253_280_0270.wav,AFTER THE BATH A SHAVE WOULD NOT BE SOBERRED,sleepy,josh,male,270,And after the bath a shave would not be bad.,N,N


In [20]:
emov_failed[(emov_failed['actor'] == 'sam') &  (emov_failed['emotion'] == 'sleepy')]

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
6397,sleepiness_85-112_0110.wav,MUCH REPLIED JEAN AS TRUTHLY,sleepy,sam,male,110,"Much, replied Jeanne, as tersely.",N,N
6405,sleepiness_336-364_0355.wav,A PURSE OF LAUGHTER WITH HIS RUR,sleepy,sam,male,355,A burst of laughter was his reward.,N,N
6406,sleepiness_336-364_0341.wav,WHY TARGON YOU ALL SAGETGAN,sleepy,sam,male,341,"Why, doggone you all, shake again.",N,N
6420,sleepiness_336-364_0347.wav,FER GREATLY DELIGHTED WOR THIRTY THE THAT IS B...,sleepy,sam,male,347,They are greatly delighted with anything that ...,N,N
6437,sleepiness_336-364_0346.wav,SUT DOWN AN TOVEN,sleepy,sam,male,346,Get down and dig in.,N,N
6440,sleepiness_336-364_0363.wav,HIS THOT IT HAD SO DESPATCH THE MERGIET,sleepy,sam,male,363,It is not an attempt to smash the market.,N,N
6458,sleepiness_449-476_0467.wav,AT SO EARLY IN THE VOYAGE TOO,sleepy,sam,male,467,"And so early in the voyage, too.",N,N
6470,sleepiness_225-252_0226.wav,HATI BEFORE WY HE SEES,sleepy,sam,male,226,That came before my A B C's.,N,N
6474,sleepiness_225-252_0246.wav,YOUVE HEARD AOYS HOW HE WAS THE LOVER OF THE P...,sleepy,sam,male,246,You have heard always how he was the lover of ...,N,N
6494,sleepiness_253-280_0258.wav,SOTON SEY WATER THEY ALL SET THE HEAT RACE,sleepy,sam,male,258,Soaked in seawater they offset the heat rays.,N,N


# Clean Arctic main.csv

## Read csv file

In [21]:
arctic_df = pd.read_csv('/content/drive/MyDrive/266/Data/Clean_Data/Arctic/main.csv')

arctic_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label
0,arctic_a0004.wav,LORD BUT I'M GLAD TO SEE YOU AGAIN PHIL,neutral,bdl,male,a0004,"Lord, but I'm glad to see you again, Phil."
1,arctic_a0050.wav,IN SPITE OF THEIR ABSURDITY THE WORDS AFFECTED...,neutral,bdl,male,a0050,In spite of their absurdity the words affected...
2,arctic_a0102.wav,HE WILL FOLLOW US SOON,neutral,bdl,male,a0102,He will follow us soon.
3,arctic_a0072.wav,BUT WHO WAS ILEN'S DOUBLE,neutral,bdl,male,a0072,But who was Eileen's double.
4,arctic_a0124.wav,IT WAS JEAN SINGING SOFTLY OVER BEYOND THE ROCKS,neutral,bdl,male,a0124,It was Jeanne singing softly over beyond the r...


In [22]:
arctic_df['emotion'].value_counts()

neutral    4528
Name: emotion, dtype: int64

In [23]:
arctic_df['actor'].value_counts()

bdl    1132
clb    1132
rms    1132
slt    1132
Name: actor, dtype: int64

In [24]:
arctic_df['gender'].value_counts()

male      2264
female    2264
Name: gender, dtype: int64

## Find instances of label with digits & replace with spelled out words

In [25]:
arctic_df[arctic_df['label'].str.contains(r'\d')]['label'].value_counts()

At sea, Monday, March 16, 1908.       4
At sea, Wednesday, March 18, 1908.    4
The 29th very foggy.                  4
At sea, Tuesday, March 17, 1908.      4
Name: label, dtype: int64

In [26]:
# Spell out the numbers listed by the previous cell. regex=False makes every
# pattern a literal substring, independent of the pandas version's default.
for digits, words in [
    ('1908', 'nineteen o eight'),
    ('29th', 'twenty ninth'),
    ('16', 'sixteenth'),
    ('17', 'seventeenth'),
    ('18', 'eighteenth'),
]:
  arctic_df['label'] = arctic_df['label'].str.replace(digits, words, regex=False)

In [27]:
assert len(arctic_df[arctic_df['label'].str.contains(r'\d')]) == 0

## Inspect very noisy auto-transcription

In [28]:
front_assertion(arctic_df)
back_assertion(arctic_df)

arctic_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
0,arctic_a0004.wav,LORD BUT I'M GLAD TO SEE YOU AGAIN PHIL,neutral,bdl,male,a0004,"Lord, but I'm glad to see you again, Phil.",N,Y
1,arctic_a0050.wav,IN SPITE OF THEIR ABSURDITY THE WORDS AFFECTED...,neutral,bdl,male,a0050,In spite of their absurdity the words affected...,Y,Y
2,arctic_a0102.wav,HE WILL FOLLOW US SOON,neutral,bdl,male,a0102,He will follow us soon.,Y,Y
3,arctic_a0072.wav,BUT WHO WAS ILEN'S DOUBLE,neutral,bdl,male,a0072,But who was Eileen's double.,Y,Y
4,arctic_a0124.wav,IT WAS JEAN SINGING SOFTLY OVER BEYOND THE ROCKS,neutral,bdl,male,a0124,It was Jeanne singing softly over beyond the r...,Y,Y


In [29]:
arctic_failed = arctic_df[(arctic_df['front_assertions'] == 'N') & (arctic_df['back_assertions'] == 'N')]

arctic_failed

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
207,arctic_a0484.wav,NO SUREE,neutral,bdl,male,a0484,No-sir-ee.,N,N
236,arctic_a0478.wav,ALSO SHE HAS FORBIDDEN THEM SMOKING THEIR PIPE...,neutral,bdl,male,a0478,"Also, she has forbidden them smoking their pip...",N,N
299,arctic_a0423.wav,MAKE VAY WHEN HE RETURNS FROM A TRIP TO HANNALULU,neutral,bdl,male,a0423,McVeigh when he returns from a trip to Honolulu.,N,N
369,arctic_a0551.wav,ALL IN APPEARANCE CAN KNOW AS MARAGE,neutral,bdl,male,a0551,All an appearance can know is mirage.,N,N
416,arctic_a0506.wav,THE PLAUDIN WAS LEAVING NEXT MORNING FOR HANALULU,neutral,bdl,male,a0506,The Claudine was leaving next morning for Hono...,N,N
442,arctic_a0523.wav,HEY COULD NOT CONTINUE THEIR METHOD OF PRODUCI...,neutral,bdl,male,a0523,They could not continue their method of produc...,N,N
673,arctic_b0087.wav,HEY WILL SEARCH OR US BETWEEN THEIR CAMP AND C...,neutral,bdl,male,b0087,They will search for us between their camp and...,N,N
675,arctic_b0156.wav,OR THAT REASON LIBEAU HAD CHOSEN HIM TO FIGHT ...,neutral,bdl,male,b0156,For that reason Le Beau had chosen him to figh...,N,N
1420,arctic_a0423.wav,MAC VAY WHEN HE RETURNS FROM A TRIP TO HANNALULU,neutral,clb,female,a0423,McVeigh when he returns from a trip to Honolulu.,N,N
1477,arctic_a0478.wav,ALSO SHE HAS FORBIDDEN THEM SMOKING THEIR PIPE...,neutral,clb,female,a0478,"Also, she has forbidden them smoking their pip...",N,N


We can see that the dataset is fairly clean without many noisy instances

In [30]:
# Number of rows failing both checks, per (actor, emotion) pair.
arctic_failed_grouped = arctic_failed.groupby(['actor','emotion']).size()

arctic_failed_grouped

actor  emotion
bdl    neutral    8
clb    neutral    4
rms    neutral    5
slt    neutral    6
dtype: int64

There are 23 instances where both the front and back assertions failed, around 0.5% of the total 4.5K instances.

In [31]:
arctic_failed_grouped.sum()

23

In [32]:
len(arctic_df)

4528

In [33]:
arctic_failed_grouped.sum() / len(arctic_df)

0.0050795053003533566

# Merge EmoV_DB and Arctic to one dataset

In [34]:
assert emov_df.columns.equals(arctic_df.columns)

In [35]:
# ignore_index=True gives the merged frame a fresh 0..n-1 index; without it
# the row labels of both inputs are kept, so the same label appears twice.
merged_df = pd.concat([emov_df, arctic_df], ignore_index=True)

merged_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
0,amused_29-45_0042.wav,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,amused,bea,female,42,How could he explain his possession of the ske...,Y,Y
1,amused_46-56_0046.wav,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,amused,bea,female,46,"The girl faced him, her eyes shining with sudd...",Y,Y
2,amused_1-15_0005.wav,WILL WE EVER FORGET IT,amused,bea,female,5,Will we ever forget it.,Y,Y
3,amused_281-308_0281.wav,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,amused,bea,female,281,I do not blame you for anything; remember that.,Y,Y
4,amused_225-252_0226.wav,THAT CAME BEFORE MY A V CS,amused,bea,female,226,That came before my A B C's.,Y,N


In [36]:
len(merged_df)

11420

In [37]:
# drop the assertion columns
merged_df.drop(columns=['front_assertions', 'back_assertions'], inplace=True)

merged_df.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label
0,amused_29-45_0042.wav,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,amused,bea,female,42,How could he explain his possession of the ske...
1,amused_46-56_0046.wav,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,amused,bea,female,46,"The girl faced him, her eyes shining with sudd..."
2,amused_1-15_0005.wav,WILL WE EVER FORGET IT,amused,bea,female,5,Will we ever forget it.
3,amused_281-308_0281.wav,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,amused,bea,female,281,I do not blame you for anything; remember that.
4,amused_225-252_0226.wav,THAT CAME BEFORE MY A V CS,amused,bea,female,226,That came before my A B C's.


In [38]:
# clean up the order of the columns
ordered_cols = ['filename','clean_filename','actor','gender','emotion','auto_transcription','label']

merged_df = merged_df[ordered_cols]

merged_df.head()

Unnamed: 0,filename,clean_filename,actor,gender,emotion,auto_transcription,label
0,amused_29-45_0042.wav,42,bea,female,amused,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,How could he explain his possession of the ske...
1,amused_46-56_0046.wav,46,bea,female,amused,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,"The girl faced him, her eyes shining with sudd..."
2,amused_1-15_0005.wav,5,bea,female,amused,WILL WE EVER FORGET IT,Will we ever forget it.
3,amused_281-308_0281.wav,281,bea,female,amused,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,I do not blame you for anything; remember that.
4,amused_225-252_0226.wav,226,bea,female,amused,THAT CAME BEFORE MY A V CS,That came before my A B C's.


In [39]:
merged_df['emotion'].value_counts()

neutral    6096
sleepy     1720
amused     1317
angry      1268
disgust    1019
Name: emotion, dtype: int64

In [40]:
merged_df['gender'].value_counts()

female    5840
male      5580
Name: gender, dtype: int64

In [41]:
merged_df['label'].value_counts()

How could he explain his possession of the sketch.                       22
In spite of their absurdity the words affected Philip curiously.         22
Her own betrayal of herself was like tonic to Philip.                    22
I had faith in them.                                                     22
They were three hundred yards apart.                                     22
                                                                         ..
They laughed like two happy children.                                     4
Besides, had he not whipped the big owl in the forest.                    4
You're a devil for fighting, and will surely win.                         4
One guess will do, Ernest retorted.                                       4
It would give me nervous prostration. She said with chattering teeth.     1
Name: label, Length: 1133, dtype: int64

# Export the merged csv to disk

In [42]:
merged_df.to_csv('/content/drive/MyDrive/266/Data/Clean_Data/EmoV_Arctic/merged.csv', index=False)