# Import libraries

In [1]:
# drive access
from google.colab import drive
drive.mount('/content/drive')

# data handling (third-party)
import numpy as np
import pandas as pd

# for audio
import librosa
from IPython.display import Audio

Mounted at /content/drive


# Read the transcription csv

In [2]:
# Load the reference transcriptions. Later cells look rows up by the `filename`
# column and read the text from the `transcription` column.
transcription = pd.read_csv('/content/drive/MyDrive/266/Data/Clean_Data/Arctic/transcription.csv')

# Preview the first rows.
transcription.head()

Unnamed: 0,filename,transcription
0,a0001,"Author of the danger trail, Philip Steels, etc."
1,a0002,"Not at this particular case, Tom, apologized W..."
2,a0003,For the twentieth time that evening the two me...
3,a0004,"Lord, but I'm glad to see you again, Phil."
4,a0005,Will we ever forget it.


In [3]:
# Preview the last rows of the reference transcriptions.
transcription.tail()

Unnamed: 0,filename,transcription
1127,b0535,He read his fragments aloud.
1128,b0536,Typhoid -- did I tell you.
1129,b0537,But she had become an automaton.
1130,b0538,"At the best, they were necessary accessories."
1131,b0539,"You were making them talk shop, Ruth charged him."


# Clean rms csv

In [4]:
# Load the automatic transcriptions. Later cells use the `filename` and
# `auto_transcription` columns of this frame.
rms = pd.read_csv('/content/drive/MyDrive/266/Data/Clean_Data/Arctic/auto_transcription/rms.csv')

# Preview the first rows.
rms.head()

Unnamed: 0,filename,auto_transcription
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE


In [5]:
# Tag every row with the same constant emotion value.
rms = rms.assign(emotion='neutral')

rms.head()

Unnamed: 0,filename,auto_transcription,emotion
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral


### Add actor and gender attributes to the total csv

In [6]:
# Every row in this frame gets the same speaker attributes.
rms = rms.assign(actor='rms', gender='male')

rms.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male


## Get the cleaned filenames

In [7]:
# Reduce each wav filename to its utterance id: keep the part before the first
# '.', then the part after the last '_' (e.g. 'arctic_a0133.wav' -> 'a0133').
clean_filenames = [
  wav_name.split('.')[0].split('_')[-1]
  for wav_name in rms['filename']
]

# Exactly one id per row.
assert len(clean_filenames) == len(rms)

clean_filenames[:5]

['a0133', 'a0135', 'a0134', 'a0136', 'a0137']

In [8]:
# Store the utterance ids alongside the original filenames.
rms = rms.assign(clean_filename=clean_filenames)

rms.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male,a0133
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male,a0135
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male,a0134
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male,a0136
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male,a0137


## Get the true transcriptions/labels based on cleaned filenames

In [9]:
# Look up the reference transcription ("label") for every utterance id in
# rms['clean_filename'], preserving row order.
wanted_ids = rms['clean_filename']

# Index only the reference rows we need by filename, so each id is resolved with
# a single indexed lookup instead of a full scan of `transcription` per row.
reference = (
  transcription[transcription['filename'].isin(wanted_ids)]
  .set_index('filename')['transcription']
)

# Every id must resolve to exactly one reference row. Fail loudly and name the
# offending ids so a bad join is easy to diagnose.
missing_ids = wanted_ids[~wanted_ids.isin(reference.index)].unique().tolist()
if missing_ids:
  raise ValueError(f'no reference transcription for: {missing_ids[:10]}')

duplicated_ids = reference.index[reference.index.duplicated()].unique().tolist()
if duplicated_ids:
  raise ValueError(f'multiple reference transcriptions for: {duplicated_ids[:10]}')

# Plain Python list, one label per row of `rms`.
labels = wanted_ids.map(reference).tolist()

assert len(labels) == len(rms)

labels[:5]

['Philip began to feel that he had foolishly overestimated his strength.',
 'I am going to surprise father, and you will go with Pierre.',
 'He obeyed the pressure of her hand.',
 'About him, everywhere, were the evidences of luxury and of age.',
 'Then he stepped back with a low cry of pleasure.']

In [10]:
# Attach the reference transcriptions next to the automatic ones.
rms = rms.assign(label=labels)

rms.head(10)

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male,a0133,Philip began to feel that he had foolishly ove...
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male,a0135,"I am going to surprise father, and you will go..."
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male,a0134,He obeyed the pressure of her hand.
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male,a0136,"About him, everywhere, were the evidences of l..."
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male,a0137,Then he stepped back with a low cry of pleasure.
5,arctic_a0138.wav,IN THE PICTURE HE SAW EACH MOMENT A GREATER RE...,neutral,rms,male,a0138,In the picture he saw each moment a greater re...
6,arctic_a0140.wav,ACCEPT A FATHER'S BLESSING AND WITH IT THIS,neutral,rms,male,a0140,"Accept a father's blessing, and with it, this."
7,arctic_a0139.wav,HE TOLD HIMSELF THAT AS HE WASHED HIMSELF AND ...,neutral,rms,male,a0139,He told himself that as he washed himself and ...
8,arctic_a0141.wav,IT SEEMS LIKE A STRANGE POINTING OF THE HAND O...,neutral,rms,male,a0141,It seems like a strange pointing of the hand o...
9,arctic_a0143.wav,AH I HAD FORGOTTEN HE EXCLAIMED,neutral,rms,male,a0143,"Ah, I had forgotten, he exclaimed."


## Sanity-check the filename matching: confirm that the first 5 characters of the auto transcription and the label match, and that the last 5 characters match; manually investigate any row where either check fails

### check the first 5 characters

In [11]:
# Flag each row 'Y' when the first 5 characters of the automatic transcription
# equal the first 5 characters of the label (case-insensitive), otherwise 'N'.
front_assertions = [
  'Y' if str(auto_text)[:5].lower() == str(label_text)[:5].lower() else 'N'
  for auto_text, label_text in zip(rms['auto_transcription'], rms['label'])
]

# Exactly one flag per row.
assert len(front_assertions) == len(rms)

front_assertions[:5]

['Y', 'Y', 'Y', 'Y', 'Y']

In [12]:
# Keep the front-check flags on the frame so failing rows can be filtered later.
rms = rms.assign(front_assertions=front_assertions)

rms.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male,a0133,Philip began to feel that he had foolishly ove...,Y
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male,a0135,"I am going to surprise father, and you will go...",Y
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male,a0134,He obeyed the pressure of her hand.,Y
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male,a0136,"About him, everywhere, were the evidences of l...",Y
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male,a0137,Then he stepped back with a low cry of pleasure.,Y


In [13]:
# Count how many rows passed ('Y') and failed ('N') the first-5-characters check.
rms['front_assertions'].value_counts()

Y    1033
N      99
Name: front_assertions, dtype: int64

### check the last 5 characters

In [14]:
# Flag each row 'Y' when the last 5 characters of the automatic transcription
# equal the last 5 characters of the label once every '.' has been removed from
# the label (case-insensitive), otherwise 'N'.
back_assertions = [
  'Y' if str(auto_text)[-5:].lower() == str(label_text).replace('.','')[-5:].lower() else 'N'
  for auto_text, label_text in zip(rms['auto_transcription'], rms['label'])
]

# Exactly one flag per row.
assert len(back_assertions) == len(rms)

back_assertions[:5]

['Y', 'Y', 'Y', 'Y', 'Y']

In [15]:
# Keep the back-check flags on the frame so failing rows can be filtered later.
rms = rms.assign(back_assertions=back_assertions)

rms.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male,a0133,Philip began to feel that he had foolishly ove...,Y,Y
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male,a0135,"I am going to surprise father, and you will go...",Y,Y
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male,a0134,He obeyed the pressure of her hand.,Y,Y
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male,a0136,"About him, everywhere, were the evidences of l...",Y,Y
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male,a0137,Then he stepped back with a low cry of pleasure.,Y,Y


In [16]:
# Count how many rows passed ('Y') and failed ('N') the last-5-characters check.
rms['back_assertions'].value_counts()

Y    1068
N      64
Name: back_assertions, dtype: int64

### manually check instances where either the front or the back assertion failed

In [17]:
# Rows where at least one of the two checks is flagged 'N'.
rms[rms[['front_assertions', 'back_assertions']].eq('N').any(axis=1)]

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label,front_assertions,back_assertions
5,arctic_a0138.wav,IN THE PICTURE HE SAW EACH MOMENT A GREATER RE...,neutral,rms,male,a0138,In the picture he saw each moment a greater re...,Y,N
9,arctic_a0143.wav,AH I HAD FORGOTTEN HE EXCLAIMED,neutral,rms,male,a0143,"Ah, I had forgotten, he exclaimed.",N,Y
17,arctic_a0150.wav,GOOD BYE PIERRE HE SHOUTED,neutral,rms,male,a0150,"Goodbye, Pierre, he shouted.",N,Y
20,arctic_a0153.wav,MIC DUGEL TAPPED HIS FOREHEAD SUSPICIOUSLY WIT...,neutral,rms,male,a0153,MacDougall tapped his forehead suspiciously wi...,N,Y
31,arctic_a0164.wav,HE CAME FIRST A YEAR AGO AND REVEALED HIMSELF ...,neutral,rms,male,a0164,"He came first a year ago, and revealed himself...",Y,N
...,...,...,...,...,...,...,...,...,...
1109,arctic_a0110.wav,MUCH REPLIED GEAN ESTURSELEY,neutral,rms,male,a0110,"Much, replied Jeanne, as tersely.",N,N
1113,arctic_a0114.wav,HER WORDS SENT A STRANGE CHILL THROUGH FILIP,neutral,rms,male,a0114,Her words sent a strange chill through Philip.,Y,N
1118,arctic_a0119.wav,JEAN WAS TURNING THE BOW SHOREWARD,neutral,rms,male,a0119,Jeanne was turning the bow shoreward.,N,Y
1120,arctic_a0120.wav,MY RIGHT FOOT FEELS LIKE THAT OF A CHINESE DEB...,neutral,rms,male,a0120,My right foot feels like that of a Chinese deb...,Y,N


Manually checked the instances where either the front or the back assertion failed and confirmed that none of them are mismatched.

# Export the cleaned rms csv to be merged to the main csv

In [18]:
# The check flags were only needed for the manual review; remove them before export.
rms = rms.drop(columns=['front_assertions', 'back_assertions'])

rms.head()

Unnamed: 0,filename,auto_transcription,emotion,actor,gender,clean_filename,label
0,arctic_a0133.wav,PHILIP BEGAN TO FEEL THAT HE HAD FOOLISHLY OVE...,neutral,rms,male,a0133,Philip began to feel that he had foolishly ove...
1,arctic_a0135.wav,I AM GOING TO SURPRISE FATHER AND YOU WILL GO ...,neutral,rms,male,a0135,"I am going to surprise father, and you will go..."
2,arctic_a0134.wav,HE OBEYED THE PRESSURE OF HER HAND,neutral,rms,male,a0134,He obeyed the pressure of her hand.
3,arctic_a0136.wav,ABOUT HIM EVERYWHERE WERE THE EVIDENCES OF LUX...,neutral,rms,male,a0136,"About him, everywhere, were the evidences of l..."
4,arctic_a0137.wav,THEN HE STEPPED BACK WITH A LOW CRY OF PLEASURE,neutral,rms,male,a0137,Then he stepped back with a low cry of pleasure.


In [19]:
# Write the cleaned frame to CSV; index=False keeps the row index out of the file.
rms.to_csv('/content/drive/MyDrive/266/Data/Clean_Data/Arctic/rms.csv', index=False)