# Configuration

NOTES: The warnings after the import are referred to the fact that Tensorflow 2.x versions are built to directly look for a GPU in the system. The warning can be forgot if you are not going to use the GPU. 

In [1]:
LENGTH_CHOSEN = 126520

In [None]:
!pip install fsspec

In [None]:
!virtualenv myenv

In [None]:
!python3 -m venv myenv

In [2]:
!source myenv/bin/activate

In [3]:
!pip install seaborn



In [4]:
!pip install ipywidgets



In [67]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

[]

# Get data from datasets

In [68]:
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'
TESS = os.path.join(main_path, "tess/TESS Toronto emotional speech set data/") 
RAV = os.path.join(main_path, "ravdess-emotional-speech-audio/audio_speech_actors_01-24")
SAVEE = os.path.join(main_path, "savee/ALL/")
CREMA = os.path.join(main_path, "creamd/AudioWAV/")

dir_list = os.listdir(RAV)

## RADVESS

In [69]:
lst = []
emotion = []
voc_channel = []
full_path = []
modality = []
intensity = []
actors = []
phrase =[]

for root, dirs, files in tqdm(os.walk(RAV)):
    for file in files:
        try:
            #Load librosa array, obtain mfcss, store the file and the mfcss information in a new array
            # X, sample_rate = librosa.load(os.path.join(root,file), res_type='kaiser_fast')
            # mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
            # The instruction below converts the labels (from 1 to 8) to a series from 0 to 7
            # This is because our predictor needs to start from 0 otherwise it will try to predict also 0.
           
            modal = int(file[1:2])
            vchan = int(file[4:5])
            lab = int(file[7:8])
            ints = int(file[10:11])
            phr = int(file[13:14])
            act = int(file[18:20])
            # arr = mfccs, lab
            # lst.append(arr)
            
            modality.append(modal)
            voc_channel.append(vchan)
            emotion.append(lab) #only labels
            intensity.append(ints)
            phrase.append(phr)
            actors.append(act)
            
            full_path.append((root, file)) # only files
          # If the file is not valid, skip it
        except ValueError:
            continue

25it [00:00, 1079.48it/s]


In [70]:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
emotions_list = ['neutral', 'calm', 'happy', 'sadness', 'angry', 'fear', 'disgust', 'surprise']
emotion_dict = {em[0]+1:em[1] for em in enumerate(emotions_list)}

df = pd.DataFrame([emotion, voc_channel, modality, intensity, actors, actors,phrase, full_path]).T
df.columns = ['emotion', 'voc_channel', 'modality', 'intensity', 'actors', 'gender', 'phrase', 'path']
df['emotion'] = df['emotion'].map(emotion_dict)
df['voc_channel'] = df['voc_channel'].map({1: 'speech', 2:'song'})
df['modality'] = df['modality'].map({1: 'full AV', 2:'video only', 3:'audio only'})
df['intensity'] = df['intensity'].map({1: 'normal', 2:'strong'})
df['actors'] = df['actors']
df['gender'] = df['actors'].apply(lambda x: 'female' if x%2 == 0 else 'male')
df['phrase'] = df['phrase'].map({1: 'Kids are talking by the door', 2:'Dogs are sitting by the door'})
df['path'] = df['path'].apply(lambda x: x[0] + '/' + x[1])

In [71]:
# remove files with noise to apply the same noise to all files for data augmentation 
df = df[~df.path.str.contains('noise')]

In [72]:
df.head()

Unnamed: 0,emotion,voc_channel,modality,intensity,actors,gender,phrase,path
0,disgust,speech,audio only,normal,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [73]:
# only speech
RAV_df = df
RAV_df = RAV_df.loc[RAV_df.voc_channel == 'speech']

In [74]:
RAV_df.insert(0, "emotion_label", RAV_df.emotion, True)

In [75]:
RAV_df = RAV_df.drop(['emotion', 'voc_channel', 'modality', 'intensity', 'phrase'], 1)

In [76]:
RAV_df

Unnamed: 0,emotion_label,actors,gender,path
0,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
...,...,...,...,...
2871,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2873,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2875,calm,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2877,calm,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [77]:
RAV_train = []
RAV_val = []
RAV_test = []

In [78]:
for index, row in RAV_df.iterrows():
    if row['actors'] in range(1,21): 
        RAV_train.append(row) 
    elif row['actors'] in range(21,23): 
        RAV_val.append(row)
    elif row['actors'] in range(23,25): 
        RAV_test.append(row)
len(RAV_train), len(RAV_val), len(RAV_test)

(1200, 120, 120)

In [79]:
RAV_train = pd.DataFrame(RAV_train)
RAV_val = pd.DataFrame(RAV_val)
RAV_test = pd.DataFrame(RAV_test)

In [80]:
RAV_train = RAV_train.drop(['actors'], 1)
RAV_val = RAV_val.drop(['actors'], 1)
RAV_test = RAV_test.drop(['actors'], 1)

In [81]:
RAV_train.head()

Unnamed: 0,emotion_label,gender,path
0,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [82]:
RAV_val.head()

Unnamed: 0,emotion_label,gender,path
2400,angry,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2402,fear,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2404,fear,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2406,fear,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2408,fear,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [274]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/pre-processed"
RAV_df.to_csv(os.path.join(preprocess_path,"RAV_df.csv"), index=False)

## SAVEE

In [83]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# parse the filename to get the emotions
emotion=[]
path = []
actors = []
gender = []
for i in dir_list:
    actors.append(i[:2])
    if i[-8:-6]=='_a':
        emotion.append('angry')
        gender.append('male')
    elif i[-8:-6]=='_d':
        emotion.append('disgust')
        gender.append('male')
    elif i[-8:-6]=='_f':
        emotion.append('fear')
        gender.append('male')
    elif i[-8:-6]=='_h':
        emotion.append('happy')
        gender.append('male')
    elif i[-8:-6]=='_n':
        emotion.append('neutral')
        gender.append('male')
    elif i[-8:-6]=='sa':
        emotion.append('sadness')
        gender.append('male')
    elif i[-8:-6]=='su':
        emotion.append('surprise')
        gender.append('male') 
    else:
        emotion.append('Unknown') 
    path.append(SAVEE + i)
    
# Now check out the label count distribution 
SAVEE_df = pd.DataFrame(emotion, columns = ['emotion_label'])
                      
SAVEE_df = pd.concat([SAVEE_df,
                      pd.DataFrame(actors, columns = ['actors']),
                      pd.DataFrame(gender, columns = ['gender']), 
                      pd.DataFrame(path, columns = ['path'])], axis = 1)
SAVEE_df.emotion_label.value_counts()

neutral     120
sadness      60
surprise     60
happy        60
disgust      60
fear         60
angry        60
Name: emotion_label, dtype: int64

In [84]:
SAVEE_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,neutral,DC,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [85]:
SAVEE_train = []
SAVEE_val = []
SAVEE_test = []

In [86]:
#DC, JE, JK, KL
for index, row in SAVEE_df.iterrows(): 
    if row['actors'] == 'DC' or row ['actors'] == 'JE':
        SAVEE_train.append(row)
    elif row['actors'] == 'JK': 
        SAVEE_val.append(row)
    else: 
        SAVEE_test.append(row)
len(SAVEE_train), len(SAVEE_val), len(SAVEE_test)

(240, 120, 120)

In [87]:
SAVEE_train = pd.DataFrame(SAVEE_train)
SAVEE_val = pd.DataFrame(SAVEE_val)
SAVEE_test = pd.DataFrame(SAVEE_test)

In [88]:
SAVEE_train = SAVEE_train.drop(['actors'], 1)
SAVEE_val = SAVEE_val.drop(['actors'], 1)
SAVEE_test = SAVEE_test.drop(['actors'], 1)

In [275]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/pre-processed"
SAVEE_df.to_csv(os.path.join(preprocess_path,"SAVEE_df.csv"), index=False)

## TESS

In [89]:
dir_list = os.listdir(TESS)
dir_list.sort()
dir_list

path = []
emotion = []
gender = []
actors = []

for i in dir_list:
    fname = os.listdir(TESS + i)
    for f in fname:
        if i == 'OAF_angry':
            emotion.append('angry')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_angry': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_disgust' :
            emotion.append('disgust')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_disgust': 
            emotion.append('disgust')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_Fear':
            emotion.append('fear')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_fear': 
            emotion.append('fear')
            gender.append('female')
            actors.append('YAF') 
            
            
        elif i == 'OAF_happy' :
            emotion.append('happy')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_happy': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_neutral':
            emotion.append('neutral')
            gender.append('female')
            actors.append('OAF')   
        elif i == 'YAF_neutral': 
            emotion.append('neutral')
            gender.append('female')
            actors.append('YAF')      
            
                
        elif i == 'OAF_Pleasant_surprise':
            emotion.append('surprise')
            gender.append('female')
            actors.append('OAF')
        
        elif i == 'YAF_pleasant_surprised': 
            emotion.append('surprise')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_Sad':
            emotion.append('sadness')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_sad': 
            emotion.append('sadness')
            gender.append('female')
            actors.append('YAF')            
        else:
            emotion.append('Unknown')
        path.append(TESS + i + "/" + f)

TESS_df = pd.DataFrame(emotion, columns = ['emotion_label'])
TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns = ['gender']), 
                     pd.DataFrame(actors, columns= ['actors']),
                     pd.DataFrame(path, columns = ['path'])],axis=1)
TESS_df.emotion_label.value_counts()

angry       1200
fear         800
surprise     800
sadness      800
disgust      800
neutral      800
happy        400
Name: emotion_label, dtype: int64

In [90]:
TESS_df= TESS_df[~TESS_df.path.str.contains('noise')]

In [91]:
TESS_train = []
TESS_test = []

In [92]:
for index, row in TESS_df.iterrows(): 
    if row['actors'] == 'YAF': 
        TESS_train.append(row)
    else: 
        TESS_test.append(row)
len(TESS_train), len(TESS_test)

(1400, 1400)

In [93]:
TESS_train = pd.DataFrame(TESS_train)
TESS_test = pd.DataFrame(TESS_test)

In [125]:
TESS_train = TESS_train.drop(['actors'], 1)
TESS_test = TESS_test.drop(['actors'], 1)

In [276]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/pre-processed"
TESS_df.to_csv(os.path.join(preprocess_path,"TESS_df.csv"), index=False)

## CREMA-D

In [94]:
males = [1,
5,
11,
14,
15,
16,
17,
19,
22,
23,
26,
27,
31,
32,
33,
34,
35,
36,
38,
39,
41,
42,
44,
45,
48,
50,
51,
57,
59, 
62, 
64,
65, 
66,
67,
68,
69,
70,
71,
77, 
80, 
81, 
83, 
85, 
86, 
87,
88, 
90]

In [95]:
females = [ 2,
3,
4,
6,
7,
8,
9,
10,
12,
13,
18,
20,
21,
24,
25,
28,
29,
30,
37,
40,
43,
46,
47,
49,
52,
53,
54,
55,
56, 
58, 
60,
61,
63,
72, 
73, 
74, 
75, 
76, 
78, 
79, 
82, 
84, 
89, 
91]

In [96]:
crema_directory_list = os.listdir(CREMA)

file_emotion = []
file_path = []
actors = []
gender = []




for file in crema_directory_list:

    # storing file emotions
    part=file.split('_')
    
    # use only high intensity files
    if "HI" in part[3] :
        actor = part[0][2:]
        actors.append(actor)
        if int(actor) in males:
            gender.append('male')
        else: 
            gender.append('female')
    
        # storing file paths
        file_path.append(CREMA + file)
        if part[2] == 'SAD':
            file_emotion.append('sadness')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['emotion_label'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['path'])
actors_df = pd.DataFrame(actors, columns=['actors'])
gender_df = pd.DataFrame(gender, columns=['gender'])                      
Crema_df = pd.concat([emotion_df, actors_df, gender_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,happy,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,angry,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,fear,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [97]:
Crema_df.shape

(455, 4)

In [98]:
actor_files = {}

for index, row in Crema_df.iterrows():
    actor = row['actors']
    if actor not in actor_files.keys(): 
        actor_files[actor] = 1
    else: 
        actor_files[actor]+=1

In [99]:
actor_files

{'91': 5,
 '90': 5,
 '89': 5,
 '88': 5,
 '87': 5,
 '86': 5,
 '85': 5,
 '84': 5,
 '83': 5,
 '82': 5,
 '81': 5,
 '80': 5,
 '79': 5,
 '78': 5,
 '77': 5,
 '76': 5,
 '75': 5,
 '74': 5,
 '73': 5,
 '72': 5,
 '71': 5,
 '70': 5,
 '69': 5,
 '68': 5,
 '67': 5,
 '66': 5,
 '65': 5,
 '64': 5,
 '63': 5,
 '62': 5,
 '61': 5,
 '60': 5,
 '59': 5,
 '58': 5,
 '57': 5,
 '56': 5,
 '55': 5,
 '54': 5,
 '53': 5,
 '52': 5,
 '51': 5,
 '50': 5,
 '49': 5,
 '48': 5,
 '47': 5,
 '46': 5,
 '45': 5,
 '44': 5,
 '43': 5,
 '42': 5,
 '41': 5,
 '40': 5,
 '39': 5,
 '38': 5,
 '37': 5,
 '36': 5,
 '35': 5,
 '34': 5,
 '33': 5,
 '32': 5,
 '31': 5,
 '30': 5,
 '29': 5,
 '28': 5,
 '27': 5,
 '26': 5,
 '25': 5,
 '24': 5,
 '23': 5,
 '22': 5,
 '21': 5,
 '20': 5,
 '19': 5,
 '18': 5,
 '17': 5,
 '16': 5,
 '15': 5,
 '14': 5,
 '13': 5,
 '12': 5,
 '11': 5,
 '10': 5,
 '09': 5,
 '08': 5,
 '07': 5,
 '06': 5,
 '05': 5,
 '04': 5,
 '03': 5,
 '02': 5,
 '01': 5}

In [100]:
count_males = 0 
count_females = 0 
male_list = []
for index, row in Crema_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1

In [101]:
count_males, count_females

(235, 220)

Since there are more males than females we will remove randomly 3 male actors (since there are exactly 5 audio files per actor)

In [102]:
import random 
random.seed(42)
males_to_remove = random.sample(male_list, 3)
males_to_remove

['17', '80', '88']

In [103]:
new_df = []
for index, row in Crema_df.iterrows(): 
    if row['actors'] not in males_to_remove: 
        new_df.append(row)

In [104]:
CREMA_df = pd.DataFrame(new_df)

In [105]:
for index, row in CREMA_df.iterrows(): 
    if row['actors'] == '17': 
        print("Elements not removed")

In [106]:
count_males = 0 
count_females = 0 
male_list = []
female_list = []
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1
        if actor not in female_list: 
            female_list.append(actor)

In [107]:
count_males, count_females

(220, 220)

In [108]:
len(female_list)

44

In [109]:
len(male_list)

44

In [110]:
CREMA_train = []
CREMA_val = []
CREMA_test = []

In [111]:
females_train = random.sample(female_list, 32)
males_train = random.sample(male_list, 32)

# remove the elements assigned to train 
for element in females_train:
    if element in female_list:
        female_list.remove(element)
        
for element in males_train:
    if element in male_list:
        male_list.remove(element)

         
females_val = random.sample(female_list, 6) 
males_val = random.sample(male_list, 6) 

# remove the elements assigned to val
for element in females_val:
    if element in female_list:
        female_list.remove(element)
        
for element in males_val:
    if element in male_list:
        male_list.remove(element)
        
females_test = random.sample(female_list, 6) 
males_test = random.sample(male_list, 6)        

In [112]:
females_train, males_train, females_val, males_val, females_test, males_test

(['54',
  '56',
  '58',
  '74',
  '76',
  '13',
  '78',
  '29',
  '84',
  '89',
  '09',
  '60',
  '04',
  '55',
  '52',
  '91',
  '02',
  '07',
  '46',
  '49',
  '37',
  '10',
  '20',
  '75',
  '21',
  '53',
  '06',
  '28',
  '18',
  '63',
  '30',
  '03'],
 ['57',
  '69',
  '65',
  '45',
  '77',
  '81',
  '41',
  '15',
  '44',
  '23',
  '59',
  '86',
  '34',
  '01',
  '85',
  '66',
  '31',
  '33',
  '05',
  '48',
  '50',
  '67',
  '51',
  '22',
  '36',
  '87',
  '71',
  '39',
  '42',
  '11',
  '32',
  '14'],
 ['43', '61', '40', '47', '73', '24'],
 ['62', '68', '64', '83', '70', '26'],
 ['08', '79', '12', '25', '72', '82'],
 ['16', '19', '38', '35', '27', '90'])

In [113]:
train = females_train + males_train 
val = females_val + males_val 
test = females_test + males_test

In [114]:
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if actor in train: 
        CREMA_train.append(row)
    elif actor in val: 
        CREMA_val.append(row)
    else:
        CREMA_test.append(row)

In [115]:
CREMA_train = pd.DataFrame(CREMA_train) 
CREMA_val = pd.DataFrame(CREMA_val) 
CREMA_test = pd.DataFrame(CREMA_test)

In [116]:
CREMA_train.shape, CREMA_val.shape, CREMA_test.shape

((320, 4), (60, 4), (60, 4))

In [117]:
CREMA_train.head()

Unnamed: 0,emotion_label,actors,gender,path
0,happy,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,angry,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,fear,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [118]:
CREMA_train = CREMA_train.drop(['actors'], 1)
CREMA_val = CREMA_val.drop(['actors'], 1)
CREMA_test = CREMA_test.drop(['actors'], 1)

In [None]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/pre-processed"
CRE_df.to_csv(os.path.join(preprocess_path,"RAV_df.csv"), index=False)

## Combine datasets

In [126]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train, CREMA_train])

In [127]:
df_val = pd.concat([RAV_val, SAVEE_val, CREMA_val])

In [128]:
df_test = pd.concat([RAV_test, SAVEE_test, TESS_test, CREMA_test])

In [129]:
df_train.shape, df_val.shape, df_test.shape

((3160, 3), (300, 3), (1700, 3))

In [130]:
df_test.tail()

Unnamed: 0,emotion_label,gender,path
412,fear,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
413,happy,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
414,sadness,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
415,angry,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
416,disgust,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


# Exploratory Data Analysis 

We will do the EDA on all files, not only on the training set, so we are going to concatenate the datasets.

In [166]:
df = pd.concat([CREMA_df, RAV_df, SAVEE_df, TESS_df])

In [167]:
df.tail()

Unnamed: 0,emotion_label,actors,gender,path
5577,sadness,YAF,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
5579,sadness,YAF,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
5581,sadness,YAF,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
5583,sadness,YAF,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
5585,sadness,YAF,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


## Check labels

In [168]:
np.unique(CREMA_df['emotion_label'])

array(['angry', 'disgust', 'fear', 'happy', 'sadness'], dtype=object)

In [169]:
np.unique(RAV_df['emotion_label'])

array(['angry', 'calm', 'disgust', 'fear', 'happy', 'neutral', 'sadness',
       'surprise'], dtype=object)

In [170]:
np.unique(SAVEE_df['emotion_label'])

array(['angry', 'disgust', 'fear', 'happy', 'neutral', 'sadness',
       'surprise'], dtype=object)

In [171]:
np.unique(TESS_df['emotion_label'])

array(['angry', 'disgust', 'fear', 'happy', 'neutral', 'sadness',
       'surprise'], dtype=object)

## Check Bit-Depth of wav files

In [134]:
from soundfile import SoundFile

In [135]:
ravdess_bd = []
savee_bd = []
tess_bd = []
crema_bd = []


for index, row in df.iterrows(): 
    path = row['path']
    file = SoundFile(path)
    if 'ravdess' in path:
        ravdess_bd.append(file.subtype)
    elif 'savee' in path: 
        savee_bd.append(file.subtype)
    elif 'creamd' in path: 
        crema_bd.append(file.subtype) 
    else: 
        tess_bd.append(file.subtype)
         

In [136]:
# check that all the audio files of a dataset have the same bith depth 
for el in ravdess_bd: 
    if el != 'PCM_16':
        print('Diff')

In [137]:
for el in savee_bd: 
    if el != 'PCM_16':
        print('Diff')

In [138]:
for el in tess_bd: 
    if el != 'PCM_16':
        print('Diff')

In [139]:
for el in crema_bd: 
    if el != 'PCM_16':
        print('Diff')

## Check range of sample values per emotion - per dataset

In [172]:
ravdess_range_min = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
ravdess_range_max = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

savee_range_min = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_range_max = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_range_min = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
tess_range_max = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

crema_range_min = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_range_max = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

In [173]:
ravdess_range_min

{'fear': [],
 'disgust': [],
 'neutral': [],
 'calm': [],
 'happy': [],
 'sadness': [],
 'surprise': [],
 'angry': []}

In [174]:
for index, row in tqdm(df.iterrows()): 
    path = row['path']
    label = row['emotion_label']
    data, samplerate = librosa.load(path, sr = 16000) 
    if 'ravdess' in path: 
        ravdess_range_min.get(label).append(np.min(data))
        ravdess_range_max.get(label).append(np.max(data))
    elif 'savee' in path: 
        savee_range_min.get(label).append(np.min(data))
        savee_range_max.get(label).append(np.max(data))
    elif 'tess' in path: 
        tess_range_min.get(label).append(np.min(data))
        tess_range_max.get(label).append(np.max(data))
    elif 'cream' in path: 
        crema_range_min.get(label).append(np.min(data))
        crema_range_max.get(label).append(np.max(data))
        

5160it [01:59, 43.00it/s]  


In [175]:
ravdess_range_min_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
ravdess_range_max_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

savee_range_min_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_range_max_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_range_min_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
tess_range_max_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

crema_range_min_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_range_max_stats = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

In [176]:
# min values
for k, v in ravdess_range_min.items():
    if ravdess_range_min_stats.get(k) == []: 
        ravdess_range_min_stats.get(k).append(np.mean(v))
        ravdess_range_min_stats.get(k).append(np.median(v))

for k, v in savee_range_min.items():
    if savee_range_min_stats.get(k) == []: 
        savee_range_min_stats.get(k).append(np.mean(v))
        savee_range_min_stats.get(k).append(np.median(v))

for k, v in tess_range_min.items():
    if tess_range_min_stats.get(k) == []: 
        tess_range_min_stats.get(k).append(np.mean(v))
        tess_range_min_stats.get(k).append(np.median(v))  

for k, v in crema_range_min.items():
    if crema_range_min_stats.get(k) == []: 
        crema_range_min_stats.get(k).append(np.mean(v))
        crema_range_min_stats.get(k).append(np.median(v))     

In [178]:
 ravdess_range_min_stats, tess_range_min_stats, savee_range_min_stats, crema_range_min_stats

({'fear': [-0.23780616, -0.14706421],
  'disgust': [-0.101773106, -0.08566284],
  'neutral': [-0.046873093, -0.038619995],
  'calm': [-0.035154145, -0.028579712],
  'happy': [-0.17604177, -0.13343811],
  'sadness': [-0.076462746, -0.053024292],
  'surprise': [-0.12169584, -0.10638428],
  'angry': [-0.4092703, -0.32962036]},
 {'fear': [-0.27757418, -0.24928084],
  'disgust': [-0.15876997, -0.15109253],
  'neutral': [-0.10046892, -0.07223511],
  'calm': [nan, nan],
  'happy': [-0.12787384, -0.122680664],
  'sadness': [-0.14802553, -0.1129303],
  'surprise': [-0.19243604, -0.18675232],
  'angry': [-0.44376713, -0.44312444]},
 {'fear': [-0.71591896, -0.88570786],
  'disgust': [-0.5699336, -0.6159462],
  'neutral': [-0.40062708, -0.3691684],
  'calm': [nan, nan],
  'happy': [-0.77316344, -1.0065743],
  'sadness': [-0.48110422, -0.49636734],
  'surprise': [-0.7670256, -1.0061314],
  'angry': [-0.78904533, -1.0099708]},
 {'fear': [-0.7101187, -0.7963562],
  'disgust': [-0.36877024, -0.2817230

In [179]:
# max values
for k, v in ravdess_range_max.items():
    if ravdess_range_max_stats.get(k) == []: 
        ravdess_range_max_stats.get(k).append(np.mean(v))
        ravdess_range_max_stats.get(k).append(np.median(v))

for k, v in savee_range_max.items():
    if savee_range_max_stats.get(k) == []: 
        savee_range_max_stats.get(k).append(np.mean(v))
        savee_range_max_stats.get(k).append(np.median(v))

for k, v in tess_range_max.items():
    if tess_range_max_stats.get(k) == []: 
        tess_range_max_stats.get(k).append(np.mean(v))
        tess_range_max_stats.get(k).append(np.median(v))     

for k, v in crema_range_max.items():
    if crema_range_max_stats.get(k) == []: 
        crema_range_max_stats.get(k).append(np.mean(v))
        crema_range_max_stats.get(k).append(np.median(v))   

In [180]:
 ravdess_range_max_stats, tess_range_max_stats, savee_range_max_stats, crema_range_max_stats

({'fear': [0.25650072, 0.16661072],
  'disgust': [0.10567633, 0.08670044],
  'neutral': [0.05240218, 0.042510986],
  'calm': [0.037156593, 0.028656006],
  'happy': [0.19236724, 0.1496582],
  'sadness': [0.081086, 0.05407715],
  'surprise': [0.12735271, 0.114746094],
  'angry': [0.45379272, 0.3430481]},
 {'fear': [0.24390072, 0.2087408],
  'disgust': [0.16192557, 0.15957642],
  'neutral': [0.088810176, 0.058654785],
  'calm': [nan, nan],
  'happy': [0.10222793, 0.09567261],
  'sadness': [0.10903225, 0.08784485],
  'surprise': [0.18419087, 0.17173767],
  'angry': [0.4117691, 0.40662274]},
 {'fear': [0.68867236, 0.8359611],
  'disgust': [0.5250933, 0.53767484],
  'neutral': [0.3384821, 0.31288826],
  'calm': [nan, nan],
  'happy': [0.7482513, 1.0047958],
  'sadness': [0.46011713, 0.4823631],
  'surprise': [0.74085814, 0.9891869],
  'angry': [0.78176016, 1.0073144]},
 {'fear': [0.70948863, 0.7923126],
  'disgust': [0.36619222, 0.27059937],
  'neutral': [nan, nan],
  'calm': [nan, nan],
  '

## Check Amplitude

In [215]:
ravdess_amp = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_amp = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_amp = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_amp = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}


In [216]:
for index, row in tqdm(df.iterrows()): 
    path = row['path']
    label = row['emotion_label']
    data, samplerate = librosa.load(path, sr = 16000)
    X=librosa.stft(data)
    amplitude = librosa.amplitude_to_db(abs(X))
    if 'ravdess' in path: 
        ravdess_amp.get(label).append(np.median(amplitude))

    elif 'savee' in path: 
        savee_amp.get(label).append(np.median(amplitude))
    elif 'tess' in path: 
        tess_amp.get(label).append(np.median(amplitude))
    elif 'cream' in path: 
        crema_amp.get(label).append(np.median(amplitude))
    

5160it [02:32, 33.92it/s] 


In [217]:
ravdess_amp_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_amp_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_amp_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_amp_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}


In [218]:
# median of amplitude per emotion and per dataset 
for k, v in ravdess_amp.items():
    if ravdess_amp_median.get(k) == []: 
        ravdess_amp_median.get(k).append(np.median(v))

for k, v in savee_amp.items():
    if savee_amp_median.get(k) == []: 
        savee_amp_median.get(k).append(np.median(v))


for k, v in tess_amp.items():
    if tess_amp_median.get(k) == []: 
        tess_amp_median.get(k).append(np.median(v))

for k, v in crema_amp.items():
    if crema_amp_median.get(k) == []: 
        crema_amp_median.get(k).append(np.median(v))


In [219]:
ravdess_amp_median, savee_amp_median, tess_amp_median, crema_amp_median

({'fear': [-46.64563],
  'disgust': [-51.789726],
  'neutral': [-61.547245],
  'calm': [-59.527428],
  'happy': [-51.2706],
  'sadness': [-56.886414],
  'surprise': [-54.039314],
  'angry': [-43.45335]},
 {'fear': [-30.685266],
  'disgust': [-37.014606],
  'neutral': [-42.397095],
  'calm': [nan],
  'happy': [-28.83998],
  'sadness': [-38.810497],
  'surprise': [-29.03577],
  'angry': [-29.30946]},
 {'fear': [-26.96304],
  'disgust': [-34.383503],
  'neutral': [-38.743904],
  'calm': [nan],
  'happy': [-35.066933],
  'sadness': [-39.51477],
  'surprise': [-29.666206],
  'angry': [-25.703892]},
 {'fear': [-27.407616],
  'disgust': [-32.23555],
  'neutral': [nan],
  'calm': [nan],
  'happy': [-27.47182],
  'sadness': [-36.953915],
  'surprise': [nan],
  'angry': [-16.908876]})

## Check the volume of wav files 

To check the volume we are going to compute the rms for each audio files. Later we will plot the distribution of the volume for each database. 
https://docs.python.org/3/library/audioop.html

In [210]:
ravdess_rms = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_rms = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_rms = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_rms = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}


In [211]:
for index, row in df_train.iterrows(): 
    path = row['path']
    label = row['emotion_label']
    data, samplerate = librosa.load(path)
    
    if 'ravdess' in path:
        ravdess_rms.get(label).append(np.mean(librosa.feature.rms(data)))

    elif 'savee' in path: 
        savee_rms.get(label).append(np.mean(librosa.feature.rms(data)))
    elif 'creamd' in path: 
        crema_rms.get(label).append(np.mean(librosa.feature.rms(data))) 
    else: 
        tess_rms.get(label).append(np.mean(librosa.feature.rms(data)))         

In [212]:
ravdess_rms_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
savee_rms_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}

tess_rms_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}
crema_rms_median = {'fear':list(), 'disgust':list(), 'neutral':list(), 'calm':list(),  'happy':list(), 'sadness':list(), 'surprise':list(), 'angry':list()}


In [213]:
# median of amplitude per emotion and per dataset 
for k, v in ravdess_rms.items():
    if ravdess_rms_median.get(k) == []: 
        ravdess_rms_median.get(k).append(np.median(v))

for k, v in savee_rms.items():
    if savee_rms_median.get(k) == []: 
        savee_rms_median.get(k).append(np.median(v))


for k, v in tess_rms.items():
    if tess_rms_median.get(k) == []: 
        tess_rms_median.get(k).append(np.median(v))

for k, v in crema_rms.items():
    if crema_rms_median.get(k) == []: 
        crema_rms_median.get(k).append(np.median(v))


In [214]:
ravdess_rms_median, savee_rms_median, tess_rms_median, crema_rms_median

({'fear': [0.010353636],
  'disgust': [0.0050750943],
  'neutral': [0.0025113472],
  'calm': [0.0020973552],
  'happy': [0.009248671],
  'sadness': [0.0033962876],
  'surprise': [0.00671383],
  'angry': [0.018378183]},
 {'fear': [0.11257792],
  'disgust': [0.07859196],
  'neutral': [0.061942585],
  'calm': [nan],
  'happy': [0.1404942],
  'sadness': [0.07850291],
  'surprise': [0.1240446],
  'angry': [0.15217704]},
 {'fear': [0.049176022],
  'disgust': [0.020419043],
  'neutral': [0.027181733],
  'calm': [nan],
  'happy': [nan],
  'sadness': [0.034186486],
  'surprise': [0.029411208],
  'angry': [0.05952949]},
 {'fear': [0.054254543],
  'disgust': [0.021384936],
  'neutral': [nan],
  'calm': [nan],
  'happy': [0.044515457],
  'sadness': [0.008052027],
  'surprise': [nan],
  'angry': [0.123767376]})

## Check duration of files per dataset

In [269]:
durations_rav = []
durations_savee = []
durations_tess = []
durations_crema = []

In [270]:
from tqdm import tqdm_notebook
for path in tqdm_notebook(list(RAV_df.path)):
    samples, sr = librosa.load(path, res_type='kaiser_fast', sr=16000)
    durations_rav.append(librosa.get_duration(y = samples, sr = sr))

for path in tqdm_notebook(list(SAVEE_df.path)):
    samples, sr = librosa.load(path, res_type='kaiser_fast', sr=16000)
    durations_savee.append(librosa.get_duration(y = samples, sr = sr))

for path in tqdm_notebook(list(TESS_df.path)):
    samples, sr = librosa.load(path, res_type='kaiser_fast', sr=16000)
    durations_tess.append(librosa.get_duration(y = samples, sr = sr))

for path in tqdm_notebook(list(CREMA_df.path)):
    samples, sr= librosa.load(path, res_type='kaiser_fast', sr=16000)
    durations_crema.append(librosa.get_duration(y = samples, sr = sr))

  0%|          | 0/1440 [00:00<?, ?it/s]

  0%|          | 0/480 [00:00<?, ?it/s]

  0%|          | 0/2800 [00:00<?, ?it/s]

  0%|          | 0/440 [00:00<?, ?it/s]

In [273]:
# check outliers i.e audio files with duration < 3 seconds

count = 0 
for el in durations_rav: 
    if el < 3: 
        count += 1 
print("RAV outliers ", count)

count = 0 
for el in durations_savee: 
    if el < 3: 
        count += 1 
print("SAVEE outliers ", count)

count = 0 
for el in durations_tess: 
    if el < 3: 
        count += 1 
print("TESS outliers ", count)

count = 0 
for el in durations_crema: 
    if el < 3: 
        count += 1 
print("CREMA outliers ", count)

RAV outliers  2
SAVEE outliers  105
TESS outliers  2800
CREMA outliers  380


# Audio Normalization ffmpeg 

https://pypi.org/project/ffmpeg-normalize/#examples


In [222]:
!pip3 install ffmpeg-normalize 


[31mERROR: Invalid requirement: '/home/helemanc/Desktop/Binary_Model/normalized_audio/norm.wav'
Hint: It looks like a path. File '/home/helemanc/Desktop/Binary_Model/normalized_audio/norm.wav' does not exist.[0m


In [231]:
password = getpass.getpass()
command = "sudo -S apt-get install ffmpeg"
os.system('echo %s | %s' % (password, command))

 ··········


[sudo] password for helemanc: 

Reading package lists...
Building dependency tree...
Reading state information...
The following additional packages will be installed:
  i965-va-driver intel-media-va-driver libaacs0 libaom0 libass9 libavcodec58
  libavdevice58 libavfilter7 libavformat58 libavresample4 libavutil56
  libbdplus0 libbluray2 libbs2b0 libchromaprint1 libcodec2-0.9 libdc1394-22
  libfftw3-double3 libflite1 libgme0 libigdgmm11 liblilv-0-0 libmysofa1
  libnorm1 libopenmpt0 libpgm-5.2-0 libpostproc55 librubberband2 libsdl2-2.0-0
  libserd-0-0 libshine3 libsnappy1v5 libsord-0-0 libsratom-0-0 libssh-gcrypt-4
  libswresample3 libswscale5 libva-drm2 libva-x11-2 libva2 libvidstab1.1
  libx264-155 libx265-179 libxvidcore4 libzmq5 libzvbi-common libzvbi0
  mesa-va-drivers va-driver-all
Suggested packages:
  ffmpeg-doc i965-va-driver-shaders libbluray-bdj libfftw3-bin libfftw3-dev
  serdi sordi
The following NEW packages will be installed:
  ffmpeg i965-va-driver intel-media-va-driver libaacs0 libaom0 libass9
  libavco

256

In [251]:
import getpass
import os

password = getpass.getpass()
command = "sudo -S ffmpeg-normalize '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/ravdess-emotional-speech-audio/audio_speech_actors_01-24/Actor_01/03-01-07-01-02-02-01.wav' -o '/home/helemanc/Desktop/Binary_Model/normalized_audio/norm.wav' " #can be any command but don't forget -S as it enables input from stdin
os.system('echo %s | %s' % (password, command))

 ··········


[sudo] password for helemanc: sudo: ffmpeg-normalize: command not found


256

In [264]:
!ffmpeg-normalize '/home/helemanc/Desktop/Binary_Model/normalized_audio/norm_crema.wav' -o '/home/helemanc/Desktop/Binary_Model/normalized_audio/norm_crema_2.wav' -ar 16000



In [233]:
!git clone https://github.com/slhck/ffmpeg-normalize

Cloning into 'ffmpeg-normalize'...
remote: Enumerating objects: 1321, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (83/83), done.[K
remote: Total 1321 (delta 79), reused 109 (delta 52), pack-reused 1178[K
Receiving objects: 100% (1321/1321), 2.12 MiB | 573.00 KiB/s, done.
Resolving deltas: 100% (762/762), done.


In [240]:
%cd ffmpeg-normalize

/home/helemanc/Desktop/Binary_Model/ffmpeg-normalize


In [241]:
!ls

CHANGELOG.md	  LICENSE      requirements.dev.txt  setup.py
DEVELOPERS.md	  MANIFEST.in  requirements.txt      test
ffmpeg_normalize  README.md    setup.cfg


In [242]:
!python3 setup.py install --user

running install
running bdist_egg
running egg_info
creating ffmpeg_normalize.egg-info
writing ffmpeg_normalize.egg-info/PKG-INFO
writing dependency_links to ffmpeg_normalize.egg-info/dependency_links.txt
writing entry points to ffmpeg_normalize.egg-info/entry_points.txt
writing requirements to ffmpeg_normalize.egg-info/requires.txt
writing top-level names to ffmpeg_normalize.egg-info/top_level.txt
writing manifest file 'ffmpeg_normalize.egg-info/SOURCES.txt'
reading manifest file 'ffmpeg_normalize.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'ffmpeg_normalize.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/ffmpeg_normalize
copying ffmpeg_normalize/_ffmpeg_normalize.py -> build/lib/ffmpeg_normalize
copying ffmpeg_normalize/__init__.py -> build/lib/ffmpeg_normalize
copying ffmpeg_normalize/_cmd_utils.py -> build/lib/ffmpeg_normaliz

In [245]:
%cd ffmpeg-normalize

[Errno 20] Not a directory: 'ffmpeg-normalize'
/home/helemanc/.local/bin


In [247]:
%cd ..

/home/helemanc/.local


In [249]:
%cd ..

/home/helemanc


In [250]:
%cd '/home/helemanc/Desktop/Binary_Model/ffmpeg-normalize'

/home/helemanc/Desktop/Binary_Model/ffmpeg-normalize


In [258]:
CREMA_df.path[0]

'/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/creamd/AudioWAV/1091_IEO_HAP_HI.wav'

In [262]:
!pip3 install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [263]:
from pydub import AudioSegment

pad_ms = 1000  # milliseconds of silence needed
silence = AudioSegment.silent(duration=pad_ms)
audio = AudioSegment.from_wav(CREMA_df.path[0])

padded = audio + silence  # Adding silence after the audio
padded.export('/home/helemanc/Desktop/Binary_Model/normalized_audio/norm_crema.wav', format='wav')

<_io.BufferedRandom name='/home/helemanc/Desktop/Binary_Model/normalized_audio/norm_crema.wav'>