# Configuration

NOTE: The warnings printed after the imports appear because TensorFlow 2.x builds look for a GPU on the system as soon as the library is loaded. They can be safely ignored if you are not going to use a GPU.

In [1]:
# NOTE(review): a `!` command runs in a temporary subshell, so activating a
# virtualenv here does not change the environment of the running kernel.
# Select the desired environment/kernel before starting the notebook instead.
!source myenv/bin/activate

In [2]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold


from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam 
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
# Show the GPUs TensorFlow can see; an empty list means it will run on the CPU.
tf.config.list_physical_devices('GPU')

2021-09-10 10:04:10.810835: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 10:04:10.810858: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 10:04:11.672942: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-10 10:04:11.673505: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-09-10 10:04:11.741795: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-09-10 10:04:11.741818: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (helemanc-Latitude-5410): /pr

[]

# Utils

# Compute dataframes for the datasets and split them into Train, Val, Test

In [4]:
# Root folder that holds every speech-emotion dataset referenced below.
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'

# Per-dataset locations, each resolved relative to the root folder.
TESS, RAV, SAVEE, CREMA = (
    os.path.join(main_path, relative_dir)
    for relative_dir in (
        "tess/TESS Toronto emotional speech set data/",
        "ravdess-emotional-speech-audio/audio_speech_actors_01-24",
        "savee/ALL/",
        "creamd/AudioWAV/",
    )
)

In [5]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# Two-character code found at positions [-8:-6] of each file name -> emotion label.
savee_emotion_codes = {
    '_a': 'angry',
    '_d': 'disgust',
    '_f': 'fear',
    '_h': 'happy',
    '_n': 'neutral',
    'sa': 'sadness',
    'su': 'surprise',
}

# parse the filename to get the emotions
emotion = []
path = []
actors = []
gender = []
for i in dir_list:
    # the first two characters of the file name identify the actor
    actors.append(i[:2])
    label = savee_emotion_codes.get(i[-8:-6])
    if label is None:
        # Unrecognised file: still record a gender so that the four lists stay
        # the same length and every row keeps its own values.
        emotion.append('Unknown')
        gender.append('Unknown')
    else:
        emotion.append(label)
        # every recognised SAVEE recording is labelled as a male speaker
        gender.append('male')
    path.append(os.path.join(SAVEE, i))

# Assemble one row per audio file; the column order matches the original layout.
SAVEE_df = pd.DataFrame({
    'emotion_label': emotion,
    'actors': actors,
    'gender': gender,
    'path': path,
})

# Now check out the label count distribution
SAVEE_df.emotion_label.value_counts()

neutral     120
sadness      60
surprise     60
happy        60
disgust      60
fear         60
angry        60
Name: emotion_label, dtype: int64

In [6]:
# Preview the first rows to check the parsed label, actor, gender and path columns.
SAVEE_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,neutral,DC,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [7]:
# Empty containers that will receive the rows of each split.
SAVEE_train, SAVEE_val, SAVEE_test = [], [], []

In [8]:
#DC, JE, JK, KL
# Speaker-independent split: DC and JE -> train, JK -> validation,
# every other actor -> test.
split_of_actor = {'DC': 'train', 'JE': 'train', 'JK': 'val'}

# Start from fresh lists on every run so that re-executing this cell does not
# append the same rows a second time.
rows_by_split = {'train': [], 'val': [], 'test': []}
for index, row in SAVEE_df.iterrows():
    rows_by_split[split_of_actor.get(row['actors'], 'test')].append(row)

SAVEE_train = rows_by_split['train']
SAVEE_val = rows_by_split['val']
SAVEE_test = rows_by_split['test']
len(SAVEE_train), len(SAVEE_val), len(SAVEE_test)

(240, 120, 120)

In [9]:
# Turn each collection of selected rows into its own DataFrame.
SAVEE_train, SAVEE_val, SAVEE_test = (
    pd.DataFrame(rows) for rows in (SAVEE_train, SAVEE_val, SAVEE_test)
)

In [10]:
# The actor code was only needed to build the split, so remove it.
# `columns=` replaces the positional axis argument, which recent pandas
# versions no longer accept; errors='ignore' keeps the cell re-runnable once
# the column is already gone.
SAVEE_train = SAVEE_train.drop(columns=['actors'], errors='ignore')
SAVEE_val = SAVEE_val.drop(columns=['actors'], errors='ignore')
SAVEE_test = SAVEE_test.drop(columns=['actors'], errors='ignore')

In [11]:
# Renumber every split from 0, discarding the row labels inherited from SAVEE_df.
SAVEE_train, SAVEE_val, SAVEE_test = (
    split.reset_index(drop=True)
    for split in (SAVEE_train, SAVEE_val, SAVEE_test)
)

In [12]:
# Dataset-agnostic names used by the rest of the notebook.
df_train, df_val, df_test = SAVEE_train, SAVEE_val, SAVEE_test

# Create Noise Files

In [15]:
from pydub import AudioSegment
import random 
from pydub.utils import make_chunks


def create_noise_files(df_train, df_val, df_test, noise_paths=None, output_dir=None, seed=None):
    '''
    Augment the training split with noisy copies of its audio files.

    For every row of ``df_train`` a noise recording is picked at random, a random
    chunk of it (as long as the speech file) is attenuated and overlaid on the
    speech, and the mix is exported as WAV to ``output_dir`` under the original
    file name. Noise is applied only to the training files, so the number of
    training rows doubles while validation and test stay the same.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training rows; the columns 'path', 'emotion_label' and 'gender' are read.
    df_val, df_test : pandas.DataFrame
        Returned unchanged.
    noise_paths : list of str, optional
        Noise recordings to draw from. Defaults to the three recordings
        originally hard-coded in this notebook.
    output_dir : str, optional
        Folder that receives the noisy WAV files. Defaults to the SAVEE noise
        folder originally hard-coded in this notebook. Created if missing.
    seed : int, optional
        Seed for a private random generator, to make the noise selection
        repeatable. When omitted, the global ``random`` module state is used.

    Returns
    -------
    tuple
        ``(df_train_combined, df_val, df_test)`` where ``df_train_combined`` is
        ``df_train`` followed by one row per generated noisy file.
    '''
    if noise_paths is None:
        noise_paths = [
            '/home/helemanc/Desktop/Binary_Model/noise_sounds/freight_train.wav',
            '/home/helemanc/Desktop/Binary_Model/noise_sounds/inside_train.wav',
            '/home/helemanc/Desktop/Binary_Model/noise_sounds/small_crowd.wav',
        ]
    if output_dir is None:
        output_dir = '/home/helemanc/Desktop/Binary_Model/noise_datasets/savee/train'

    # Exporting into a folder that does not exist fails, so create it up front.
    os.makedirs(output_dir, exist_ok=True)

    rng = random if seed is None else random.Random(seed)

    # Decode every noise recording once instead of once per training file.
    noise_sounds = [AudioSegment.from_file(noise_path) for noise_path in noise_paths]

    noisy_rows = []
    for index, row in tqdm(df_train.iterrows(), total=len(df_train)):
        speech_path = row['path']
        speech = AudioSegment.from_file(speech_path)

        # pick a noise recording randomly
        noise = rng.choice(noise_sounds)

        # Cut the noise into chunks as long as the speech file (len() of an
        # AudioSegment is its duration in milliseconds) and pick one at random.
        # The trailing chunk may be shorter than the speech file.
        chunk_length_ms = len(speech)
        noise_chunk = rng.choice(make_chunks(noise, chunk_length_ms))

        # Lower the noise by the loudness gap between the two signals plus 2 dB
        # so that it does not cover the voice.
        # NOTE(review): because of abs(), noise that is already quieter than the
        # speech is attenuated further instead of being brought close to the
        # speech level - confirm this is intended.
        attenuation_db = abs(noise_chunk.dBFS - speech.dBFS) + 2
        combined = speech.overlay(noise_chunk - attenuation_db)

        # The noisy copy keeps the file name of the clean recording.
        new_path = os.path.join(output_dir, os.path.basename(speech_path))
        combined.export(new_path, format='wav')

        noisy_rows.append({
            'emotion_label': row['emotion_label'],
            'gender': row['gender'],
            'path': new_path,
        })

    df_train_noise = pd.DataFrame(noisy_rows, columns=['emotion_label', 'gender', 'path'])

    # Clean rows first, noisy rows after, renumbered from 0.
    df_train_combined = pd.concat([df_train, df_train_noise], ignore_index=True)

    return df_train_combined, df_val, df_test
# have to save df 

In [16]:
# Generate the noisy training files; the validation and test frames come back unchanged.
new_df_train, new_df_val, new_df_test = create_noise_files(df_train, df_val, df_test)

240it [00:09, 26.58it/s]


In [17]:
# Sanity check: (rows, columns) of the train, validation and test frames.
tuple(frame.shape for frame in (new_df_train, new_df_val, new_df_test))

((480, 3), (120, 3), (120, 3))

## Save dataframes

In [18]:
# Folder that receives the CSV files describing the train/val/test splits.
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"

In [20]:
# to_csv() does not create missing folders, so make sure the target exists first.
os.makedirs(preprocess_path, exist_ok=True)
new_df_train.to_csv(os.path.join(preprocess_path, "df_train.csv"), index=False)

In [21]:
# Write the validation split to disk without the index column.
val_csv_path = os.path.join(preprocess_path, "df_val.csv")
new_df_val.to_csv(val_csv_path, index=False)

In [22]:
# Write the test split to disk without the index column.
test_csv_path = os.path.join(preprocess_path, "df_test.csv")
new_df_test.to_csv(test_csv_path, index=False)