# Configuration

NOTES: The warnings after the import are referred to the fact that Tensorflow 2.x versions are built to directly look for a GPU in the system. The warning can be forgot if you are not going to use the GPU. 

In [11]:
!source myenv/bin/activate

In [12]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold


from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam 
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

[]

# Utils

# Compute dataframes for datasets and split in Train, Val, Test 

In [13]:
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'
TESS = os.path.join(main_path, "tess/TESS Toronto emotional speech set data/") 
RAV = os.path.join(main_path, "ravdess-emotional-speech-audio/audio_speech_actors_01-24")
SAVEE = os.path.join(main_path, "savee/ALL/")
CREMA = os.path.join(main_path, "creamd/AudioWAV/")

In [14]:
dir_list = os.listdir(TESS)
dir_list.sort()
dir_list

path = []
emotion = []
gender = []
actors = []

for i in dir_list:
    fname = os.listdir(TESS + i)
    for f in fname:
        if i == 'OAF_angry':
            emotion.append('angry')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_angry': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_disgust' :
            emotion.append('disgust')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_disgust': 
            emotion.append('disgust')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_Fear':
            emotion.append('fear')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_fear': 
            emotion.append('fear')
            gender.append('female')
            actors.append('YAF') 
            
            
        elif i == 'OAF_happy' :
            emotion.append('happy')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_happy': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_neutral':
            emotion.append('neutral')
            gender.append('female')
            actors.append('OAF')   
        elif i == 'YAF_neutral': 
            emotion.append('neutral')
            gender.append('female')
            actors.append('YAF')      
            
                
        elif i == 'OAF_Pleasant_surprise':
            emotion.append('surprise')
            gender.append('female')
            actors.append('OAF')
        
        elif i == 'YAF_pleasant_surprised': 
            emotion.append('surprise')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_Sad':
            emotion.append('sadness')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_sad': 
            emotion.append('sadness')
            gender.append('female')
            actors.append('YAF')            
        else:
            emotion.append('Unknown')
        path.append(TESS + i + "/" + f)

TESS_df = pd.DataFrame(emotion, columns = ['emotion_label'])
TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns = ['gender']), 
                     pd.DataFrame(actors, columns= ['actors']),
                     pd.DataFrame(path, columns = ['path'])],axis=1)
TESS_df.emotion_label.value_counts()

angry       1200
fear         800
surprise     800
sadness      800
disgust      800
neutral      800
happy        400
Name: emotion_label, dtype: int64

In [15]:
TESS_df= TESS_df[~TESS_df.path.str.contains('noise')]

In [16]:
TESS_train = []
TESS_test = []

In [17]:
for index, row in TESS_df.iterrows(): 
    if row['actors'] == 'YAF': 
        TESS_train.append(row)
    else: 
        TESS_test.append(row)
len(TESS_train), len(TESS_test)

(1400, 1400)

In [18]:
df_train = pd.DataFrame(TESS_train)
df_test = pd.DataFrame(TESS_test)
df_val = []

# Create Noise Files

In [22]:
from pydub import AudioSegment
import random 
from pydub.utils import make_chunks


def create_noise_files(df_train, df_val, df_test): 
    
    '''
    Apply noise only on training files, so double the number of training files and keep 
    validation and test the same
    '''
    path_noise_sound_1 = '/home/helemanc/Desktop/Binary_Model/noise_sounds/freight_train.wav'
    path_noise_sound_2 = '/home/helemanc/Desktop/Binary_Model/noise_sounds/inside_train.wav'
    path_noise_sound_3 = '/home/helemanc/Desktop/Binary_Model/noise_sounds/small_crowd.wav'
    
    path_noise_dataset_train = '/home/helemanc/Desktop/Binary_Model/noise_datasets/tess/train'
    #path_noise_dataset_val = '/home/helemanc/Desktop/Binary_Model/noise_datasets/ravdess/val'
    #path_noise_dataset_test = '/home/helemanc/Desktop/Binary_Model/noise_datasets/ravdess/test'
    

    #df_list = [df_train, df_val, df_test]
    #count_df = 0 
    
    train_emotions = []
    train_genders = []
    train_paths = []
    
    #val_emotions = []
    #val_genders = []
    #val_paths = []
    
    #test_emotions = []
    #test_genders = []
    #test_paths = []
    
    #for df in df_list: 
        
    for index, row in tqdm(df_train.iterrows()): 
        path = row['path']
        sound1 = AudioSegment.from_file(path)
        samples, sr = librosa.load(path, res_type='kaiser_fast', sr=16000)
        duration = librosa.get_duration(y = samples, sr = sr)

        # pick a noise sound file randomly 
        noise_list = [path_noise_sound_1, path_noise_sound_2, path_noise_sound_3]
        random_noise = random.choice(noise_list) 

        lower_volume = 0 

        # adjust volume to not cover the voice of the audio file 
        # warning: different levels of dB need to be calibrate for each dataset 
        '''
        if random_noise == path_noise_sound_1: 
            lower_volume = 40
        elif random_noise == path_noise_sound_2: 
            lower_volume = 25 
        else: 
            lower_volume = 40
        '''

        # other strategy: 
        # compute db of both files, compute the difference, and lower the volume of the file to make it 
        # a bit lower than the original file -almost equal- 

        sound2 = AudioSegment.from_file(random_noise)

        # make chunks of duration equal to the audio file 
        chunk_length_ms = duration*1000 #ms
        chunks = make_chunks(sound2, chunk_length_ms) 

        # pick a random chunk 
        random_chunk = random.choice(chunks)
        difference = random_chunk.dBFS - sound1.dBFS

        abs_difference = abs(difference)

        lower = random_chunk - abs_difference - 2

        # lower the volume of the noise file to be overlayed with the voice_sound 
        #lower = random_chunk - lower_volume

        combined = sound1.overlay(lower)

        parts = path.split('/')
        fname = parts[-1]
        
        new_path = path_noise_dataset_train + '/' + fname 

        train_emotions.append(row['emotion_label'])
        train_genders.append(row['gender'])
        train_paths.append(new_path)

        '''
        if count_df == 0: 
            new_path = path_noise_dataset_train + '/' + fname 

            train_emotions.append(row['emotion_label'])
            train_genders.append(row['gender'])
            train_paths.append(new_path)

        elif count_df == 1: 
            new_path = path_noise_dataset_val + '/' + fname

            val_emotions.append(row['emotion_label'])
            val_genders.append(row['gender'])
            val_paths.append(new_path)

        elif count_df == 2:
            new_path = path_noise_dataset_test + '/' + fname          

            test_emotions.append(row['emotion_label'])
            test_genders.append(row['gender'])
            test_paths.append(new_path)
        '''
        combined.export(new_path, format= 'wav')

    #count_df +=1

    df_train_noise = pd.DataFrame([train_emotions, train_genders, train_paths]).T
    df_train_noise.columns = ['emotion_label', 'gender', 'path']
    
    #df_val_noise = pd.DataFrame([val_emotions, val_genders, val_paths]).T
    #df_val_noise.columns = ['emotion_label', 'gender', 'path']
    
    #df_test_noise = pd.DataFrame([test_emotions, test_genders, test_paths]).T
    #df_test_noise.columns = ['emotion_label', 'gender', 'path']

    df_train_combined = pd.concat([df_train, df_train_noise])
    df_train_combined.reset_index(drop=True, inplace=True)
    
    #df_val_combined = pd.concat([df_val, df_val_noise])
    #df_val_combined.reset_index(drop=True, inplace=True)
                                   
    #df_test_combined = pd.concat([df_test, df_test_noise])
    #df_test_combined.reset_index(drop=True, inplace=True)
    
    return df_train_combined, df_val, df_test
# have to save df 

In [23]:
new_df_train, new_df_val, new_df_test = create_noise_files(df_train, df_val, df_test)

1400it [00:20, 67.34it/s]


In [24]:
new_df_train.shape, new_df_test.shape

((2800, 4), (1400, 4))

## Save dataframes

In [25]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"

In [26]:
new_df_train.to_csv(os.path.join(preprocess_path,"df_train.csv"), index=False)

In [27]:
new_df_test.to_csv(os.path.join(preprocess_path,"df_test.csv"), index=False)