In [1]:
import numpy as np
import pandas as pd
import soundfile as sf
import librosa
from scipy.signal import spectrogram
import os
import math
import matplotlib.pyplot as plt
import random
import re
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Dropout, Activation, Convolution2D, MaxPooling2D, UpSampling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers

Using TensorFlow backend.


In [2]:
class RAVDESS:
    """Preprocessing pipeline for the RAVDESS emotional-speech audio files.

    The pipeline runs in four steps, each of which skips work already on disk:

    1. ``trim_audios``               - resample + trim each original clip and
                                       save it next to the original as ``<name>-00.wav``.
    2. ``create_save_noised_audio``  - write noisy copies ``<name>-01.wav``, ``-02`` ...
    3. ``create_spectograms``        - render one PNG per processed clip.
    4. ``create_dataframe``          - build a one-hot encoded label table from
                                       the PNG file names.

    File names consist of dash-separated two-digit fields; as used by
    ``name_to_data`` the fields are, in order: modality, vocal channel,
    emotion, intensity, statement, repetition, actor and (for processed clips)
    the augmentation index.

    The directory/method spelling ``spectograms`` is kept as-is because it is
    part of the on-disk layout and the public interface.
    """

    # Number of numeric fields in a file name (directories are ignored).
    _ORIGINAL_FIELDS = 7   # untouched dataset clip
    _PROCESSED_FIELDS = 8  # trimmed ('-00') or noisy ('-01', '-02', ...) clip

    def __init__(self, frequency):
        """Store the target sample rate and the lookup tables for file-name fields.

        Args:
            frequency: sample rate (Hz) that ``trim_audios`` resamples clips to.
        """
        self.frequency = frequency
        self.separator = "-"
        self.audio_only = "03"
        self.voice_channel = "01"
        self.emotions = {
            "01": "neutral",
            "02": "calm",
            "03": "happy",
            "04": "sad",
            "05": "angry",
            "06": "fearful",
            "07": "disgust",
            "08": "surprised"
        }
        self.emotion_intensities = {"01": "normal", "02": "strong"}
        self.statements = {"01": "Kids_are_talking_by_the_door", "02": "Dogs_are_sitting_by_the_door"}
        self.repetitions = ["01", "02"]
        # Odd-numbered actors are male, even-numbered actors are female
        # (this includes actor 24, which was previously mislabelled as male).
        self.actors = {
            "{:02d}".format(number): "male" if number % 2 else "female"
            for number in range(1, 25)
        }
        self.shown_specs = 0
        self.data_path = 'data/RAVDESS/'

    def _root(self):
        """Return ``self.data_path`` guaranteed to end with a single '/'."""
        return self.data_path if self.data_path.endswith('/') else self.data_path + '/'

    @staticmethod
    def _name_fields(path):
        """Return the digit groups of the file name only.

        Counting on the base name keeps the original/processed distinction
        independent of any digits that appear in directory names.
        """
        return re.findall(r'\d+', os.path.basename(path))

    def get_audios_paths(self):
        """List every file inside the per-actor folders of the dataset.

        Only sub-directories of ``data_path`` are scanned; the ``spectograms``
        output folder and loose files (README, OS metadata, ...) are skipped,
        whether or not they exist.

        Returns:
            Sorted list of '/'-separated paths ``<data_path><actor dir>/<file>``.

        Raises:
            OSError: if ``data_path`` itself cannot be listed.
        """
        root = self._root()
        file_paths = []
        for entry in sorted(os.listdir(root)):
            actor_dir = root + entry
            if entry == 'spectograms' or not os.path.isdir(actor_dir):
                continue
            file_paths.extend(actor_dir + '/' + name for name in sorted(os.listdir(actor_dir)))
        return file_paths

    def get_spectograms_paths(self):
        """Return the sorted PNG file names (not full paths) in the spectrogram folder.

        Returns an empty list when the folder has not been created yet.
        """
        spectograms_dir = self._root() + 'spectograms/'
        if not os.path.isdir(spectograms_dir):
            return []
        return sorted(name for name in os.listdir(spectograms_dir) if name.endswith('.png'))

    def trim_audios(self):
        """Resample and trim every original clip, saving it as ``<name>-00.wav``.

        Clips that already have a ``-00`` counterpart are left untouched.
        """
        print('Trim files')
        files = self.get_audios_paths()
        existing = set(files)  # O(1) membership instead of scanning the list per file

        for file in files:
            if not file.endswith('.wav') or len(self._name_fields(file)) != self._ORIGINAL_FIELDS:
                continue  # not an original dataset clip
            converted_file_path = file[:-4] + '-00.wav'
            if converted_file_path in existing:
                continue  # already trimmed on a previous run
            audio_data, samplerate = librosa.load(file, sr=self.frequency)  # load and convert the audio
            audio_data_filtered, _ = librosa.effects.trim(
                audio_data, top_db=30, frame_length=2048, hop_length=512)
            print('Trim: ' + file)
            # librosa.output.write_wav no longer exists in current librosa;
            # soundfile writes the same float samples.
            sf.write(converted_file_path, audio_data_filtered, samplerate, subtype='FLOAT')

    # Add noise to the audio to create more samples
    def create_audio_with_noise(self, data, number_of_noisy_audios):
        """Return ``number_of_noisy_audios`` noisy copies of ``data``.

        Each copy adds zero-mean Gaussian noise whose standard deviation is a
        random 10-30 % of the signal's own standard deviation. The noise is
        zero-mean so that augmentation does not shift the signal's DC offset.

        Args:
            data: numpy array of audio samples.
            number_of_noisy_audios: how many noisy copies to create.

        Returns:
            List of arrays, each with the same shape as ``data``.
        """
        std = data.std()
        noised_data = []
        for _ in range(number_of_noisy_audios):
            scale = std * random.uniform(0.1, 0.3)
            noise = np.random.normal(0.0, scale, data.shape)
            noised_data.append(data + noise)
        return noised_data

    def is_noisy_data(self, files, file, number_of_noisy_audios):
        """Tell whether the noisy copies of a trimmed clip still need to be created.

        Despite its name, this returns True when the number of files sharing
        the clip's prefix (the ``-00`` clip plus its noisy copies) differs from
        ``number_of_noisy_audios + 1``.

        Args:
            files: all known audio paths.
            file: path of a trimmed clip ending in ``-00.wav``.
            number_of_noisy_audios: expected number of noisy copies.
        """
        prefix = file[:-6]  # strip the trailing '00.wav', keep the dash
        variants = sum(1 for candidate in files if candidate.startswith(prefix))
        return variants != number_of_noisy_audios + 1

    def create_save_noised_audio(self, number_of_noisy_audios):
        """Write noisy copies (``-01.wav``, ``-02.wav``, ...) of every trimmed clip.

        A clip is processed only when its set of noisy copies is incomplete
        (see ``is_noisy_data``).

        Args:
            number_of_noisy_audios: number of noisy copies per trimmed clip.
        """
        print('Noise audios')
        files = self.get_audios_paths()

        for file in files:
            numbers = self._name_fields(file)
            is_trimmed = len(numbers) == self._PROCESSED_FIELDS and numbers[-1] == '00'
            if not (is_trimmed and self.is_noisy_data(files, file, number_of_noisy_audios)):
                continue
            audio_data, samplerate = sf.read(file)  # load the audio
            noised_data = self.create_audio_with_noise(audio_data, number_of_noisy_audios)
            for index, noisy in enumerate(noised_data, start=1):
                new_file_name = file[:-7] + '-' + "{:02d}".format(index) + '.wav'
                print('Add noisy audio: ' + new_file_name)
                sf.write(new_file_name, noisy, samplerate)

    # Original (misspelled) public name, kept so existing callers keep working.
    craete_save_noised_audio = create_save_noised_audio

    def create_spectograms(self, window_seconds=1, nfft=128, dpi=300):
        """Render a spectrogram PNG for every processed clip that lacks one.

        Only the first ``window_seconds`` of each clip are drawn. Images are
        saved borderless (no axes) into ``<data_path>spectograms/``, which is
        created when missing.

        Args:
            window_seconds: length of the leading audio window to plot.
            nfft: FFT window size; consecutive windows overlap by half of it.
            dpi: resolution of the saved PNG.
        """
        print('Create spectograms')
        spectograms_dir = self._root() + 'spectograms'
        os.makedirs(spectograms_dir, exist_ok=True)
        files = self.get_audios_paths()
        existing = set(self.get_spectograms_paths())

        for file in files:
            if len(self._name_fields(file)) != self._PROCESSED_FIELDS:
                continue  # only trimmed / noisy clips get a spectrogram
            stem = file[file.rfind('/') + 1:-4]
            if stem + '.png' in existing:
                continue
            audio_data, samplerate = sf.read(file)
            window_length = int(samplerate * window_seconds)
            split_audio_data = audio_data[:window_length]

            fig, ax = plt.subplots(1)
            try:
                fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
                # noverlap must be an integer number of samples
                ax.specgram(split_audio_data, NFFT=nfft, Fs=samplerate, noverlap=nfft // 2)
                ax.axis('off')
                image_path = spectograms_dir + '/' + stem + '.png'
                print('Save image: ' + image_path)
                fig.savefig(image_path, dpi=dpi)
            finally:
                plt.close(fig)  # always release the figure, even if rendering fails

    def name_to_data(self, file):
        """Decode a spectrogram file name into its label row.

        Args:
            file: spectrogram file name, e.g. ``03-01-05-02-01-02-24-00.png``.

        Returns:
            ``[emotion, intensity, statement, repetition, actor id,
            actor gender, image path]``.

        Raises:
            KeyError / IndexError: if the name does not follow the expected
            field layout.
        """
        numbers = self._name_fields(file)
        return [
            self.emotions[numbers[2]],
            self.emotion_intensities[numbers[3]],
            self.statements[numbers[4]],
            self.repetitions[int(numbers[5]) - 1],
            numbers[6],
            self.actors[numbers[6]],
            self._root() + 'spectograms/' + file
        ]

    # Creates the Data Frame for the RAVDESS
    def create_dataframe(self):
        """Build the label table for all spectrograms.

        Returns:
            DataFrame with an ``image`` column (path to the PNG) followed by
            0/1 one-hot columns for emotion, emotion intensity, statement,
            repetition, actor and actor gender.
        """
        print('Create dataframe')
        label_columns = ["emotion", "emotion_intensity", "statement", "repetition", "actor", "actor_gender"]
        rows = [self.name_to_data(spectogram) for spectogram in self.get_spectograms_paths()]

        # construct the DataFrame from the decoded rows; every column is categorical
        df = pd.DataFrame(rows, columns=label_columns + ["image"]).astype('category')

        # one-hot encode columns; the dtype is pinned so the indicators stay
        # 0/1 integers on pandas versions whose default is bool
        return pd.get_dummies(df, columns=label_columns, dtype=np.uint8)

In [3]:
# Target sample rate in Hz; RAVDESS.trim_audios() passes it to librosa.load as `sr`.
frequency = 22000
ravdess = RAVDESS(frequency)

# STEP 1: Trim audios.
# Writes a trimmed '<name>-00.wav' next to every original clip that does not
# have one yet, so re-running this cell only processes new files.
ravdess.trim_audios()

Trim files


In [4]:
# STEP 2: Create noisy audios.
# For every trimmed '-00' clip whose set of noisy copies is incomplete, writes
# 2 noisy copies ('-01.wav', '-02.wav') alongside it.
ravdess.craete_save_noised_audio(number_of_noisy_audios=2)

Noise audios


In [5]:
# STEP 3: Create spectrograms.
# Saves one PNG per trimmed/noisy clip into data/RAVDESS/spectograms,
# skipping clips whose PNG already exists.
ravdess.create_spectograms()

Create spectograms


In [6]:
# STEP 4: Create dataframe.
# One row per spectrogram file: the image path plus one-hot encoded labels
# decoded from the file name.
df = ravdess.create_dataframe()
# Preview the first rows as the cell's displayed output.
df.head()

Create dataframe


Unnamed: 0,image,emotion_angry,emotion_calm,emotion_disgust,emotion_fearful,emotion_happy,emotion_neutral,emotion_sad,emotion_surprised,emotion_intensity_normal,...,actor_17,actor_18,actor_19,actor_20,actor_21,actor_22,actor_23,actor_24,actor_gender_female,actor_gender_male
0,data/RAVDESS/spectograms/03-01-01-01-01-01-01-...,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1
1,data/RAVDESS/spectograms/03-01-01-01-01-01-01-...,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1
2,data/RAVDESS/spectograms/03-01-01-01-01-01-01-...,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1
3,data/RAVDESS/spectograms/03-01-01-01-01-01-02-...,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,1,0
4,data/RAVDESS/spectograms/03-01-01-01-01-01-02-...,0,0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,1,0
