In [7]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
from package.Data import Dataset, Audio
import os
from tqdm import tqdm as tqdm
import pandas as pd

In [9]:
import numpy as np
from scipy.stats import kurtosis, skew
from package.Kernels import DWT

dwt = DWT(wavelets =  ['db1','db6','db8','db10'], levels =  10, featureFunctions = 
    {
        'minabs' : lambda x: max(abs(x)),
        'min' : min,
        'minabs': lambda x: min(abs(x)),
        'std': np.std,
        'stdabs':  lambda x: np.std(abs(x)),
        'mean': np.mean,
        'meanabs' : lambda x: np.mean(abs(x)) ,
        'median' : np.median ,
        'medianabs' : lambda x: np.median(abs(x)),
        'kurt' : kurtosis,
        'kurtabs' : lambda x: kurtosis(abs(x)),
        'skew' : skew,
        'skewabs' : lambda x: skew(abs(x)),
        'zcr' : lambda x: (np.diff(np.sign(x)) != 0).sum() - (x == 0).sum(),
        'energy' : lambda x: np.sum(x.astype(float)**2),
    }
    )

In [4]:
# Acted Emotional Speech Dynamic Database
audioPaths = []
for emotion in ['fear', 'sadness', 'happiness', 'anger', 'disgust']:
    path = 'SERDatasets.nosync/Acted Emotional Speech Dynamic Database'
    for filename in os.listdir(f'{path}/{emotion}'):
        if filename.endswith('.wav'):
            audioPaths.append((emotion, f'{path}/{emotion}/{filename}'))   

# Reading WAV files
features = []
labels = []
for emotion, path in tqdm(audioPaths):
    rawAudio = Audio(path)
    rawAudio.resample(2**14)
    windowedAudios = rawAudio.window(2**14, overlap = 0.6)
    features += [dwt.decompose(audio) for audio in windowedAudios]
    labels += [emotion]* len(windowedAudios)

dataset = Dataset(pd.DataFrame(features, columns = dwt.featureNames()), y= labels)
dataset.save(path.split('/')[1])

  s = s**2
  s *= a_zero_mean
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  wavfile.read(filename)
100%|██████████| 604/604 [04:42<00:00,  2.14it/s]


In [5]:
# CaFE_48k
audioPaths = []
for emotion in ['Coläre/Faible', 'Coläre/Fort', 'DÇgoñt/Faible', 'DÇgoñt/Fort',  'Joie/Faible', 
    'Joie/Fort',  'Neutre',   'Peur/Faible', 'Peur/Fort', 'Surprise/Faible', 
    'Surprise/Fort', 'Tristesse/Faible', 'Tristesse/Fort']:
    path = 'SERDatasets.nosync/CaFE_48k'
    for filename in os.listdir(f'{path}/{emotion}'):
        if filename.endswith('.wav'):
            audioPaths.append((emotion.split('/')[0], f'{path}/{emotion}/{filename}'))   

# Reading WAV files
features = []
labels = []
for emotion, path in tqdm(audioPaths):
    rawAudio = Audio(path)
    rawAudio.resample(2**14)
    windowedAudios = rawAudio.window(2**14, overlap = 0.6)
    features += [dwt.decompose(audio) for audio in windowedAudios]
    labels += [emotion]* len(windowedAudios)

dataset = Dataset(pd.DataFrame(features, columns = dwt.featureNames()), y= labels)
dataset.save(path.split('/')[1])

100%|██████████| 936/936 [09:30<00:00,  1.64it/s]


In [19]:
# emoSynth-DB
path = 'SERDatasets.nosync/emoSynth-DB/all_data'
labels_csv = pd.read_csv(f'{path}/audio_labels.csv', index_col=1)

features = []
labels = []
for filename in tqdm(os.listdir(path+'/wavs')):
    rawAudio = Audio(f'{path}/wavs/{filename}')
    rawAudio.resample(2**14)
    windowedAudios = rawAudio.window(2**14, overlap = 0.6)
    features += [dwt.decompose(audio) for audio in windowedAudios]
    labels += [[labels_csv.loc[filename]['valence'],labels_csv.loc[filename]['arousal']]]*len(windowedAudios)

dataset = Dataset(pd.DataFrame(features, columns = dwt.featureNames()), y= pd.DataFrame(labels, columns=['valence', 'arousal']))
dataset.save(path.split('/')[1])

100%|██████████| 168/168 [02:37<00:00,  1.07it/s]


In [23]:
# Emotioanl Speech Database (ESD)
path = 'SERDatasets.nosync/Emotional Speech Dataset (ESD)'
audioPaths = []
for speaker in os.listdir(path):
    if not speaker.startswith('00'):
        continue
    for emotion in ['Angry', 'Happy', 'Neutral', 'Sad', 'Surprise']:
        for set_ in ['train', 'test','evaluation']:
            for filename in os.listdir(f'{path}/{speaker}/{emotion}/{set_}'):
                if filename.endswith('.wav'):
                    audioPaths.append((emotion, f'{path}/{speaker}/{emotion}/{set_}/{filename}'))

features = []
labels = []
for emotion, path in tqdm(audioPaths):
    rawAudio = Audio(path)
    rawAudio.resample(2**14)
    windowedAudios = rawAudio.window(2**14, overlap = 0.6)
    features += [dwt.decompose(audio) for audio in windowedAudios]
    labels += [emotion]* len(windowedAudios)

#Save features and labels with index number
np.savetxt('.dataset/Emotional Speech Dataset (ESD)/features.csv', np.array(features), delimiter=',')
pd.DataFrame(labels).to_csv('.dataset/Emotional Speech Dataset (ESD)/labels.csv')

dataset = Dataset(pd.DataFrame(features, columns = dwt.featureNames()), y= labels)
dataset.save(path.split('/')[1])

  wavfile.read(filename)
100%|██████████| 35000/35000 [2:49:40<00:00,  3.44it/s]  


In [10]:
# EMOVO
path = 'SERDatasets.nosync/EMOVO'
audioPaths = []
for folder in os.listdir(path):
    if folder == 'documents':
        continue
    for filename in os.listdir(f'{path}/{folder}'):
        if filename.endswith('.wav'):
            emotion = filename[:3]
            print(emotion)
            audioPaths.append((emotion, f'{path}/{folder}/{filename}'))

rab
dis
rab
rab
dis
sor
sor
rab
dis
pau
gio
gio
dis
dis
sor
dis
neu
pau
gio
tri
pau
dis
dis
sor
sor
sor
dis
neu
gio
tri
dis
sor
sor
sor
neu
pau
tri
tri
neu
sor
dis
gio
pau
gio
pau
gio
pau
dis
rab
dis
neu
pau
tri
gio
gio
tri
pau
gio
pau
neu
rab
sor
dis
neu
gio
tri
tri
tri
gio
neu
neu
sor
sor
neu
tri
pau
tri
tri
pau
neu
neu
sor
rab
gio
pau
rab
rab
rab
gio
pau
rab
rab
tri
neu
rab
tri
neu
rab
pau
gio
dis
dis
dis
gio
pau
gio
neu
dis
sor
sor
dis
dis
pau
tri
gio
neu
dis
sor
sor
sor
dis
tri
tri
pau
neu
sor
sor
sor
neu
tri
rab
dis
rab
dis
rab
sor
rab
sor
rab
rab
rab
gio
pau
rab
rab
rab
gio
pau
tri
neu
tri
rab
rab
neu
pau
gio
dis
rab
dis
gio
pau
gio
pau
tri
gio
pau
neu
dis
sor
rab
neu
gio
pau
gio
tri
pau
tri
gio
tri
neu
dis
sor
neu
neu
tri
gio
tri
tri
pau
neu
sor
sor
neu
neu
pau
tri
rab
rab
gio
pau
rab
rab
rab
gio
pau
rab
rab
neu
tri
rab
neu
tri
rab
sor
gio
pau
tri
gio
pau
neu
neu
pau
tri
gio
dis
dis
rab
gio
pau
gio
pau
gio
pau
dis
sor
tri
pau
neu
neu
neu
pau
tri
tri
sor
sor
tri
gio
neu
neu
neu
