# Reconocimiento de emociones en audio: Emo-DB

## Imports

In [13]:
from extract_features import extractFeaturesFolder
try:
    import urllib2
except:
    import urllib.request as urllib2 
import time
import zipfile
import os
from IPython.display import Audio
import numpy as np
from create_labels import create_labelsEmoDB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Descargar base de datos

In [11]:
# Download the Emo-DB archive to the working directory and report the elapsed time.
linkDB = "http://emodb.bilderbar.info/download/download.zip"
file_name = "download.zip"

now = time.time()

req = urllib2.Request(linkDB)
download = urllib2.urlopen(req)
try:
    # `with` closes the local file even if reading the response or writing fails.
    with open(file_name, "wb") as fileDB:
        fileDB.write(download.read())
finally:
    # Closed explicitly (not via `with`) so the cell also works with the
    # Python 2 `urllib2` fallback — presumably its response object is not a
    # context manager; TODO confirm if Python 2 support is still needed.
    download.close()

elapsed = time.time() - now

print ("Descargada la base de datos: %s en %0.3fs" % (file_name,elapsed))

Descargada la base de datos: download.zip en 100.792s


## Descomprimir base de datos

In [18]:
# Extract every member of the downloaded archive below `folder_data`.
folder_data='./data/'
if not os.path.exists(folder_data):
    os.makedirs(folder_data)

# The context manager closes the archive handle even if an extraction fails.
with zipfile.ZipFile(file_name) as zfile:
    for name in zfile.namelist():
        (dirname, filename) = os.path.split(name)
        if not os.path.exists(folder_data+dirname):
            os.makedirs(folder_data+dirname)
            # Printed only the first time a target directory is created. For
            # archive entries that are directories (names ending in "/") the
            # file-name part is empty, so the message shows just the directory.
            print ("Descomprimiendo " + filename + " on " + folder_data+dirname)
        zfile.extract(name, folder_data)


Descomprimiendo  on ./data/lablaut
Descomprimiendo  on ./data/labsilb
Descomprimiendo  on ./data/silb
Descomprimiendo  on ./data/wav


## Muestras de la base de datos

In [2]:
# Audio widget for one corpus sample; the trailing comment names its emotion.
Audio('./data/wav/03a01Wa.wav') # Anger

In [3]:
# Audio widget for one corpus sample; the trailing comment names its emotion.
Audio('./data/wav/03a01Nc.wav') # Neutral

In [6]:
# Audio widget for one corpus sample; the trailing comment names its emotion.
Audio('./data/wav/03a02Ta.wav') # Sadness

## Extraer características de la BD

### Coeficientes cepstrales en las frecuencias de Mel (MFCC)

https://es.wikipedia.org/wiki/MFCC

In [9]:
# Folder holding the corpus audio and the text file used for the feature matrix.
folder_audio = './data/wav/'
file_features = './Features.txt'

# Presumably the helper writes one feature row per audio file into
# `file_features` (TODO confirm in extract_features); the matrix is then
# read back from that file.
extractFeaturesFolder(folder_audio, file_features)
Features = np.loadtxt(file_features)

print(Features.shape)

(535, 144)


## Se buscan Labels de la BD, de acuerdo con el nombre del archivo


- `labels1`: dos clases -> emociones de alta vs. baja excitación

- `labels2`: dos clases -> emociones positivas vs. negativas

- `labels3`: siete clases ->

    - 0 -> Rabia
    - 1 -> Aburrimiento
    - 2 -> Disgusto
    - 3 -> Ansiedad/Miedo
    - 4 -> Felicidad
    - 5 -> Tristeza
    - 6 -> Neutro


In [12]:
# Build the three label vectors from the audio folder; per the notebook text
# the labels are derived from the file names.
labels1, labels2, labels3 = create_labelsEmoDB(folder_audio)

print(labels1.shape, labels2.shape, labels3.shape)

(535,) (535,) (535,)


## Separación de conjuntos de entrenamiento y test

In [29]:
# Feature matrix and target vector.
X = Features
y = labels1  # Pick one of the three label types to use

# Hold out 33% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics are used for the standardization.
st = StandardScaler().fit(X_train)
X_train = st.transform(X_train)
X_test = st.transform(X_test)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(358, 144) (177, 144) (358,) (177,)


In [30]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
    
# Tune an RBF-kernel SVM with cross-validated grid search on the training
# split, then report the accuracy of the best model on the held-out test split.
clf = SVC()  # base estimator; its hyper-parameters are set by the grid search

# Search grid: C and gamma on a logarithmic scale, balanced class weights.
parameters = {
    'C': [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
    'gamma': [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
    'kernel': ['rbf'],
    'class_weight': ['balanced'],
}

cls_cv = GridSearchCV(clf, parameters, cv=9, n_jobs=4)  # 9-fold grid search, 4 workers
cls_cv.fit(X_train, y_train)  # runs the search and refits the best configuration
accDev = cls_cv.best_score_  # validation accuracy
Copt = cls_cv.best_params_.get('C')  # best C
gammaopt = cls_cv.best_params_.get('gamma')  # best gamma

# With the default refit=True, GridSearchCV has already retrained an SVC with
# the best parameters on the whole training split, so that fitted estimator is
# reused instead of training an identical model a second time.
cls = cls_cv.best_estimator_
y_test_est = cls.predict(X_test)  # test predictions
acc = metrics.accuracy_score(y_test, y_test_est)

print(acc)


0.937853107345
