In [None]:
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
import time

In [None]:
# Collect the categorical labels from the evaluation .txt files of every session:
#   cat_labels : filename -> category
#   label2file : category -> list of filenames
# Both dictionaries are pickled to the working directory at the end of the cell.
cat_labels = {}
label2file = {}
for sess in ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']:
    for subdir, dirs, files in os.walk('../../../../HBA/IEMOCAP_full_release/' + sess + '/dialog/EmoEvaluation/all/'):
        for fil in files:
            filepath = os.path.join(subdir, fil)
            if not filepath.endswith('.txt'):
                continue
            with open(filepath) as f:
                for line in f:
                    # Only lines starting with '[' are parsed; everything else is skipped.
                    if not line.startswith('['):
                        continue
                    # Tab-separated record: field 1 is used as the filename key and
                    # field 2 as its category (presumably field 0 is the time span
                    # in brackets -- confirm against the dataset files).
                    ph = line.rstrip().split('\t')
                    cat_labels[ph[1]] = ph[2]
                    # setdefault replaces the deprecated has_key() + if/else append.
                    label2file.setdefault(ph[2], []).append(ph[1])

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(cat_labels))
with open('cat_labels.pkl', 'wb') as f:
    pickle.dump(cat_labels, f)
with open('label2file.pkl', 'wb') as f:
    pickle.dump(label2file, f)

In [None]:
# Analyze the class distribution: one bar per category, annotated with the
# number of entries in that category.
# The pickle is written in binary mode ('wb'), so it must be read back in
# binary mode too; a text-mode open fails on Python 3 and can corrupt the
# stream on platforms that translate line endings.
# NOTE: only unpickle files produced by this notebook -- pickle.load executes
# arbitrary code from untrusted input.
with open('label2file.pkl', 'rb') as f:
    label2file = pickle.load(f)

# Derive the counts from the key list so both sequences share one ordering.
x = list(label2file.keys())
y = [len(label2file[k]) for k in x]

plt.bar(x, y)
for label, count in zip(x, y):
    # Write the count slightly above the top of each bar.
    plt.text(label, count + 0.25, str(count))
plt.show()

In [None]:
# Extract MFCC features for all the sentences / utterances.
# mfcc_feat maps each wav file name to the array returned by mfcc(sig, rate);
# the result is pickled to feat/mfcc.pkl.
mfcc_feat = {}
uttno = 1
for sess in ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']:
    for subdir, dirs, files in os.walk('../../../../HBA/IEMOCAP_full_release/' + sess + '/sentences/wav/'):
        for fil in files:
            filepath = subdir + os.sep + fil
            if filepath.endswith('.wav'):
                (rate, sig) = wav.read(filepath)
                mfcc_feat[fil] = mfcc(sig, rate)
                uttno += 1

# Create the output directory first: without it the open() below raises only
# after the whole (slow) extraction has finished, and the results are never saved.
if not os.path.isdir('feat'):
    os.makedirs('feat')
with open('feat/mfcc.pkl', 'wb') as f:
    pickle.dump(mfcc_feat, f)

In [None]:
# Extract per-utterance statistics as features: the mean and the variance of
# the MFCC array along axis 0, for every utterance whose category is in k2num.
# Produces:
#   X : mean columns followed by variance columns, one row per kept utterance
#   y : column vector of integer class ids from k2num
# Six-class variant, kept for reference:
#k2num = {'ang':1, 'exc':2, 'fru':3, 'hap':4, 'neu':5, 'sad':6}
k2num = {'ang':1, 'exc':2, 'neu':3, 'sad':4}

# Collect rows in lists and stack once at the end; concatenating inside the
# loop re-copies the whole matrix for every utterance (quadratic cost).
mean_rows = []
var_rows = []
y = []
i = 0
for k, v in mfcc_feat.items():
    if i % 1000 == 0:
        print(i)
    # k[:-4] drops the last four characters of the file name (the '.wav'
    # suffix for the files collected by the extraction cell).
    label = cat_labels[k[:-4]]
    if label in k2num:
        mean_rows.append(np.mean(v, axis=0))
        var_rows.append(np.var(v, axis=0))
        y.append(k2num[label])
    i += 1

# Fall back to the original (0, 13) empty matrix when nothing was selected.
X_mean = np.asarray(np.vstack(mean_rows), dtype=float) if mean_rows else np.empty((0, 13), float)
X_var = np.asarray(np.vstack(var_rows), dtype=float) if var_rows else np.empty((0, 13), float)

print(X_mean.shape)
y = np.array(y)

X = np.concatenate((X_mean, X_var), axis=1)
y = np.reshape(y, (-1, 1))
print(X.shape)
print(y.shape)

In [None]:
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# 5-fold cross-validation of a default SVC on (X, y); prints the mean accuracy
# over the folds. KFold is used without shuffling, so the folds follow the
# current row order of X.
kf = KFold(n_splits=5)
acc = []
for train_index, test_index in kf.split(X):
    tr_x, te_x = X[train_index], X[test_index]
    # y is stored as an (n, 1) column vector; scikit-learn expects 1-d targets
    # and warns otherwise, so flatten the selected labels.
    tr_y, te_y = y[train_index].ravel(), y[test_index].ravel()
    clf = svm.SVC()
    clf.fit(tr_x, tr_y)
    pred = clf.predict(te_x)
    acc.append(accuracy_score(te_y, pred))

print(sum(acc) / float(len(acc)))

In [None]:
# Extract the zero-crossing rate (ZCR) for all the sentences / utterances.
# zcr_feat maps each wav file name to the first row of the array returned by
# librosa.feature.zero_crossing_rate; the result is pickled to feat/zcr.pkl.
import librosa

zcr_feat = {}
# Start at 0 so the progress print reports the number of files actually
# processed (starting at 1 overstated it by one).
uttno = 0
for sess in ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']:
    for subdir, dirs, files in os.walk('../../../../HBA/IEMOCAP_full_release/' + sess + '/sentences/wav/'):
        for fil in files:
            filepath = subdir + os.sep + fil
            if filepath.endswith('.wav'):
                # No sr argument is passed, so librosa.load uses its default rate.
                sig, sr = librosa.load(filepath)
                zcr_feat[fil] = librosa.feature.zero_crossing_rate(sig)[0]
                uttno += 1
                if uttno % 100 == 0:
                    print(uttno)

# Create the output directory first so the extraction result is not lost to a
# missing-directory error at save time.
if not os.path.isdir('feat'):
    os.makedirs('feat')
with open('feat/zcr.pkl', 'wb') as f:
    pickle.dump(zcr_feat, f)
    
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# 5-fold cross-validation of a default SVC on the current (X, y); prints the
# mean accuracy over the folds.
# NOTE(review): X and y are not rebuilt from zcr_feat before this point, so this
# appears to repeat the evaluation on the previously built features -- confirm intent.
kf = KFold(n_splits=5)
acc = []
for train_index, test_index in kf.split(X):
    tr_x, te_x = X[train_index], X[test_index]
    # scikit-learn expects 1-d targets; ravel() flattens a column-vector y and
    # leaves an already 1-d y unchanged.
    tr_y, te_y = y[train_index].ravel(), y[test_index].ravel()
    clf = svm.SVC()
    clf.fit(tr_x, tr_y)
    pred = clf.predict(te_x)
    acc.append(accuracy_score(te_y, pred))

print(sum(acc) / float(len(acc)))

In [None]:
# Per-utterance ZCR statistics: mean and variance of each ZCR sequence, for
# every utterance whose category is in k2num.
# Produces X_mean and X_var as (n, 1) column arrays and y as a 1-d array of
# integer class ids, all with one entry per kept utterance.
k2num = {'ang':1, 'exc':2, 'neu':3, 'sad':4}

# Collect values in lists and build the arrays once. Seeding the arrays with
# np.empty((1, 1)) added one uninitialised row, which left X_mean / X_var one
# row longer than y and shifted every feature against its label.
mean_vals = []
var_vals = []
y = []
i = 0
for k, v in zcr_feat.items():
    if i % 1000 == 0:
        print(i)
    # k[:-4] drops the last four characters of the file name (the '.wav'
    # suffix for the files collected by the extraction cell).
    label = cat_labels[k[:-4]]
    if label in k2num:
        mean_vals.append(np.mean(v))
        var_vals.append(np.var(v))
        y.append(k2num[label])
    i += 1

X_mean = np.array(mean_vals, dtype=float).reshape(-1, 1)
X_var = np.array(var_vals, dtype=float).reshape(-1, 1)

print(X_mean.shape)
y = np.array(y)

In [None]:
# Extract pitch statistics for all the sentences / utterances.
# For every wav file the Praat script extract_pitch.praat is run, its output is
# read back from temp.pitch, and pitch_feat[file name] is set to
# [mean, variance] of the defined pitch values.
import subprocess
import numpy as np

pitch_feat = {}
uttno = 0
for sess in ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']:
    for subdir, dirs, files in os.walk('../../../../HBA/IEMOCAP_full_release/' + sess + '/sentences/wav/'):
        for fil in files:
            filepath = subdir + os.sep + fil
            if filepath.endswith('.wav'):
                # check_call raises CalledProcessError on a non-zero exit status.
                # With the return code ignored, a failed run would leave the
                # previous file's temp.pitch in place and its values would be
                # stored under this file's name.
                subprocess.check_call(['/usr/bin/praat', '--run', 'extract_pitch.praat', filepath])
                pitch = {}
                with open('temp.pitch') as f:
                    for line in f:
                        val = line.split()
                        # Blank or single-token lines carry no (time, pitch) pair.
                        if len(val) < 2:
                            continue
                        # Skip undefined frames and lines whose first token is
                        # 'Pitch' (presumably the header row -- confirm against
                        # extract_pitch.praat).
                        if val[1] != '--undefined--' and val[0] != 'Pitch':
                            pitch[float(val[0])] = float(val[1])
                # list(...) keeps this working when values() is a view (Python 3).
                # With no defined frames the array is empty and both stats are nan.
                pitch_vals = np.array(list(pitch.values()))
                pitch_feat[fil] = [np.mean(pitch_vals), np.var(pitch_vals)]
                uttno += 1
                if uttno % 1000 == 0:
                    print(uttno)