# Practical Work 3

Authors : Lionel Burgbacher & David Jaquet

In [17]:
import numpy as np
from matplotlib import pyplot as pl
import os
import scipy.io.wavfile as wav
from python_speech_features import mfcc
import mlp_backprop_momentum as mlp
import k_fold_cross_validation as cv

%matplotlib inline

In [18]:
PATH = './vowels/'

## Utils

In [19]:
# Read all the wav files
def read_files(files):
    """Load every wav file named in `files` from the PATH directory.

    Args:
        files: iterable of file names, relative to PATH.

    Returns:
        A list holding the result of `wav.read` for each name, in input order.
    """
    return [wav.read(os.path.join(PATH, name)) for name in files]

# Generate the Mel-Frequency Cepstrum Coefficients
def generate_mfcc(files):
    """Compute the MFCC of every (rate, signal) pair in `files`.

    Args:
        files: iterable of (sample rate, samples) pairs, as produced by
            `read_files`.

    Returns:
        A list with one `mfcc(...)` result per input pair, in input order.
        The FFT size is fixed to 1024.
    """
    return [mfcc(signal, samplerate=rate, nfft=1024) for rate, signal in files]

# Calculate the mean of the features for the given array
def calculate_feature_mean(mfcc):
    """Reduce each matrix in `mfcc` to its column-wise mean.

    Note: the parameter shadows the imported `mfcc` function inside this body.

    Args:
        mfcc: iterable of 2-D arrays (one per recording).

    Returns:
        A list with one vector per input matrix: its mean along axis 0.
    """
    return [np.mean(coefficients, axis=0) for coefficients in mfcc]

# Calculate the standard deviation of the features for the given array
def calculate_feature_std(mfcc):
    """Reduce each matrix in `mfcc` to its column-wise standard deviation.

    Note: the parameter shadows the imported `mfcc` function inside this body.

    Args:
        mfcc: iterable of 2-D arrays (one per recording).

    Returns:
        A list with one vector per input matrix: its standard deviation
        along axis 0.
    """
    return [np.std(coefficients, axis=0) for coefficients in mfcc]

# Calculate the median of the features for the given array
def calculate_feature_median(mfcc):
    """Reduce each matrix in `mfcc` to its column-wise median.

    Note: the parameter shadows the imported `mfcc` function inside this body.

    Args:
        mfcc: iterable of 2-D arrays (one per recording).

    Returns:
        A list with one vector per input matrix: its median along axis 0.
    """
    return [np.median(coefficients, axis=0) for coefficients in mfcc]

# Normalize the data column by column to the range [min, max]
def scale(array, min, max):
    """Linearly rescale `array` along axis 0 to the range [min, max].

    For a 2-D array every column is rescaled independently; a 1-D array is
    rescaled as a whole. A column whose values are all equal is mapped to
    `min` (its span is treated as 1 to avoid a division by zero).

    Args:
        array: array-like of numbers (1-D or 2-D). It is not modified.
        min: lower bound of the target range.
        max: upper bound of the target range.

    Returns:
        A new array with the same shape as `array`.
    """
    array = np.asarray(array)
    lowest = array.min(axis=0)
    span = array.max(axis=0) - lowest
    # np.where also handles the scalar span of a 1-D input, where the
    # boolean-mask assignment `span[span == 0] = 1` would raise a TypeError.
    safe_span = np.where(span == 0, 1, span)
    return min + (array - lowest) * (max - min) / safe_span

## First experiment - Man vs Woman with natural voice

In [20]:
# src : https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
def list_wav_files(prefix):
    """Return the sorted names of the wav files in PATH starting with `prefix`.

    os.listdir returns entries in arbitrary order, so the names are sorted to
    make the order of the samples reproducible from one run to the next.
    """
    return sorted(
        name for name in os.listdir(PATH)
        if os.path.isfile(os.path.join(PATH, name))
        and name.startswith(prefix)
        and name.endswith('.wav')
    )

# Collect all the men files
men_files = list_wav_files('nam')

# Collect all the women files
women_files = list_wav_files('naf')

In [21]:
# Load the wav files of each group
men, women = read_files(men_files), read_files(women_files)

# One MFCC matrix per recording
men_mfcc, women_mfcc = generate_mfcc(men), generate_mfcc(women)

# Summarise each MFCC matrix by its median along axis 0
men_feature, women_feature = (
    calculate_feature_median(men_mfcc),
    calculate_feature_median(women_mfcc),
)

In [38]:
# Class labels, appended as the last column of every sample
MAN = [-1]
WOMAN = [1]

# One row per recording: men first, then women
features = np.asarray(list(men_feature) + list(women_feature))
labels = np.asarray(MAN * len(men_feature) + WOMAN * len(women_feature))

# Scale the feature columns only. Passing the label column through scale()
# leaves it intact only while both classes are present (min = -1, max = 1);
# with a single class it would be collapsed to -1.
datas = np.column_stack((scale(features, -1, 1), labels))

In [41]:
# Training hyper-parameters
LEARNING_RATE = 0.001
MOMENTUM = 0.5
EPOCHS = 500
HIDDEN_NODES = 16

# Number of feature columns in `datas`; the column right after them is the label
N_INPUTS = 13

input_data = datas[:, :N_INPUTS]
output_data = datas[:, N_INPUTS]

# N_INPUTS features in input and 1 output to determine man or woman
nn = mlp.MLP([N_INPUTS, HIDDEN_NODES, 1], 'tanh')

MSE = nn.fit(
    (input_data, output_data),
    learning_rate=LEARNING_RATE,
    momentum=MOMENTUM,
    epochs=EPOCHS,
)

In [44]:
# Presumably the number of folds (passed as third argument below) - confirm
K = 8

# NOTE(review): `nn` has already been fitted on the complete data set by the
# nn.fit(...) call above. Unless k_fold_cross_validation re-initialises the
# weights for each fold, the test folds are not unseen data - confirm in
# k_fold_cross_validation.
# NOTE(review): the meaning of the trailing 0.0 argument is not visible here
# (presumably a decision threshold) - confirm.
train, test, matrix = cv.k_fold_cross_validation(nn, datas, K, LEARNING_RATE, MOMENTUM, EPOCHS, 0.0)
# `matrix` is printed as a 2x2 array (see the output below)
print(matrix)

[[33.  3.]
 [ 1. 35.]]
