# Attempt to classify Toronto dataset samples using a CNN

## Dataset: https://tspace.library.utoronto.ca/handle/1807/24487
#### Dataset structure: 7 folders (one per category), each containing WAV files

In [1]:
import os

from scipy.io import wavfile
from scipy import signal

import numpy as np

import pandas as pd

In [2]:
import tensorflow as tf
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

  return f(*args, **kwds)


In [3]:
from tqdm import tqdm 

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
def wav_preloader(target_path,
                  normalize=True,
                  resample=None,
                  crop=None,
                  categorical_labels=True,
                  n_classes=None):
    """Build preloaders for the wav files named in a dataset list file.

    Every non-blank line of ``target_path`` must look like
    ``<path to wav file> <integer label>``.

    Args:
        target_path: Path of the list file to read.
        normalize: Forwarded unchanged to ``WavPreloader``.
        resample: Forwarded unchanged to ``WavPreloader``.
        crop: Forwarded unchanged to ``WavPreloader``.
        categorical_labels: Forwarded to
            ``tflearn.data_utils.LabelPreloader``.
        n_classes: Total number of classes. With the default ``None`` the
            count is inferred as ``max(label) + 1`` from this file alone.
            Pass it explicitly for small splits: a split that happens not
            to contain the highest label would otherwise get a smaller
            class count than the training split.

    Returns:
        Tuple ``(X, Y)``: a ``WavPreloader`` over the listed paths and a
        ``LabelPreloader`` over the listed labels.

    Raises:
        ValueError: If a line has no label column, a label is not an
            integer, or the file lists no samples while ``n_classes`` is
            not given.
    """
    wavs, labels = [], []
    with open(target_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the LAST whitespace run so that paths containing
            # spaces stay intact.
            parts = line.rsplit(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    '{}:{}: expected "<path> <label>", got {!r}'.format(
                        target_path, line_no, line))
            wavs.append(parts[0])
            labels.append(int(parts[1]))

    if n_classes is None:
        if not labels:
            raise ValueError(
                '{} lists no samples; cannot infer n_classes'.format(target_path))
        n_classes = int(np.max(labels)) + 1

    X = WavPreloader(wavs, normalize, resample, crop)
    Y = tflearn.data_utils.LabelPreloader(labels, n_classes, categorical_labels)
    return X, Y

class WavPreloader(tflearn.data_utils.Preloader):
    """Preloader that turns wav file paths into sample arrays.

    The base class receives the list of paths together with a per-item
    loading function (``preload`` bound to the options given here).
    """

    def __init__(self, array, normalize=True, resample=None, crop=None):
        """Store the paths and bind the loading options.

        Args:
            array: Sequence of wav file paths.
            normalize: Scale each clip by its peak absolute amplitude.
            resample: Target sample rate, or None to keep the file's rate.
            crop: Number of trailing samples to keep, or None for all.
        """
        fn = lambda x: self.preload(x, normalize, resample, crop)
        super(WavPreloader, self).__init__(array, fn)

    def preload(self, path, normalize, resample, crop):
        """Read one wav file and apply the optional processing steps.

        Steps run in this order: resample, normalize, crop.

        Args:
            path: Path of the wav file to read.
            normalize: If true, divide by the peak absolute amplitude so
                values lie in [-1, 1]. All-zero or empty clips are left
                unscaled instead of being divided by zero.
            resample: Target sample rate, or None to skip resampling.
            crop: If not None, keep the last ``crop`` samples. Clips
                shorter than ``crop`` are zero-padded at the front so every
                returned clip has the same length.

        Returns:
            The processed samples as a numpy array.
        """
        sample_rate, sample = wavfile.read(path)
        if resample is not None:
            # float() keeps the ratio fractional even under integer division.
            new_length = int(float(resample) / sample_rate * sample.shape[0])
            sample = signal.resample(sample, new_length)
        if normalize:
            # Go to floating point first: abs() on the most negative value
            # of an integer dtype would overflow.
            sample = sample * 1.0
            peak = np.amax(np.abs(sample)) if sample.size else 0
            if peak > 0:
                sample = sample / peak
        if crop is not None:
            sample = sample[-crop:]
            missing = crop - sample.shape[0]
            if missing > 0:
                # Pad only along the time axis; other axes stay untouched.
                pad_width = [(missing, 0)] + [(0, 0)] * (sample.ndim - 1)
                sample = np.pad(sample, pad_width, mode='constant')
        return sample

In [6]:
def _write_dataset_list(list_path, entries):
    """Write ``(path, label)`` pairs to ``list_path`` as ``path label`` lines."""
    with open(list_path, 'w') as f:
        for (path, label) in entries:
            f.write('{} {}\n'.format(path, label))


def create_dataset_lists(directory, random_state=None):
    """Scan ``directory`` and write all/train/test/validation list files.

    Every sub-folder of ``directory`` (except the ``dataset-txt`` output
    folder) is treated as one category, and every entry inside it as one
    sample. Four files are written to ``<directory>/dataset-txt``:
    ``dataset-all.txt``, ``dataset-train.txt``, ``dataset-test.txt`` and
    ``dataset-validation.txt``, each with one ``<path> <label>`` line per
    sample.

    Label indices follow the order in which ``os.listdir`` returns the
    category folders. Code that maps indices back to folder names has to
    list the directory the same way.

    Args:
        directory: Root folder of the dataset.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls so the split can be reproduced. The default ``None``
            keeps the split random.

    Returns:
        None. The list files are the only result.
    """
    list_dir = os.path.join(directory, 'dataset-txt')
    if not os.path.isdir(list_dir):
        os.makedirs(list_dir)

    dataset_all = os.path.join(list_dir, 'dataset-all.txt')
    dataset_train = os.path.join(list_dir, 'dataset-train.txt')
    dataset_test = os.path.join(list_dir, 'dataset-test.txt')
    dataset_validation = os.path.join(list_dir, 'dataset-validation.txt')

    # Use the `directory` argument, not a notebook-level global, and test
    # only the entry name so a root path that happens to contain
    # 'dataset-txt' does not exclude every category.
    categories = [name for name in os.listdir(directory)
                  if os.path.isdir(os.path.join(directory, name))
                  and 'dataset-txt' not in name]
    category_mapping = {name: index for index, name in enumerate(categories)}

    dataset = []
    for category in categories:
        label = category_mapping[category]
        category_dir = os.path.join(directory, category)
        for sample in os.listdir(category_dir):
            dataset.append((os.path.join(category_dir, sample), label))

    # 98% train; the remaining 2% is halved into test and validation.
    train, test = train_test_split(dataset, test_size=0.02,
                                   random_state=random_state)
    test, val = train_test_split(test, test_size=0.5,
                                 random_state=random_state)

    _write_dataset_list(dataset_all, dataset)
    _write_dataset_list(dataset_train, train)
    _write_dataset_list(dataset_test, test)
    _write_dataset_list(dataset_validation, val)

In [7]:
DATASET = './datasets/toronto2/'

# (Re)generate the list files, then point at them.
create_dataset_lists(DATASET)
_lists_prefix = DATASET+'/dataset-txt/dataset-'
dataset_all = _lists_prefix+'all.txt'
dataset_train = _lists_prefix+'train.txt'
dataset_test = _lists_prefix+'test.txt'
dataset_validation = _lists_prefix+'validation.txt'

# (samples per clip, channels): each clip is cropped to input_shape[0] samples.
input_shape = (10000, 1)

# All three splits are loaded with identical preprocessing.
# NOTE(review): wav_preloader infers the class count from each list file on
# its own, so a small split missing the highest label would get narrower
# one-hot labels than the training split - confirm the label widths match.
_preload_options = dict(resample=8000, crop=input_shape[0], categorical_labels=True)

X_train, y_train = wav_preloader(dataset_train, **_preload_options)
X_test, y_test = wav_preloader(dataset_test, **_preload_options)
X_val, y_val = wav_preloader(dataset_validation, **_preload_options)

In [8]:
# Sanity check: shape of one loaded clip and of one encoded label.
print(X_train[1].shape, y_train[0].shape, sep='\n')

(10000,)
(7,)


In [9]:
def reshape_wav(X):
    """Reshape a batch of clips to ``(batch, input_shape[0], 1, 1)``.

    Reads the notebook-level ``input_shape`` for the clip length.
    """
    target_dims = [-1, input_shape[0], 1, 1]
    return np.array(X).reshape(target_dims)


# Register the reshape as a custom preprocessing step; the object is handed
# to the model's input layer later on.
pre_proc = tflearn.DataPreprocessing()
pre_proc.add_custom_preprocessing(reshape_wav)

In [10]:
def create_model(input_shape, n_classes, pre_proc, LR):
    """Build a small convolutional classifier wrapped in ``tflearn.DNN``.

    Layer stack: input -> conv_2d (32 filters, size 3, ReLU) ->
    max_pool_2d (size 3) -> dropout (0.8) -> fully connected softmax.

    Args:
        input_shape: Pair whose two entries become the 2nd and 3rd input
            dimensions; a trailing dimension of 1 is appended.
        n_classes: Number of units in the softmax output layer.
        pre_proc: Data preprocessing object attached to the input layer.
        LR: Learning rate for the Adam optimizer.

    Returns:
        A ``tflearn.DNN`` that writes TensorBoard logs to ``log``.
    """
    # Start from an empty graph so that re-running the cell does not stack
    # a second network on top of the first.
    tf.reset_default_graph()

    net = input_data(
        shape=[None, input_shape[0], input_shape[1], 1],
        data_preprocessing=pre_proc,
        name='input',
    )

    net = conv_2d(net, 32, 3, activation='relu')
    net = max_pool_2d(net, 3)
    # 0.8 is passed straight to tflearn's dropout (see its docs for the
    # exact meaning of the argument).
    net = dropout(net, 0.8)

    net = fully_connected(net, n_classes, activation='softmax')
    net = regression(
        net,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=LR,
        name='targets',
    )

    return tflearn.DNN(net, tensorboard_dir='log')

In [11]:
# Learning rate handed to the optimizer inside create_model.
LR = 0.001

# The class count is the width of one encoded training label.
model = create_model(input_shape, y_train[0].shape[0], pre_proc, LR)

Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.


In [12]:
# Used both as the run id for the training logs and as the save path.
MODEL_NAME = 'model.tfl'

# Train for 10 epochs, reporting metrics against the validation split.
model.fit(
    {'input': X_train},
    {'targets': y_train},
    n_epoch=10,
    validation_set=({'input': X_val}, {'targets': y_val}),
    shuffle=True,
    show_metric=True,
    snapshot_step=500,
    run_id=MODEL_NAME,
)

# Persist the trained weights.
model.save(MODEL_NAME)

Training Step: 429  | total loss: [1m[32m0.02548[0m[0m | time: 995.497s
| Adam | epoch: 010 | loss: 0.02548 - acc: 1.0000 -- iter: 2688/2744
Training Step: 430  | total loss: [1m[32m0.02489[0m[0m | time: 1045.679s
| Adam | epoch: 010 | loss: 0.02489 - acc: 1.0000 | val_loss: 1.23480 - val_acc: 0.6429 -- iter: 2744/2744
--
INFO:tensorflow:/home/maria/Projects/DRU-W3-FP-6-Emotion-and-Tone-Analyzer/model.tfl is not in all_model_checkpoint_paths. Manually adding it.


In [14]:
# Score the trained model on the test split (kept out of training by
# create_dataset_lists). Presumably the reported value is accuracy - confirm
# against the tflearn docs.
model.evaluate(X_test, y_test)

[0.75]

In [15]:
# Model outputs for every test clip; the following cell takes the argmax of
# each entry to obtain the predicted class index.
res = model.predict(X_test)

In [40]:
# Collapse encoded targets and model outputs to integer class indices.
y_test_labels = [np.argmax(y_test[idx]) for idx in range(len(y_test))]
res_labels = [np.argmax(res[idx]) for idx in range(len(res))]

# Predicted vs. real class index, one row per test clip.
df = pd.DataFrame({'Pred': res_labels, 'Real': y_test_labels})

# Re-list the category folders exactly as create_dataset_lists does, so each
# index maps back to the folder name it was assigned from.
categories = [name for name in os.listdir(DATASET)
              if os.path.isdir(DATASET+name)
              and 'dataset-txt' not in str(DATASET+name)]
category_unmapping = dict(enumerate(categories))

# Show the table with indices replaced by category names (df itself is unchanged).
df.replace(category_unmapping)

Unnamed: 0,Pred,Real
0,Disgust,Disgust
1,Sad,Sad
2,Disgust,Disgust
3,Sad,Sad
4,Fear,Fear
5,Happy,Fear
6,Fear,Neutral
7,Neutral,Angry
8,Sad,Pleasant
9,Pleasant,Pleasant
