# Machine learning with TensorFlow: CNN

Date: October 28, 2018

Neural network: two convolution layers and three hidden layers

Classification of acoustic events:
- Piano music
- Flamenco guitar music
- Classical guitar music
- Blues harp music
- Tin whistle music

In [0]:
# Mount Google Drive into the Colab runtime so the data files become
# reachable under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# Folder that is globbed below for the per-instrument '*melpsd*.csv' files.
DATA_FOLDER = '/content/gdrive/My Drive/acoustic_event_detection/data/'
# Rows whose 'n' column is >= FILTERS are dropped when a CSV is loaded;
# FILTERS is also the first spatial dimension of the FILTERS x 100 CNN input.
FILTERS=40
# Per class, files [0, TRAINING_FILES) of the shuffled list are used for
# training and files [TRAINING_FILES, FILES) for testing.
TRAINING_FILES = 16
FILES = 24

In [0]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import random
import os

## Preparing data set for training CNN

In [0]:
import glob

# Collect the '*melpsd*.csv' files of each instrument from the data folder.
piano_files = glob.glob(DATA_FOLDER + '*piano*melpsd*.csv')
classical_guitar_files = glob.glob(DATA_FOLDER + '*classical_guitar*melpsd*.csv')
framenco_guitar_files = glob.glob(DATA_FOLDER + '*framenco_guitar*melpsd*.csv')
blues_harp_files = glob.glob(DATA_FOLDER + '*blues_harp*melpsd*.csv')
tin_whistle_files = glob.glob(DATA_FOLDER + '*tin_whistle*melpsd*.csv')

# Shuffle every list in place, one class after another, so the later
# train/test slices pick a random subset of files for each class.
for file_list in (
        piano_files,
        classical_guitar_files,
        framenco_guitar_files,
        blues_harp_files,
        tin_whistle_files,
):
    random.shuffle(file_list)

In [0]:
def _split_files(files):
    """Return the (training, test) slices of one class's shuffled file list."""
    return files[:TRAINING_FILES], files[TRAINING_FILES:FILES]


TRAINING_SET_PIANO, TEST_SET_PIANO = _split_files(piano_files)
TRAINING_SET_CLASSICAL_GUITAR, TEST_SET_CLASSICAL_GUITAR = _split_files(classical_guitar_files)
TRAINING_SET_FRAMENCO_GUITAR, TEST_SET_FRAMENCO_GUITAR = _split_files(framenco_guitar_files)
TRAINING_SET_BLUES_HARP, TEST_SET_BLUES_HARP = _split_files(blues_harp_files)
TRAINING_SET_TIN_WHISTLE, TEST_SET_TIN_WHISTLE = _split_files(tin_whistle_files)

# Pair each class's file list with its integer class index:
# 0 piano, 1 classical guitar, 2 framenco guitar, 3 blues harp, 4 tin whistle.
TRAINING_SET = list(zip(
    [TRAINING_SET_PIANO,
     TRAINING_SET_CLASSICAL_GUITAR,
     TRAINING_SET_FRAMENCO_GUITAR,
     TRAINING_SET_BLUES_HARP,
     TRAINING_SET_TIN_WHISTLE],
    range(5)))

TEST_SET = list(zip(
    [TEST_SET_PIANO,
     TEST_SET_CLASSICAL_GUITAR,
     TEST_SET_FRAMENCO_GUITAR,
     TEST_SET_BLUES_HARP,
     TEST_SET_TIN_WHISTLE],
    range(5)))

In [0]:
# Accumulators for the (DataFrame slice, one-hot label) pairs built below.
training_set, test_set = [], []

# Number of rows in one example: FILTERS * 100, the same length as the flat
# input vector the CNN placeholder expects.
divider = FILTERS * 100

def label(l, num_classes=5):
    """Return a one-hot list encoding class index *l*.

    Args:
        l: Integer class index. Normal list indexing applies, so a negative
            value counts from the end of the list.
        num_classes: Length of the returned list. Defaults to 5, the number
            of instrument classes used in this notebook.

    Returns:
        A list of ``num_classes`` integers that is 1 at position ``l`` and 0
        everywhere else.

    Raises:
        IndexError: If ``l`` is outside the range of the list.
    """
    one_hot = [0] * num_classes
    one_hot[l] = 1
    return one_hot

def _load_segments(file_groups):
    """Read every CSV in *file_groups* and cut it into labelled segments.

    Args:
        file_groups: Iterable of ``(files, class_index)`` pairs, shaped like
            ``TRAINING_SET`` / ``TEST_SET``.

    Returns:
        A list of ``(DataFrame, one_hot_label)`` tuples, three per file, in
        the order: first window, second window, window straddling the two.
    """
    half = divider // 2  # integer division, no float round-trip
    segments = []
    for files, class_index in file_groups:
        for file in files:
            df = pd.read_csv(file.replace(os.path.sep, '/'), dtype=np.int16)
            # Keep only the rows whose 'n' column is below FILTERS.
            df = df[df['n'] < FILTERS]
            # Every window is bounded to `divider` rows. The second window was
            # previously open-ended (df[divider:]), so a file with more than
            # 2 * divider rows produced an example of a different length that
            # cannot be fed to the fixed-width network input.
            for start in (0, divider, half):
                segments.append((df[start:start + divider], label(class_index)))
    return segments


# extend() fills the existing list objects instead of rebinding the names.
training_set.extend(_load_segments(TRAINING_SET))
test_set.extend(_load_segments(TEST_SET))

# Mix the classes so examples are not grouped by instrument.
random.shuffle(training_set)
random.shuffle(test_set)

In [0]:
# Split the (DataFrame, one-hot label) pairs into parallel feature and target
# lists. The iteration names deliberately avoid `label`: binding that name at
# module level would replace the label() helper with a list, and any later
# call to label() (for example when re-running the data-loading cell) would
# fail with "TypeError: 'list' object is not callable".
train_x = [segment['magnitude'].values for segment, _ in training_set]
train_t = [one_hot for _, one_hot in training_set]
test_x = [segment['magnitude'].values for segment, _ in test_set]
test_t = [one_hot for _, one_hot in test_set]

## CNN on TensorFlow

In [0]:
import tensorflow as tf
# Only emit TensorFlow log messages at ERROR severity.
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's and TensorFlow's random number generators.
# NOTE(review): Python's `random` module, which shuffles the file lists and
# the example lists earlier in this notebook, is not seeded here.
np.random.seed(20181031)
tf.set_random_seed(20181031)

### Convolution layer

In [0]:
num_samples = FILTERS * 100
num_classes = 5

num_filters1 = 128
num_filters2 = 256

num_layers = 1


def _conv_relu_pool(inputs, in_channels, out_channels):
    """Build one 5x5 convolution -> bias + ReLU -> 2x2 max-pool stage.

    Ops are created in the order kernel, convolution, bias, ReLU, pooling.

    Returns:
        Tuple ``(kernel, conv, bias, activation, pooled)``.
    """
    unit_strides = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]
    kernel = tf.Variable(
        tf.truncated_normal([5, 5, in_channels, out_channels], stddev=0.1))
    conv = tf.nn.conv2d(inputs, kernel, strides=unit_strides, padding='SAME')
    bias = tf.Variable(tf.constant(0.1, shape=[out_channels]))
    activation = tf.nn.relu(conv + bias)
    pooled = tf.nn.max_pool(
        activation, ksize=pool_window, strides=pool_window, padding='SAME')
    return kernel, conv, bias, activation, pooled


# Each flat input vector is viewed as a single-channel FILTERS x 100 image.
x = tf.placeholder(tf.float32, [None, FILTERS * 100])
x_image = tf.reshape(x, [-1, FILTERS, 100, 1])

W_conv1, h_conv1, b_conv1, h_conv1_cutoff, h_pool1 = _conv_relu_pool(
    x_image, 1, num_filters1)

W_conv2, h_conv2, b_conv2, h_conv2_cutoff, h_pool2 = _conv_relu_pool(
    h_pool1, num_filters1, num_filters2)

# Two stride-2 poolings shrink FILTERS x 100 to (FILTERS / 4) x 25 before the
# feature maps are flattened for the dense layers.
h_pool_flat2 = tf.reshape(h_pool2, [-1, (FILTERS // 4) * 25 * num_filters2])

### Fully connected layer

In [0]:
num_units1 = (FILTERS // 4) * 25 * num_filters2
num_units2 = 4096

num_classes = 5


def _dense(inputs, n_in, n_out):
    """Create one affine layer ``inputs @ weights + bias``.

    Weights are drawn from ``tf.truncated_normal`` with its default
    parameters; the bias starts at zero.

    Returns:
        Tuple ``(weights, bias, pre_activation)``.
    """
    weights = tf.Variable(tf.truncated_normal([n_in, n_out]))
    bias = tf.Variable(tf.zeros([n_out]))
    return weights, bias, tf.matmul(inputs, weights) + bias


# Three hidden layers: ReLU, ReLU, then tanh.
w1, b1, y1 = _dense(h_pool_flat2, num_units1, num_units2)
hidden1 = tf.nn.relu(y1)

w2, b2, y2 = _dense(hidden1, num_units2, num_units2)
hidden2 = tf.nn.relu(y2)

w3, b3, y3 = _dense(hidden2, num_units2, num_units2)
hidden3 = tf.nn.tanh(y3)

# Dropout is applied to the last hidden layer only; keep_prob is fed at run time.
keep_prob = tf.placeholder(tf.float32)
hidden3_drop = tf.nn.dropout(hidden3, keep_prob)

# Output layer starts from all-zero weights and biases; softmax turns its
# output into per-class probabilities.
w0 = tf.Variable(tf.zeros([num_units2, num_classes]))
b0 = tf.Variable(tf.zeros([num_classes]))
logits = tf.matmul(hidden3_drop, w0) + b0
p = tf.nn.softmax(logits)

In [0]:
# One-hot target labels, one row per example.
t = tf.placeholder(tf.float32, [None, num_classes])
# Cross-entropy summed over the batch. The probabilities are clipped away from
# zero before the log: tf.log(0) is -inf and 0 * -inf is NaN, so a single
# softmax output that underflows to 0 would otherwise turn the loss and all
# gradients into NaN.
loss = -tf.reduce_sum(t * tf.log(tf.clip_by_value(p, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
# Accuracy: fraction of examples whose most probable class matches the target.
correct_prediction = tf.equal(tf.argmax(p, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [0]:
# Build the initializer op, open a session and run the op once so that every
# tf.Variable in the graph holds its initial value.
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

In [0]:
# 1000 optimisation steps, each on the complete training set with dropout
# keep probability 0.5. Every 20th step the loss and accuracy on the test set
# are printed (dropout disabled).
for i in range(1, 1001):
    sess.run(train_step, feed_dict={x: train_x, t: train_t, keep_prob: 0.5})
    if i % 20 != 0:
        continue
    loss_val, acc_val = sess.run(
        [loss, accuracy],
        feed_dict={x: test_x, t: test_t, keep_prob: 1.0})
    print('Step: {}, Loss: {}, Accuracy: {}'.format(i, loss_val, acc_val))

### Confirmation with test data set

In [0]:
# Class probabilities for the whole test set, with dropout disabled.
p_test = sess.run(p, feed_dict={x: test_x, keep_prob: 1.0})
# Show the first 16 predictions as integer percentages (truncated).
(p_test[:16] * 100).astype(int)

In [0]:
# One-hot target labels of the first 16 test examples, for comparison with
# the predicted probabilities.
test_t[:16]

In [0]:
# Close the TensorFlow session and free the resources it holds.
sess.close()