**SOW-MKI49: Neural Information Processing Systems**  
*Weeks 4 and 5: Assignment (225 points + 30 bonus points)*  
Author: Umut

In [None]:
# Group number: 6
# Student 1 name, student 1 number: Aumkar Lele, s4743962
# Student 2 name, student 2 number: Djamari Oetringer, s4464559
# Student 3 name, student 3 number: Daphne Lenders, s4433556

# Comment: We made two implementations of the WaveCRF model: one in Chainer and one in TensorFlow. The Chainer
# model is complete and working, so it is the model we would like to hand in for grading. The TensorFlow model is
# not fully working yet, so any feedback on what is going wrong or what is missing would
# be really useful!

In [None]:
import os
import numpy as np
import cv2
import pickle
import random
import pandas as pd
import tensorflow as tf
from glob import glob
from time import time
import tqdm
import pdb
import matplotlib.pyplot as plt

In [None]:
# Short alias for the TF 1.x contrib layer helpers used by the model cells below.
layers = tf.contrib.layers

# Training hyper-parameters.
batch_size = 32  # number of piano-roll segments per optimisation step
epochs = 10      # number of passes over the training keys

**WaveNet component (75 points)**

* Implement missing parts of the call method (y and z). **25 points**
* Implement residual block class. **50 points**

---
Reminder:

* One convolution layer that has 61 kernels of size 2 with no nonlinearities.
![alt text](http://i67.tinypic.com/21mgi2w.png)
![alt text](http://i67.tinypic.com/292n04y.png)
---



In [None]:
# '__file__' is deliberately a string literal: notebooks do not define __file__,
# so resolving this dummy relative path and taking its parent directory yields
# the absolute working directory of the kernel.
_notebook_stub = os.path.realpath('__file__')
root_dir = os.path.dirname(_notebook_stub)

# Folder (next to the notebook) that is expected to hold the piano-roll data.
data_directory = os.path.join(root_dir, 'piano_roll_data')

In [None]:
# Load the pickled piano rolls from the current working directory.
# NOTE: pickle.load can execute arbitrary code, so only open files from a trusted source.
rolls_path = 'piano_rolls.p'
with open(rolls_path, 'rb') as f:
    piano_rolls = pickle.load(f)

In [None]:
# Deterministic split: sort the keys, shuffle them with a fixed seed, then use
# the first 10% as the test set and the remaining 90% as the training set.
keys = sorted(piano_rolls)

random.seed(6)
random.shuffle(keys)

n_test = int(0.1 * len(keys))
test_keys, train_keys = keys[:n_test], keys[n_test:]

test_set = {key: piano_rolls[key] for key in test_keys}
training_set = {key: piano_rolls[key] for key in train_keys}
training_set_keys = list(training_set)

## Residual block

In [None]:
def residual_block(y, i, par_conv_sum):
    """Gated WaveNet residual block built around one causal dilated convolution.

    Args:
        y: Block input in NCHW layout. waveCRF passes a tensor of shape
            (batch, 61, 1, time), i.e. the 61 notes are the channels and time
            is the last axis.
        i: Dilation rate along the time axis. ``i == 1`` marks the first block
            of the stack and ``i == 32`` the last one.
        par_conv_sum: Running sum of the 512-channel skip outputs of the
            preceding blocks. Ignored when ``i == 1``.

    Returns:
        ``(residual_output, skip_sum)`` for every block except the last one
        (``i == 32``), which returns ``skip_sum`` only.
    """
    # Left-pad the time axis by the dilation rate so that the VALID convolution
    # below is causal and gives back exactly the input width.
    padded_input = tf.pad(y, [[0, 0], [0, 0], [0, 0], [i, 0]], mode='CONSTANT')

    # layers.conv2d defaults to padding='SAME' and activation_fn=tf.nn.relu.
    # SAME padding on top of the manual pad made the output i steps wider than
    # the shortcut (the residual add then fails), and the implicit ReLU fed
    # only non-negative values into tanh/sigmoid. Both are set explicitly here.
    # The kernel and the dilation act on the time axis only: the height axis
    # has size 1, so it must not be convolved or dilated.
    gate_input = layers.conv2d(padded_input, 122, kernel_size=[1, 2], stride=1, rate=[1, i],
                               padding='VALID', activation_fn=None, data_format='NCHW')

    # Gated activation unit: tanh(filter) * sigmoid(gate). The single 122-channel
    # convolution already provides both halves, so no further dilated
    # convolutions are applied to the splits (those were unpadded and would
    # not have been causal).
    filter_part, gate_part = tf.split(gate_input, [61, 61], axis=1)
    y_mul = tf.multiply(tf.nn.tanh(filter_part), tf.nn.sigmoid(gate_part))

    # 1x1 projection for the residual path (linear) ...
    parallel_conv1 = layers.conv2d(y_mul, 61, kernel_size=1, stride=1,
                                   activation_fn=None, data_format='NCHW')
    # ... and 1x1 projection for the skip path (ReLU, as before).
    parallel_conv2 = layers.conv2d(y_mul, 512, kernel_size=1, stride=1,
                                   data_format='NCHW', activation_fn=tf.nn.relu)

    y_1 = tf.add(y, parallel_conv1)

    # The first block starts the skip sum; later blocks accumulate into it.
    if i == 1:
        skip_sum = parallel_conv2
    else:
        skip_sum = tf.add(parallel_conv2, par_conv_sum)

    # The last block of the stack only hands on the accumulated skip sum.
    if i == 32:
        return skip_sum
    return y_1, skip_sum

## CRF block

In [None]:
def _crf(k, psi_u):
    """Run five mean-field iterations of the CRF on top of the WaveNet outputs.

    Each iteration performs, in order: normalisation (softmax over the label
    axis), message passing (marginals times the pairwise kernels), the
    compatibility transform (a shared 1x1 convolution over the label axis) and
    the local update ``-psi_u - pairwise``.

    Args:
        k: Pairwise kernels. waveCRF passes a (batch, 61, 61, time) tensor.
        psi_u: Unary energies. waveCRF passes a (batch, 2, 61, time) tensor.

    Returns:
        Unnormalised label scores in the same layout as ``psi_u``
        (batch, 2, 61, time); the final iteration is not followed by a softmax
        so the result can be used as logits.
    """
    num_iterations = 5

    # (batch, labels, notes, time) -> (batch, labels, time, notes): labels stay
    # on axis 1 so they are the channels of the NCHW 1x1 convolution below.
    neg_unary = -tf.transpose(psi_u, perm=[0, 1, 3, 2])
    # (batch, notes, notes, time) -> (batch, time, notes, notes): one
    # notes-by-notes kernel matrix per time step for the batched matmul.
    kernels = tf.transpose(k, perm=[0, 3, 1, 2])

    # Start from the unary term alone. The sign matches the local update below
    # (the previous code initialised with softmax(+psi_u) but updated with -psi_u).
    z = neg_unary
    for _ in range(num_iterations):
        # Normalisation
        marginals = tf.nn.softmax(z, axis=1)

        # Message passing layer: (batch, time, labels, notes) x (batch, time, notes, notes)
        messages = tf.matmul(tf.transpose(marginals, perm=[0, 2, 1, 3]), kernels)

        # Compatibility transform layer. A fixed scope with AUTO_REUSE shares
        # one set of weights across all iterations; activation_fn=None because
        # layers.conv2d would otherwise apply a ReLU.
        ctl = layers.conv2d(tf.transpose(messages, perm=[0, 2, 1, 3]), 2, kernel_size=1, stride=1,
                            activation_fn=None, data_format='NCHW',
                            reuse=tf.AUTO_REUSE, scope='crf_compatibility')

        # Local update: both operands are (batch, labels, time, notes).
        z = tf.subtract(neg_unary, ctl)

    # Back to the layout of psi_u: (batch, labels, notes, time).
    return tf.transpose(z, perm=[0, 1, 3, 2])
            

In [None]:
def waveCRF(input_):
    """Build the WaveNet + CRF graph for one batch of piano-roll segments.

    Args:
        input_: Tensor (or array) in NCHW layout. The training cell passes
            (batch_size, 61, 1, time): the 61 notes are the channels, the
            height axis has size 1 and time is the last axis.

    Returns:
        The output of ``_crf`` applied to the pairwise kernels ``k``
        (batch, 61, 61, time) and unary energies ``psi_u`` (batch, 2, 61, time)
        predicted by the WaveNet stack.
    """
    # Causal padding: one step on the left of the time axis for the size-2 kernel.
    padded_input = tf.pad(input_, [[0, 0], [0, 0], [0, 0], [1, 0]], mode='CONSTANT')

    # 61 kernels of size 2, no nonlinearity. layers.conv2d defaults to
    # padding='SAME' and activation_fn=tf.nn.relu, so both are overridden:
    # VALID keeps the output width equal to the unpadded input width.
    model = layers.conv2d(padded_input, 61, kernel_size=[1, 2], padding='VALID',
                          activation_fn=None, data_format='NCHW')

    model_shape = model.get_shape().as_list()

    # Initial skip sum handed to the first residual block.
    conv_init = tf.zeros([batch_size, 512, model_shape[2], model_shape[3]], tf.float32)

    # Stack of residual blocks with dilation rates 1, 2, 4, 8, 16, 32. Every
    # block but the last returns (residual, skip_sum); the last returns the
    # accumulated skip sum only.
    residual, skip_sum = model, conv_init
    for rate in (1, 2, 4, 8, 16):
        residual, skip_sum = residual_block(residual, rate, skip_sum)
    skip_sum = residual_block(residual, 32, skip_sum)

    # Linear output projection: 61 * 61 channels for k plus 2 * 61 for psi_u.
    # activation_fn=None because the default ReLU would force both to be >= 0.
    model_ = layers.conv2d(skip_sum, 3843, kernel_size=1, stride=1,
                           activation_fn=None, data_format='NCHW')

    out1, out2 = tf.split(model_, [3721, 122], axis=1)

    out_shape = model_.get_shape().as_list()
    positions = out_shape[2] * out_shape[3]

    # -1 lets TensorFlow infer the batch dimension instead of requiring it to
    # be statically known.
    wave_out1 = tf.reshape(out1, (-1, 61, 61, positions))  # k
    wave_out2 = tf.reshape(out2, (-1, 2, 61, positions))   # psi_u

    return _crf(wave_out1, wave_out2)

In [None]:
# ---------------------------------------------------------------------------
# Build the graph ONCE, before the session starts.
# Creating the network, loss and optimiser inside the batch loop adds a new
# copy of every variable per batch, and those copies are created after
# global_variables_initializer() has already run, so they are never initialised.
# ---------------------------------------------------------------------------
segment_length = 80              # time steps sliced from each piano roll
num_steps = segment_length - 1   # inputs and targets are the segment shifted by one step

input_ph = tf.placeholder(tf.float32, [batch_size, 61, 1, num_steps])
target_ph = tf.placeholder(tf.int32, [batch_size, 61, num_steps])

Q_hat = waveCRF(input_ph)  # logits, expected layout (batch, 2, 61, time)

# The targets are integer class ids, not one-hot vectors, so the sparse loss is
# used; it expects the class axis to be the last axis of the logits.
# NOTE(review): this assumes the piano-roll entries are 0/1 -- confirm.
train_loss = tf.losses.sparse_softmax_cross_entropy(
    labels=target_ph, logits=tf.transpose(Q_hat, perm=[0, 2, 3, 1]))

t_vars = tf.trainable_variables()
Optimizer = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.9,
                                   beta2=0.999, epsilon=1e-8).minimize(train_loss, var_list=t_vars)

# Accuracy: predicted class (argmax over the class axis) against the integer targets.
predicted = tf.cast(tf.argmax(Q_hat, axis=1), tf.int32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, target_ph), tf.float32))

loss_history = []
accuracy_history = []

with tf.Session() as sess:
    # Runs after the whole graph (including Adam's slot variables) exists.
    sess.run(tf.global_variables_initializer())

    for epoch in tqdm.tnrange(epochs):
        random.shuffle(training_set_keys)

        batch = []

        for key in tqdm.tqdm_notebook(training_set_keys, leave = False):
            roll = training_set[key]

            # random.randint raises ValueError on an empty range, so rolls
            # shorter than one segment are skipped.
            if roll.shape[1] < segment_length:
                continue

            i = random.randint(0, roll.shape[1] - segment_length)
            batch.append(roll[32 : 93, i : i + segment_length])

            if len(batch) == batch_size:
                segments = np.asarray(batch)
                batch = []  # start collecting the next batch

                Q = segments[:, :, 1:].astype('i')
                input_ = segments[:, :, None, :-1].astype('f')

                _, lossV, _accuracy = sess.run([Optimizer, train_loss, accuracy],
                                               feed_dict={input_ph: input_, target_ph: Q})
                loss_history.append(lossV)
                accuracy_history.append(_accuracy)

# Plot the per-batch training curves once training has finished.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))
loss_ax.plot(loss_history)
loss_ax.set(title='Training loss', xlabel='batch', ylabel='cross-entropy')
acc_ax.plot(accuracy_history)
acc_ax.set(title='Training accuracy', xlabel='batch', ylabel='accuracy')
plt.show()
                
