**SOW-MKI49: Neural Information Processing Systems**  
*Weeks 4 and 5: Assignment (225 points + 30 bonus points)*  
Author: Umut

In [0]:
# Group number: ...
# Student 1 name, student 1 number: ...
# Student 2 name, student 2 number: ...
# Student 3 name, student 3 number: ...

In [1]:
from chainer import ChainList, optimizers, serializers
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np

In [7]:
# Sanity check of F.split_axis: cutting axis 1 at index 2 yields two pieces
# whose shapes are printed one after the other.
dt = np.random.randn(61, 10, 80)
print(dt.shape)
out = F.split_axis(dt, [2], 1)
for piece in out:
    print(piece.shape)

(61, 10, 80)
(61, 2, 80)
(61, 8, 80)


**WaveNet component (75 points)**

* Implement missing parts of the call method (y and z). **25 points**
* Implement residual block class. **50 points**

---
Reminder:

* One convolution layer that has 61 kernels of size 2 with no nonlinearities.
![alt text](http://i67.tinypic.com/21mgi2w.png)
![alt text](http://i67.tinypic.com/292n04y.png)
---



# Contributor - Sameera

In [13]:
class _ResidualBlock(ChainList):
    """Gated residual block of the WaveNet component.

    ``ChainList`` registers its children positionally, so the three links are
    addressed by index rather than by name:

    * ``self[0]`` -- dilated (1, 2) convolution, 61 -> 2 * 61 channels
      (the two halves feed the sigmoid and tanh gates).
    * ``self[1]`` -- 1x1 convolution, 61 -> 512 channels (skip output ``z``).
    * ``self[2]`` -- 1x1 convolution, 61 -> 61 channels (residual output ``y``).

    Args:
        dilation: Dilation of the gated convolution. ``_WaveNet`` passes a
            ``(1, d)`` tuple; a bare int is used as the dilation along the
            last axis.
    """

    def __init__(self, dilation):
        super(_ResidualBlock, self).__init__(
            L.Convolution2D(in_channels=61, out_channels=2 * 61, ksize=(1, 2), dilate=dilation),
            L.Convolution2D(in_channels=61, out_channels=512, ksize=(1, 1)),
            L.Convolution2D(in_channels=61, out_channels=61, ksize=(1, 1)),
        )
        # A width-2 kernel with dilation d spans d + 1 positions, so d zeros on
        # the left keep the last axis the same length as the input.
        self._left_pad = dilation[1] if isinstance(dilation, (tuple, list)) else dilation

    def __call__(self, x):
        """Apply the block.

        Args:
            x: Input with 61 channels on axis 1 and four axes in total
                (assumed (batch, 61, 1, time) -- TODO confirm against caller).

        Returns:
            Tuple ``(y, z)``: ``y`` is the residual output (same shape as
            ``x``) that feeds the next block, ``z`` is the 512-channel skip
            output.
        """
        # Pad only on the left of the last axis so the convolution stays causal
        # and the residual sum below has matching shapes.
        padded = F.pad(x, ((0, 0), (0, 0), (0, 0), (self._left_pad, 0)), 'constant')
        h = F.split_axis(self[0](padded), 2, 1)
        gatedOutput = F.sigmoid(h[0]) * F.tanh(h[1])
        z = self[1](gatedOutput)
        y = self[2](gatedOutput) + x
        return y, z

    
class _WaveNet(ChainList):
    """WaveNet component: input convolution, six residual blocks, two 1x1 heads.

    Link layout (indices into ``self``)::

        0    Convolution2D  61 -> 61, ksize (1, 2)
        1-6  _ResidualBlock 61 -> (61, 512), dilation (1, 2 ** i) for i = 0..5
        7    Convolution2D  512 -> 512, ksize 1
        8    Convolution2D  512 -> 3843, ksize 1
    """

    def __init__(self):
        links = (L.Convolution2D(in_channels=61, out_channels=61, ksize=(1, 2)),)
        links += tuple(_ResidualBlock((1, 2 ** (i % 6))) for i in range(6))
        links += (L.Convolution2D(512, 512, 1), L.Convolution2D(512, 3843, 1))
        super(_WaveNet, self).__init__(*links)
        # Open questions from the original notes, with what the code shows:
        # 1. Why/how does conv2D_7 have an input of 512? It consumes the sum of
        #    the skip outputs ``z``, and every residual block emits 512 skip
        #    channels; the residual path ``y`` stays at 61 channels.
        # 2. Where do 61*61 and 2*61 come from? The final 3843 channels are
        #    split at 61*61 = 3721 and reshaped to (61, 61) and (2, 61):
        #    3721 + 122 = 3843.

    def __call__(self, x):
        """Run the network.

        Args:
            x: Input with 61 channels on axis 1 and four axes in total
                (assumed (batch, 61, 1, time) -- TODO confirm against caller).

        Returns:
            Tuple of two variables reshaped to (batch, 61, 61, time) and
            (batch, 2, 61, time).
        """
        # One zero on the left of the last axis keeps the width-2 input
        # convolution from shortening that axis.
        y = self[0](F.pad(x, ((0, 0), (0, 0), (0, 0), (1, 0)), 'constant'))
        z = 0

        # y: residual output of each block, fed to the next block.
        # z: running sum of the skip-connection outputs.
        for i in range(1, len(self) - 2):  # the residual blocks only
            y, z_hat = self[i](y)
            z += z_hat

        z = F.relu(self[-2](z))
        z = self[-1](z)
        # A list as second argument gives split *positions*: channels
        # [0, 61*61) go to y and the remaining 2*61 go to z.
        y, z = F.split_axis(z, [61 * 61], 1)

        return (F.reshape(y, (y.shape[0], 61, 61, y.shape[3])),
                F.reshape(z, (z.shape[0], 2, 61, z.shape[3])))


In [14]:
# Build the network once, then list the dilation tuple given to each of the
# six residual blocks.
WN = _WaveNet()
print(WN)
for i in range(6):
    dilation = (1, 2 ** i)
    print(*dilation)

9
<__main__._WaveNet object at 0x7f68dc9f69b0>
1 1
1 2
1 4
1 8
1 16
1 32


In [None]:
'''
Backup
'''
# NOTE(review): unfilled assignment template kept as a backup. The loop body and
# the final split are still commented-out placeholders, so this cell is not
# valid Python as written and must not be executed; a completed version would
# also rebind the name _WaveNet.
class _WaveNet(ChainList):
    def __init__(self):
        # One input convolution, six residual blocks with dilation (1, 2 ** i),
        # then two 1x1 convolutions (512 -> 512 and 512 -> 3843).
        links = (L.Convolution2D(61, 61, (1, 2)),)
        links += tuple(_ResidualBlock((1, 2 ** (i % 6))) for i in range(6))
        links += (L.Convolution2D(512, 512, 1), L.Convolution2D(512, 3843, 1))

        super(_WaveNet, self).__init__(*links)

    def __call__(self, x):
        # Left-pad the last axis by one before the width-2 input convolution.
        # Note that y is a one-element tuple here because of the trailing comma.
        y = (self[0](F.pad(x, ((0, 0), (0, 0), (0, 0), (1, 0)), 'constant')),)
        z = 0

        # Iterates over the residual-block indices (1 .. len(self) - 3).
        for i in range(1, len(self) - 2):
            # TODO (template): y = ...
            # TODO (template): z += ...

        # TODO (template): y, z = ...

        # Both outputs are reshaped to four axes: (batch, 61, 61, last) and
        # (batch, 2, 61, last).
        return F.reshape(y, (y.shape[0], 61, 61, y.shape[3])), \
               F.reshape(z, (z.shape[0], 2, 61, z.shape[3]))

# Template placeholder: an empty ChainList subclass standing in for the
# residual block that the assignment asks to implement.
class _ResidualBlock(ChainList):
    pass

**CRF-RNN component (50 points)**

* Implement missing parts of the call method (z). **25 points**
* Why is z not normalized in the last iteration? **25 points**

---

Reminder:

![alt text](http://i68.tinypic.com/sy6mix.png)

---

# Contributor - Jitendra

In [0]:
class _CRF(ChainList):
    """CRF-RNN component: five unrolled mean-field update steps.

    The single child link ``self[0]`` is a bias-free 1-D convolution with
    kernel size 1 that maps 2 channels to 2 channels.
    """

    def __init__(self):
        super(_CRF, self).__init__(L.ConvolutionND(1, 2, 2, 1, nobias=True))

    def __call__(self, x, y):
        """Run the mean-field iterations.

        Args:
            x: Pairwise kernel (assumed shape (N, 61, 61) -- TODO confirm
                against caller).
            y: Unary potentials (assumed shape (N, 2, 61), labels on axis 1
                -- TODO confirm against caller).

        Returns:
            Unnormalised scores with the same shape as ``y``.
        """
        # Initialise the marginals from the unary potentials alone.
        z = F.softmax(-y)

        for i in range(5):
            # Message passing (z times kernel), compatibility transform
            # (self[0]), then adding the unary term.
            z = -y - self[0](F.matmul(z, x))

            # The last iteration is left unnormalised so the result can be used
            # as logits (presumably by a softmax-based loss -- verify against
            # the caller).
            if i < 4:
                z = F.softmax(z)

        return z

**WaveCRF model (50 points)**

1. Implement missing parts of the call method (k, psi_u and Q_hat). **20 points**
2. Implement missing parts of the save and load methods (save and load model). **10 points**
3. Implement missing parts of the test and train methods (forward and/or backward propagate). **20 points**

# Contributor - Mohit

In [0]:
class WaveCRF(object):
    """WaveNet + CRF-RNN model bundled with its optimizer and a metric log.

    Attributes are set in ``__init__``:
        log: dict keyed by (split, metric) tuples; each value is a tuple of
            floats that grows by one entry per ``train``/``test`` call.
        model: ``ChainList(_WaveNet(), _CRF())``.
        optimizer: Adam optimizer set up on ``model``.
    """

    def __init__(self):
        self.log = {('test', 'accuracy'): (), ('test', 'loss'): (), ('training', 'accuracy'): (),
                    ('training', 'loss'): ()}
        self.model = ChainList(_WaveNet(), _CRF())
        self.optimizer = optimizers.Adam(0.0002, 0.5)

        self.optimizer.setup(self.model)

    def __call__(self, x):
        """Forward pass.

        Args:
            x: Network input with four axes; axis 0 is the batch and axis 3 is
                the one the output keeps as its last axis.

        Returns:
            Scores of shape (x.shape[0], 2, 61, x.shape[3]).
        """
        # _WaveNet returns (batch, 61, 61, time) and (batch, 2, 61, time).
        k, psi_u = self.model[0](x)
        # Fold time into the batch axis so the CRF sees one (61, 61) kernel and
        # one (2, 61) unary table per time step.
        k = F.reshape(F.transpose(k, (0, 3, 1, 2)), (-1, 61, 61))
        psi_u = F.reshape(F.transpose(psi_u, (0, 3, 1, 2)), (-1, 2, 61))
        Q_hat = self.model[1](k, psi_u)

        return F.transpose(F.reshape(Q_hat, (x.shape[0], x.shape[3], 2, 61)), (0, 2, 3, 1))

    @classmethod
    def load(cls, directory):
        """Build a new instance and restore log, model and optimizer from *directory*."""
        self = cls()
        # np.save stores the dict as a pickled 0-d object array; .item() unwraps
        # it back into a dict.
        # NOTE: allow_pickle unpickles the file -- only load trusted directories.
        self.log = np.load('{}/log.npy'.format(directory), allow_pickle=True).item()

        serializers.load_npz('{}/model.npz'.format(directory), self.model)
        serializers.load_npz('{}/optimizer.npz'.format(directory), self.optimizer)

        return self

    def save(self, directory):
        """Write log, model and optimizer state into the existing *directory*."""
        np.save('{}/log.npy'.format(directory), self.log)
        serializers.save_npz('{}/model.npz'.format(directory), self.model)
        serializers.save_npz('{}/optimizer.npz'.format(directory), self.optimizer)

    def test(self, Q, x):
        """Evaluate one batch and append its accuracy and loss to the log.

        Args:
            Q: Integer targets (assumed shape (batch, 61, time) -- TODO confirm).
            x: Network input, see ``__call__``.
        """
        # No graph is needed for evaluation, which keeps memory low.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            Q_hat = self(x)
            loss = F.softmax_cross_entropy(Q_hat, Q)

            self.log['test', 'accuracy'] += (float(F.accuracy(Q_hat, Q).data),)
            self.log['test', 'loss'] += (float(loss.data),)

    def train(self, Q, x):
        """Run one optimisation step and append accuracy and loss to the log.

        Args:
            Q: Integer targets (assumed shape (batch, 61, time) -- TODO confirm).
            x: Network input, see ``__call__``.
        """
        Q_hat = self(x)
        loss = F.softmax_cross_entropy(Q_hat, Q)

        # Gradients accumulate in Chainer, so clear them before each backward pass.
        self.model.cleargrads()
        loss.backward()
        self.optimizer.update()

        self.log['training', 'accuracy'] += (float(F.accuracy(Q_hat, Q).data),)
        self.log['training', 'loss'] += (float(loss.data),)

# Training

In [2]:
%matplotlib inline

import IPython
import chainer
import matplotlib
import numpy
import os
import pickle
import random
from tqdm import tqdm


In [14]:
# Smoke test of the tqdm progress bar: nine million no-op iterations.
for i in tqdm(range(9000000), ascii=True, desc="NIPS-G17"):
    pass

NIPS-G17: 100%|##########| 9000000/9000000 [00:02<00:00, 4274806.20it/s]


In [0]:
# Training hyper-parameters and the root directory used to build the data and
# model paths.
batch_size, epochs, root = 30, 100, '..'

In [9]:
# Exploratory dump of the pickled piano-roll dictionary: number of entries,
# then for every entry its key, length, shape and contents.
with open('piano_rolls.p', 'rb') as f:
    piano_rolls = pickle.load(f, encoding='latin1')
    print(len(piano_rolls))
    for key, roll in piano_rolls.items():
        print('{} - {}'.format(key, len(roll)))
        print(roll.shape)
        print(roll)
        

3636
017906B_/0_0_3_0 - 128
(128, 424)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
022711B_/0_0_3_0 - 128
(128, 608)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
007706B_/1_0_0_0 - 128
(128, 512)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
000106B_/1_0_5_0 - 128
(128, 608)
[[False False False ... False False False]
 [False False False ... 

[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
032700B_/1_0_0_0 - 128
(128, 360)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
024514B_/0_0_4_0 - 128
(128, 512)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
039100B_/0_0_3_0 - 128
(128, 528)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False 

(128, 288)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
001907B_/0_0_2_0 - 128
(128, 864)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
005903B_/0_0_5_0 - 128
(128, 872)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
009209B_/0_0_8_0 - 128
(128, 584)
[[False False False ... False False False]
 [False False False ... False False False]
 [False F

 [False False False ... False False False]]
039700B_/0_0_1_0 - 128
(128, 672)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
032000B_/0_0_8_0 - 128
(128, 912)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
001805Bw/1_0_7_0 - 128
(128, 512)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
019406B_/0_0_7_0 - 128
(128, 544)
[[False False False ... Fals

[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
017405B_/1_0_5_0 - 128
(128, 768)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
030600B_/0_0_2_0 - 128
(128, 368)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
031000B_/0_0_7_0 - 128
(128, 320)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False 

[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
011506B_/0_0_5_0 - 128
(128, 448)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
024410B_/0_0_8_0 - 128
(128, 360)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
011205B_/1_0_0_0 - 128
(128, 416)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False 

 [False False False ... False False False]]
000907B_/1_0_5_0 - 128
(128, 416)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
017606B_/0_0_1_0 - 128
(128, 552)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
028900B_/1_0_7_0 - 128
(128, 352)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
030100B_/0_0_1_0 - 128
(128, 608)
[[False False False ... Fals

In [0]:
# Load the piano rolls and split them deterministically into 10% test and
# 90% training data.
with open('{}/Data/piano_rolls.p'.format(root), 'rb') as f:
    # encoding='latin1' matches the exploratory load of the same pickle
    # elsewhere in this notebook; a Python 3 pickle ignores the argument.
    # NOTE: unpickling runs arbitrary code -- only load a trusted data file.
    piano_rolls = pickle.load(f, encoding='latin1')

# Sort before shuffling so the seeded shuffle gives the same split regardless
# of dict ordering.
keys = sorted(piano_rolls.keys())

random.seed(6)
random.shuffle(keys)

n_test = int(0.1 * len(keys))
test_set = {key: piano_rolls[key] for key in keys[:n_test]}
training_set = {key: piano_rolls[key] for key in keys[n_test:]}
training_set_keys = list(training_set.keys())

In [0]:
waveCRF = WaveCRF()

# Move the parameters to the GPU only when CUDA is usable. The training code
# builds its batches through waveCRF.model.xp, so it also runs on the CPU.
if chainer.cuda.available:
    waveCRF.model.to_gpu()
else:
    print('CUDA is not available; WaveCRF will run on the CPU.')

In [0]:
# The notebook imports the tqdm *class* (``from tqdm import tqdm``), which has
# no ``tnrange``/``tqdm_notebook`` attributes, and never imports the pyplot
# submodule; import what this cell needs explicitly.
import matplotlib.pyplot as plt
from tqdm import tnrange, tqdm_notebook

window = 80  # time steps per training excerpt

for epoch in tnrange(epochs):
    random.shuffle(training_set_keys)

    batch = ()

    for key in tqdm_notebook(training_set_keys, leave=False):
        roll = training_set[key]
        # random.randint raises on an empty range, so skip rolls that are too
        # short to hold one excerpt.
        if roll.shape[1] < window:
            continue

        # Random excerpt of `window` steps, restricted to rows 32..92 (61 rows).
        i = random.randint(0, roll.shape[1] - window)
        batch += (roll[32:93, i:i + window],)

        if len(batch) == batch_size:
            batch = waveCRF.model.xp.array(batch)

            # Targets are the roll shifted one step ahead of the inputs.
            waveCRF.train(batch[:, :, 1:].astype('i'), batch[:, :, None, :-1].astype('f'))

            batch = ()

    # Each test roll is evaluated whole, as a batch of one.
    for key in tqdm_notebook(test_set, leave=False):
        batch = waveCRF.model.xp.array((test_set[key][32:93],))

        waveCRF.test(batch[:, :, 1:].astype('i'), batch[:, :, None, :-1].astype('f'))

    IPython.display.clear_output()

    # One subplot per logged metric, averaged per epoch.
    for i, key in enumerate(waveCRF.log):
        plt.subplot(221 + i)
        plt.plot(numpy.array(waveCRF.log[key]).reshape(epoch + 1, -1).mean(1))
        plt.xlabel('iteration')
        plt.ylabel(key)

    plt.tight_layout()
    plt.show()

    # exist_ok lets the cell be re-run without failing on an existing directory.
    checkpoint_dir = '{}/Models/WaveCRF/{}'.format(root, epoch)
    os.makedirs(checkpoint_dir, exist_ok=True)
    waveCRF.save(checkpoint_dir)

**Test (50 points)**  

* Generate a number of samples, pick the best one and play it in the notebook. **50 points**

In [0]:
# Test

**Bonus question (30 points)**

* Discuss how you can improve the model (you can talk about different architectures or different ways to encode the inputs, etc.) **10 points**
* Discuss the assumptions behind the mean-field approximation and its shortcomings. **10 points**
* Prove that the iterative update equation (CRF-RNN component) is differentiable so that we can backpropagate through it. **10 points**