# Destilation of knowledge

This deals with combination of data that wont be available in the testing but it will in the training  !!


## Non Spectral Destilation:

In [22]:
from copy import deepcopy
from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types
from mlp.layers import * 
from mlp.optimisers import SGDOptimiser #import the optimiser

from mlp.costs import CECost, MSECost #import the cost we want to use for optimisation
from mlp.schedulers import LearningRateFixed
import numpy
import logging
from mlp.dataset import *
from base import *

In [23]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MACLDataProvider(dset='train', batch_size=100,
                            max_num_batches=-10, randomize=True,name=['RLAx', 'LLAx'],
                            conv_reshape=False)
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                            max_num_batches=1, randomize=False,name=['RLAx', 'LLAx'],
                            conv_reshape=False)


test_dp = MACLDataProvider(dset='test', batch_size=1140,
                           max_num_batches=1, randomize=False,name=['RLAx', 'LLAx'],
                           conv_reshape=False)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

INFO:root:Initialising data providers...


(6840, 45, 125) (6840,)
shape final:  (6840, 250)
(6840, 250)
(1140, 45, 125) (1140,)
shape final:  (1140, 250)
(1140, 250)
(1140, 45, 125) (1140,)
shape final:  (1140, 250)
(1140, 250)


In [24]:
#some hyper-parameters
nhid = 100
learning_rate =0.1
max_epochs = 1000

cost = CECost()    
stats = list()

# test_dp = deepcopy(valid_dp)
train_dp.reset()
valid_dp.reset()
test_dp.reset()

#define the model
model = MLP(cost=cost)

# Every activation function from dft layer produces two values (x,y) for x+iy
#model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
model.add_layer(Sigmoid(idim=250, odim=125, irange=1.6, rng=rng))
#model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=125, odim=19, rng=rng))

# define the optimiser, here stochasitc gradient descent
# with fixed learning rate and max_epochs
lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)

logger.info('Training started...')
tr_stats_f, valid_stats_f = optimiser.train(model, train_dp, test_dp)

logger.info('Testing the model on test set:')

tst_costf, tst_accuracyf = optimiser.validate(model,valid_dp )
logger.info('ACL test set accuracy is %.2f %%, cost (%s) is %.3f'%
            (tst_accuracyf*100., cost.get_name(), tst_costf))

INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 3.022. Accuracy is 4.60%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 3.043. Accuracy is 3.95%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.572. Accuracy is 23.12%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 2.469. Accuracy is 22.02%
INFO:mlp.optimisers:Epoch 1: Took 0 seconds. Training speed 25189 pps. Validation speed 57050 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.200. Accuracy is 32.93%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 2.314. Accuracy is 27.98%
INFO:mlp.optimisers:Epoch 2: Took 0 seconds. Training speed 23452 pps. Validation speed 57050 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 2.033. Accuracy is 36.13%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 2.250. Accuracy is 32.63%
INFO:mlp.optimisers:Epoch 3: Took 0 seconds. Training speed 21939 pps. Validation speed 38033 pps.
INFO:mlp.optimi

#  Destilation.

An important step is to remove the connections that where just there for training purposes

In [25]:
from copy import copy
old = copy(model.layers[0].W)
model.layers[0].W = model.layers[0].W[:125,:]

In [26]:
model.layers[0].W.shape

(125, 125)

In [31]:
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
￼
                           max_num_batches=1, randomize=False,name=['RLAx', 'LLAx'],
                           conv_reshape=False)

(1140, 45, 125) (1140,)
shape final:  (1140, 250)
(1140, 250)


In [32]:
valid_dp.reset()
train_dp.reset()
model.layers[0].W = copy(old)
tst_costf, tst_accuracyf = optimiser.validate(model,valid_dp )
logger.info('ACL test set accuracy is %.2f %%, cost (%s) is %.3f'%
            (tst_accuracyf*100., cost.get_name(), tst_costf))

INFO:root:ACL test set accuracy is 44.30 %, cost (ce) is 2.525


# Spectral Destilation of Knowledge


With embeded fft's

In [43]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.info('Initialising data providers...')

train_dp = MACLDataProvider(dset='train', batch_size=100,
                            max_num_batches=-10, randomize=True,name=['RLAx', 'LLAx'],
                            conv_reshape=False, fft=True)
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                            max_num_batches=1, randomize=False,name=['RLAx', 'LLAx'],
                            conv_reshape=False, fft=True)


test_dp = MACLDataProvider(dset='test', batch_size=1140,
                           max_num_batches=1, randomize=False,name=['RLAx', 'LLAx'],
                           conv_reshape=False, fft=True)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
rng = numpy.random.RandomState([2015,10,10])

INFO:root:Initialising data providers...


(6840, 45, 125) (6840,)
shape final:  (6840, 250)
(6840, 250)
(1140, 45, 125) (1140,)
shape final:  (1140, 250)
(1140, 250)
(1140, 45, 125) (1140,)
shape final:  (1140, 250)
(1140, 250)


In [73]:
#some hyper-parameters
learning_rate =0.01
max_epochs = 1000

cost = CECost()    
stats = list()

# test_dp = deepcopy(valid_dp)
train_dp.reset()
valid_dp.reset()
test_dp.reset()

#define the model
model = MLP(cost=cost)

# Every activation function from dft layer produces two values (x,y) for x+iy
#model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
#model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=250, odim=19, rng=rng))

# define the optimiser, here stochasitc gradient descent
# with fixed learning rate and max_epochs
lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
logger.info('Training started...')
tr_stats_f, valid_stats_f = optimiser.train(model, train_dp,valid_dp)
logger.info('Testing the model on test set:')
tst_costf, tst_accuracyf = optimiser.validate(model,valid_dp )

INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 3.136. Accuracy is 7.21%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 3.102. Accuracy is 9.30%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.798. Accuracy is 19.22%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 2.611. Accuracy is 31.32%
INFO:mlp.optimisers:Epoch 1: Took 1 seconds. Training speed 14169 pps. Validation speed 28525 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.521. Accuracy is 37.09%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 2.402. Accuracy is 37.19%
INFO:mlp.optimisers:Epoch 2: Took 1 seconds. Training speed 13602 pps. Validation speed 28525 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 2.325. Accuracy is 44.38%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 2.245. Accuracy is 35.61%
INFO:mlp.optimisers:Epoch 3: Took 1 seconds. Training speed 13079 pps. Validation speed 22820 pps.
INFO:mlp.optimi

In [102]:
from copy import deepcopy
train_dp.reset()

tp = deepcopy(train_dp)
#print numpy.argmax(model.fprop(train_dp.x), axis = -1)
#print min(numpy.argmax(model.fprop(train_dp.x), axis = -1))
tp.t  = numpy.argmax(model.fprop(train_dp.x), axis = -1) 
#print tp.t
#train_dp.t
print numpy.sum(tp.t == train_dp.t) *1.0/len(tp.t)

[14 14 14 ...,  7  7  7]
0
[14 14 14 ...,  7  7  7]
0.840058479532


In [103]:
tp2 =  MACLDataProvider(dset='train', batch_size=100,
                            max_num_batches=-10, randomize=True,name='RLAx',
                            conv_reshape=False, fft=True)
tp2.t = deepcopy(tp.t)
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                            max_num_batches=1, randomize=False,name='RLAx',
                            conv_reshape=False, fft=True)


(6840, 45, 125) (6840,)
(6840, 125)
(1140, 45, 125) (1140,)
(1140, 125)


In [106]:
learning_rate =0.01
max_epochs = 1000
rng = numpy.random.RandomState([2015,10,10])
cost = CECost()    
stats = list()

# test_dp = deepcopy(valid_dp)


#define the model
model = MLP(cost=cost)

# Every activation function from dft layer produces two values (x,y) for x+iy
#model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
#model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=125, odim=19, rng=rng))
logger.info('Training started...')
tr_stats_f, valid_stats_f = optimiser.train(model, tp2, valid_dp)

INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.974. Accuracy is 9.82%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.984. Accuracy is 10.18%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.776. Accuracy is 15.01%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 2.874. Accuracy is 9.74%
INFO:mlp.optimisers:Epoch 1: Took 0 seconds. Training speed 32386 pps. Validation speed 38033 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.592. Accuracy is 19.90%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 2.785. Accuracy is 19.21%
INFO:mlp.optimisers:Epoch 2: Took 0 seconds. Training speed 32386 pps. Validation speed 38033 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 2.461. Accuracy is 29.60%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 2.702. Accuracy is 23.68%
INFO:mlp.optimisers:Epoch 3: Took 0 seconds. Training speed 34005 pps. Validation speed 57050 pps.
INFO:mlp.optimi

In [107]:

rng = numpy.random.RandomState([2015,10,10])

tp2 =  MACLDataProvider(dset='train', batch_size=100,
                            max_num_batches=-10, randomize=True,name='RLAx',
                            conv_reshape=False, fft=True)
#tp2.t = deepcopy(tp.t)
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                            max_num_batches=1, randomize=False,name='RLAx',
                            conv_reshape=False, fft=True)

learning_rate =0.01
max_epochs = 1000

cost = CECost()    
stats = list()

# test_dp = deepcopy(valid_dp)


#define the model
model = MLP(cost=cost)

# Every activation function from dft layer produces two values (x,y) for x+iy
#model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
#model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=125, odim=19, rng=rng))
logger.info('Training started...')
tr_stats_f, valid_stats_f = optimiser.train(model, tp2, valid_dp)

(6840, 45, 125) (6840,)
(6840, 125)
(1140, 45, 125)

INFO:root:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.938. Accuracy is 15.72%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 3.022. Accuracy is 5.26%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.789. Accuracy is 16.19%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 2.929. Accuracy is 10.00%
INFO:mlp.optimisers:Epoch 1: Took 0 seconds. Training speed 35795 pps. Validation speed 57050 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.607. Accuracy is 21.82%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 2.831. Accuracy is 19.65%
INFO:mlp.optimisers:Epoch 2: Took 0 seconds. Training speed 37783 pps. Validation speed 57050 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 2.474. Accuracy is 29.57%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 2.749. Accuracy is 22.11%
INFO:mlp.optimisers:Epoch 3: Took 0 seconds. Training speed 37783 pps. Validation speed 114100 pps.
INFO:mlp.opti

 (1140,)
(1140, 125)


# Spectral Convolutional Destilation of Knowledge


With embeded fft's

In [112]:
rng = numpy.random.RandomState([2015,10,10])
tp2 =  MACLDataProvider(dset='train', batch_size=100,
                            max_num_batches=-10, randomize=True,name='RLAx',
                            conv_reshape=False, fft=True)
#tp2.t = deepcopy(tp.t)
valid_dp = MACLDataProvider(dset='valid', batch_size=1140,
                            max_num_batches=1, randomize=False,name='RLAx',
                            conv_reshape=False, fft=True)

(6840, 45, 125) (6840,)
(6840, 125)
(1140, 45, 125) (1140,)
(1140, 125)


In [116]:
from mlp.convlin import *


learning_rate =0.01
max_epochs = 1000

cost = CECost()    
stats = list()

# test_dp = deepcopy(valid_dp)


#define the model
model = MLP(cost=cost)

# Every activation function from dft layer produces two values (x,y) for x+iy
#model.add_layer(Sigmoid(idim=125, odim=125, irange=1.6, rng=rng))
lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)
optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)
model.add_layer(Sigmoid(idim=122, odim=122, irange=1.6, rng=rng))
model.add_layer(ConvRelu_Opt(1, 5, irange=1.6, rng=rng))

model.add_layer(Sigmoid(idim=122*5, odim=122*5, irange=1.6, rng=rng))
#model.add_layer(Sigmoid(idim=250, odim=250, irange=1.6, rng=rng))
model.add_layer(Softmax(idim=122*5, odim=19, rng=rng))
logger.info('Training started...')
tr_stats_f, valid_stats_f = optimiser.train(model, tp2, valid_dp)

INFO:mlp.convlin:Training started...
INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 3.445. Accuracy is 7.22%
INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 3.433. Accuracy is 6.84%
INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 2.849. Accuracy is 12.46%
INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 2.571. Accuracy is 25.79%
INFO:mlp.optimisers:Epoch 1: Took 3 seconds. Training speed 2491 pps. Validation speed 6712 pps.
INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 2.380. Accuracy is 31.78%
INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 2.262. Accuracy is 34.82%
INFO:mlp.optimisers:Epoch 2: Took 3 seconds. Training speed 2446 pps. Validation speed 7607 pps.
INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 2.110. Accuracy is 40.28%
INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 2.056. Accuracy is 40.79%
INFO:mlp.optimisers:Epoch 3: Took 3 seconds. Training speed 2429 pps. Validation speed 8150 pps.
INFO:mlp.optim