In [85]:
import numpy as np
import theano
from theano import tensor as T
import lasagne
from lasagne.layers import *
from lasagne.nonlinearities import *
from lasagne.objectives import *
from lasagne.updates import *
import sys
sys.path.append("../../modules")
import helper as hp

In [18]:
class UpperRightOnesLayer(Layer):
    def __init__(self, incoming, **kwargs):
        super(UpperRightOnesLayer, self).__init__(incoming, **kwargs)
        num_inputs = int(np.prod(self.input_shape[1:]))
        self.num_inputs = num_inputs
        # construct the upper triangular matrix
        self.W = np.ones((num_inputs, num_inputs), dtype="float32")
        for k in range(0, num_inputs):
            self.W[k][0:k] = 0

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.num_inputs)

    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        result = T.dot(input, self.W)
        return result

In [14]:
W = np.ones((3, 3), dtype="float32")
for k in range(0, 3):
    W[k][0:k] = 0

In [15]:
W

array([[ 1.,  1.,  1.],
       [ 0.,  1.,  1.],
       [ 0.,  0.,  1.]], dtype=float32)

In [30]:
l_in = InputLayer((None, 3))
l_uro = UpperRightOnesLayer(l_in)
l_softmax = NonlinearityLayer(l_uro, nonlinearity=softmax)

In [31]:
X = T.fmatrix('X')
uro_out = get_output(l_uro, X)
softmax_out = get_output(l_softmax, X)

In [28]:
uro_out.eval({X: np.zeros((3,3), dtype="float32")+0.5})

array([[ 0.5,  1. ,  1.5],
       [ 0.5,  1. ,  1.5],
       [ 0.5,  1. ,  1.5]], dtype=float32)

In [32]:
softmax_out.eval({X: np.zeros((3,3), dtype="float32")+0.5})

array([[ 0.18632373,  0.30719587,  0.5064804 ],
       [ 0.18632373,  0.30719587,  0.5064804 ],
       [ 0.18632373,  0.30719587,  0.5064804 ]], dtype=float32)

---

In [167]:
class OrdinalSubtractLayer(Layer):
    def __init__(self, incoming, **kwargs):
        super(OrdinalSubtractLayer, self).__init__(incoming, **kwargs)
        num_inputs = int(np.prod(self.input_shape[1:]))
        self.num_inputs = num_inputs
        # construct the matrix
        self.W = np.zeros((num_inputs, num_inputs), dtype="float32")
        self.W[0,0]=1
        for k in range(1, num_inputs-1):
            self.W[k-1,k] = -1
            self.W[k,k] = 1
        self.W[num_inputs-1,num_inputs-1] = 1
        # construct the bias row vector
        self.b = np.zeros((1, num_inputs), dtype="float32")
        self.b[0, num_inputs-1] = 1
        print self.W
        print self.b

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.num_inputs)

    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        result = T.dot(input, self.W)
        result = T.abs_(self.b - result)
        return result

In [55]:
l_ord = OrdinalSubtractLayer(l_softmax)
ord_out = get_output(l_ord, X)

In [56]:
ord_out.eval({X: np.zeros((3,3), dtype="float32")+0.5})

array([[ 0.18632373,  0.12087214,  0.4935196 ],
       [ 0.18632373,  0.12087214,  0.4935196 ],
       [ 0.18632373,  0.12087214,  0.4935196 ]], dtype=float32)

-----

In [63]:
dat = hp.load_mnist("../../data/mnist.pkl.gz")
train_data, _, _ = dat
X_train, y_train = train_data
y_train = y_train.astype("int32")

In [64]:
X_train.dtype, y_train.dtype

(dtype('float32'), dtype('int32'))

In [168]:
from theano.compile.nanguardmode import NanGuardMode

In [229]:
def get_net():
    l_in = InputLayer( (None, 1, 28, 28) )
    l_conv = Conv2DLayer(l_in, num_filters=8, filter_size=3)
    l_mp = MaxPool2DLayer(l_conv, pool_size=2)
    l_conv2 = Conv2DLayer(l_mp, num_filters=16, filter_size=3)
    l_mp2 = MaxPool2DLayer(l_conv2, pool_size=2)
    l_conv3 = Conv2DLayer(l_mp2, num_filters=32, filter_size=3)
    l_pre = DenseLayer(l_conv3, num_units=10)
    l_uro = UpperRightOnesLayer(l_pre) # addition, and "cumulative probs"
    #l_softmax = NonlinearityLayer(l_uro, nonlinearity=softmax) # cumulative probs
    l_ord = OrdinalSubtractLayer(l_uro) # convert to discrete probs
    #l_ord = ExpressionLayer(l_ord, lambda X: (X+0.001) / 1.01, output_shape='auto') # numerical stability
    l_ord = NonlinearityLayer(l_ord, nonlinearity=softmax)
    return l_ord

In [230]:
X = T.tensor4('X')
y = T.ivector('y')
l_out = get_net()
net_out = get_output(l_out, X)
for layer in get_all_layers(l_out):
    print layer, layer.output_shape

[[ 1. -1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1. -1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1. -1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1. -1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1. -1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  1. -1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1. -1.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1. -1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]
[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]
<lasagne.layers.input.InputLayer object at 0x11361a490> (None, 1, 28, 28)
<lasagne.layers.conv.Conv2DLayer object at 0x113724310> (None, 8, 26, 26)
<lasagne.layers.pool.MaxPool2DLayer object at 0x115e39290> (None, 8, 13, 13)
<lasagne.layers.conv.Conv2DLayer object at 0x10b4e5e50> (None, 16, 11, 11)
<lasagne.layers.pool.MaxPool2DLayer object at 0x10b4e5710> (None, 16, 5, 5)
<lasagne.layers.conv.Conv2DLayer object at 0x115d52290> (None, 32, 3, 3)
<lasagne.layers.dense.DenseLayer object at 0x113724f10> (None, 10)
<__main__

In [210]:
net_out.eval({X:X_train[0:1]})+0.001 / 1.01

array([[ 0.04890596,  0.01240272,  0.00918723,  0.0009901 ,  0.03845213,
         0.00868523,  0.0009901 ,  0.0009901 ,  0.03661405,  0.83462839]])

In [208]:
net_out.eval({X:X_train[0:1]})

array([[ 0.04791586,  0.01141262,  0.00819714,  0.        ,  0.03746203,
         0.00769514,  0.        ,  0.        ,  0.03562395,  0.8336383 ]])

---

In [232]:
np.sum(net_out.eval({X:X_train[0:1]}))

0.99999999999999978

In [233]:
params = get_all_params(l_out)
loss = categorical_crossentropy(net_out, y).mean()
updates = nesterov_momentum(loss, params, 0.01, 0.9)

In [234]:
train_fn = theano.function([X,y], loss, updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))

In [113]:
params

[W, b, W, b, W, b, W, b]

In [218]:
def iterate(X_train, y_train, bs=32):
    b=0
    while True:
        if b*bs >= X_train.shape[0]:
            break
        yield X_train[b*bs:(b+1)*bs], y_train[b*bs:(b+1)*bs]
        b += 1

In [235]:
for epoch in range(0,5):
    losses = []
    for X_batch, y_batch in iterate(X_train, y_train):
        losses.append(train_fn(X_batch,y_batch))
    print np.mean(losses)

1.0020876979
0.817761946893
0.743667285172
0.603692934384
0.584278360348


In [236]:
net_out.eval({X:X_train[0:1]})

array([[ 0.03678119,  0.03678119,  0.03678119,  0.03678119,  0.03678119,
         0.51594494,  0.03678119,  0.03678119,  0.03678119,  0.18980554]])

In [237]:
y_train[0]

5