In [1]:
%pylab inline

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 20 days


Populating the interactive namespace from numpy and matplotlib


In [2]:
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in xrange(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')

ERROR (theano.sandbox.cuda): nvcc compiler not found on $PATH. Check your nvcc installation and try again.


[Elemwise{exp,no_inplace}(<TensorType(float32, vector)>)]
Looping 1000 times took 2.341017 seconds
Result is [ 1.23178029  1.61879337  1.52278066 ...,  2.20771813  2.29967761
  1.62323284]
Used the cpu


In [3]:
#
# These are taken from https://github.com/mila-udem/blocks
# 

class Constant():
    """Initialize parameters to a constant.
    The constant may be a scalar or a :class:`~numpy.ndarray` of any shape
    that is broadcastable with the requested parameter arrays.
    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The initialization value to use. Must be a scalar or an ndarray (or
        compatible object, such as a nested list) that has a shape that is
        broadcastable with any shape requested by `initialize`.
    """
    def __init__(self, constant):
        self._constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        dest = numpy.empty(shape, dtype=np.float32)
        dest[...] = self._constant
        return dest


class IsotropicGaussian():
    """Initialize parameters from an isotropic Gaussian distribution.
    Parameters
    ----------
    std : float, optional
        The standard deviation of the Gaussian distribution. Defaults to 1.
    mean : float, optional
        The mean of the Gaussian distribution. Defaults to 0
    Notes
    -----
    Be careful: the standard deviation goes first and the mean goes
    second!
    """
    def __init__(self, std=1, mean=0):
        self._mean = mean
        self._std = std

    def generate(self, rng, shape):
        m = rng.normal(self._mean, self._std, size=shape)
        return m.astype(np.float32)


class Uniform():
    """Initialize parameters from a uniform distribution.
    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function); Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.
    """
    def __init__(self, mean=0., width=None, std=None):
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self._width = numpy.sqrt(12) * std
        else:
            self._width = width
        self._mean = mean

    def generate(self, rng, shape):
        w = self._width / 2
        #print 'u', shape
        m = rng.uniform(self._mean - w, self._mean + w, size=shape)
        return m.astype(np.float32)


In [4]:
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

MNIST.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

mnist_train = MNIST(("train",), subset=slice(None,50000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
mnist_train_stream = DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(mnist_train.num_examples, 100))

                         
mnist_validation = MNIST(("train",), subset=slice(50000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
mnist_validation_stream = DataStream.default_stream(
    mnist_validation, iteration_scheme=SequentialScheme(mnist_validation.num_examples, 250))
mnist_test = MNIST(("test",))
mnist_test_stream = DataStream.default_stream(
    mnist_test, iteration_scheme=SequentialScheme(mnist_test.num_examples, 250))

ImportError: No module named fuel.datasets.mnist

In [25]:
from fuel.datasets.cifar10 import CIFAR10
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

CIFAR10.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'}))

cifar10_train = CIFAR10(("train",), subset=slice(None,40000))
#this stream will shuffle the MNIST set and return us batches of 100 examples
cifar10_train_stream = DataStream.default_stream(
    cifar10_train,
    iteration_scheme=ShuffledScheme(cifar10_train.num_examples, 25))
                                               
cifar10_validation = CIFAR10(("train",), subset=slice(40000, None))

# We will use larger portions for testing and validation
# as these dont do a backward pass and reauire less RAM.
cifar10_validation_stream = DataStream.default_stream(
    cifar10_validation, iteration_scheme=SequentialScheme(cifar10_validation.num_examples, 100))
cifar10_test = CIFAR10(("test",))
cifar10_test_stream = DataStream.default_stream(
    cifar10_test, iteration_scheme=SequentialScheme(cifar10_test.num_examples, 100))

In [26]:
print "The streams return batches containing %s" % (mnist_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(mnist_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(mnist_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "CIFAR: "  
print "The streams return batches containing %s" % (cifar10_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(cifar10_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(cifar10_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (100, 1, 28, 28) containing float32
 - an array of size (100, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (250, 1, 28, 28) containing float32
 - an array of size (250, 1) containing uint8
CIFAR: 
The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (25, 3, 32, 32) containing float32
 - an array of size (25, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (100, 3, 32, 32) containing float32
 - an array of size (100, 1) containing uint8


In [27]:
from sklearn import datasets
iris = datasets.load_iris()

#print iris

feats = 4
alpha = 0.01
pop_num = 150
rng = np.random
iris_f = iris['data'][:pop_num,:feats]
iris_t = iris['target'][:pop_num]
iris = hstack(([[x] for x in iris_t], iris_f))

rng.shuffle(iris

#print iris

iris_train_f = iris[:2*pop_num/3,1:]
iris_train_t = np.array(iris[:2*pop_num/3, 0], dtype='uint8')
iris_test_f = iris[2*pop_num/3:,1:]
iris_test_t = np.array(iris[2*pop_num/3:, 0], dtype='uint8')

SyntaxError: invalid syntax (<ipython-input-27-6604dab46423>, line 18)

In [28]:
import theano
import theano.printing as TP
from IPython.display import SVG
def svgdotprint(g):
    return SVG(theano.printing.pydotprint(g, return_image=True, format='svg'))
import time

In [134]:
import theano.tensor.signal.downsample as down

class Layer(object):
    def __init__(self, lamb = 0.1,rng=None, name=""):
        self.name = name
        self.lamb = lamb
        if rng is None:
            rng = np.random.RandomState(1234)
        self.rng = rng
    
    @property
    def parameters(self):
        return []
    
    @property
    def parameter_names(self):
        return []
    
    def get_gradients(self, dLdY, fprop_context):
        return []
    
    def update(self, foo, alpha):
        return []
    def cost(self, gamma):
        return 0;
    def setInputDim(self, inputDim):
        self.num_out = inputDim
    def getOutputDim(self):
        return self.num_out
    @property
    def parameters(self):
        return []
    @property
    def moments(self):
        return []
    def setLambda(self, lamb):
        self.lamb = lamb
    

class AffineLayer(Layer):
    def __init__(self, num_out, initW = 10., gamma  = 0.1, n = "", weight_init=None, bias_init=None, **kwargs):
        super(AffineLayer, self).__init__(name= n, **kwargs)
        self.num_out = num_out
        if weight_init is None:
            b = numpy.sqrt(initW / (num_out))
            self.weight_init = Uniform(width=b)
        if bias_init is None:
            bias_init = Constant(0.0)
        self.gamma= theano.shared(gamma)
        self.b = theano.shared(Constant(0.0).generate(self.rng, (num_out)), name=self.name +" bias")
    
    @property
    def parameters(self):
        return [self.W, self.b]
    @property
    def moments(self):
        return [self.mW, self.mb]
    @property
    def parameter_names(self):
        return ['W','b']
    
    def build(self, X):
        #print self.name+ " ",X.shape 
        return X.dot(self.W) + self.b
    def cost(self, gamma):
        return  (self.W ** 2).sum() * gamma
    def update(self, foo, alpha):
        gw, gb = T.grad(foo, self.parameters)
        moments = self.lamb * self.moments -  [alpha *gw, alpha *gb]
        self.setMoments(moments)
        return  [(self.W, self.W + moments[0]), 
                 (self.b, self.b + moments[1])]
    def setInputDim(self, inputDim):
        shape = (inputDim, self.num_out)
        print "AffineLayer: ", shape
        print "AffineLayerVel : ", (shape, self.b.shape)
        self.W = theano.shared(IsotropicGaussian(0.01).generate(self.rng, shape),name=self.name +" weight")
        self.mW = theano.shared(zeros_like(self.W, dtype='float32'))
        self.mb = theano.shared(zeros_like(self.W, dtype='float32'))
    
class LogRegLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(LogRegLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.nnet.sigmoid(X)


class TanhLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(TanhLayer, self).__init__(name = n, **kwargs)
    def build(self, X):
        print "tanh layer", X
        return T.tanh(X)

    
class ReLULayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(ReLULayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.maximum(0.0, X)

class Conv(Layer):
    def __init__(self, f_out, f_size, initW = 10., lamb = 0.9, n = "", weight_init = None, **kwargs):
        super(Conv, self).__init__(name = n, **kwargs)
        if weight_init is None:
            b = numpy.sqrt(initW / (f_out+ f_size + f_size))
            self.weight_init = Uniform(width=b)
        self.f_out = f_out
        self.f_size = f_size
    
    @property
    def parameters(self):
        return [self.F, self.b]
    @property
    def moments(self):
        return [self.mF, self.mb]
    
    def setInputDim(self, inputDim):
        F_size = (self.f_out, ) + (inputDim[0], self.f_size, self.f_size)                                   
        self.num_out = (self.f_out, inputDim[1] - self.f_size + 1, inputDim[2] - self.f_size + 1)
        print 'Conv filter', F_size
        self.F = theano.shared(IsotropicGaussian(0.01).generate(self.rng, F_size),name=self.name +" filter")
        self.b = theano.shared(Constant(0.0).generate(self.rng, (self.f_out, )), name='CB')
        self.mF = theano.shared(zeros_like(self.F, dtype='float32'), name='ConvM')
        self.mb = theano.shared(zeros_like(self.b, dtype='float32'), name='Convb')
    
    
    def update(self, foo, alpha):
        gf, gb = theano.grad(foo, [self.F, self.b])
        updates = []
        self.mF= self.lamb*self.mF  -  gf
        self.mb = self.lamb*self.mb - gb
        F_new = self.F + self.mF
        b_new = self.b + self.mb 
        return [(self.F, F_new), (self.b, b_new)]
     
    
    def build(self, X):
        conv = theano.tensor.nnet.conv2d(X, self.F) + self.b.dimshuffle('x',0,'x','x')
        return theano.tensor.maximum(0.0, conv)
        
        
class Flatten(Layer):
    def __init__(self, n = "", **kwargs):
        super(Flatten, self).__init__(name = n, **kwargs)
    def build(self, X):
        return T.flatten(X, 2)
    def setInputDim(self, inputDim):
        out_dim = 1
        for i in inputDim:
            out_dim = out_dim * i
        self.num_out = out_dim
    

class BNLayer(Layer):
    def __init__(self,num_out, n = "BNLayer", gamma = 0.1, alpha=1.0,**kwargs):
        super(BNLayer, self).__init__(name = n, **kwargs)
        self.num_out, self.alpha = num_out, alpha
        self.gamma= theano.shared(gamma)
    def build(self, X):
        self.Gamma = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Gamma " + self.name))
        print 'Gamma shape:', np.zeros((1, self.num_out)).shape
        self.Beta  = theano.shared(np.zeros((self.num_out,), dtype='float32'), name=("Beta " + self.name))
        print 'Beta shape:', np.zeros((1, self.num_out)).shape
        self.Gamma.tag.initializer = Constant(1.0)
        self.Beta.tag.initializer = Constant(0.0)
    
        self.means = self.alpha * theano.tensor.mean(X, 0, keepdims=True)
        self.stds = self.alpha * theano.tensor.std(X, 0, keepdims=True)
        self.means.tag.initializer = Constant(0.0)
        self.stds.tag.initializer = Constant(1.0)
        
        normalized = theano.tensor.nnet.bn.batch_normalization(
            X,
            self.Gamma,
            self.Beta,
            self.means,
            self.stds,
            'high_mem'
        )
        return normalized
    @property
    def parameters(self):
        return [self.Gamma, self.Beta]
    @property
    def check(self):
        return [self.gg, self.gb, self.Gamma, self.Beta,self.means, self.stds ]
    #def cost(self):
    #    return  ((self.Gamma ** 2).sum() + (self.Gamma ** 2).sum())* self.gamma
    def update(self, foo, alpha):
        self.gg, self.gb = T.grad(foo, self.parameters)
        return  [(self.Gamma, self.Gamma- alpha *self.gg),
            (self.Beta, self.Beta - alpha * self.gb)] 
    
class SoftMaxLayer(Layer):
    def __init__(self, n = "", **kwargs):
        super(SoftMaxLayer, self).__init__(name = n, **kwargs)
    
    def build(self, X):
        return T.nnet.softmax(X)

class MaxPoolLayer(Layer):
    def __init__(self, p_size, n = "MP", **kwargs):
        super(MaxPoolLayer, self).__init__(name = n, **kwargs)
        self.p_size = p_size
    def build(self, input):
        return down.max_pool_2d(input, (self.p_size,self.p_size), ignore_border=True)
    def getOutputDim(self):
        shape = (self.num_out[0], ) + (self.num_out[1]/self.p_size, self.num_out[2]/self.p_size) 
        print "maxPool", shape
        return shape
    
class DropOutLayer(Layer):
    def __init__(self, dropOut = 0.1, n = "MP", **kwargs):
        super(DropOutLayer, self).__init__(name = n, **kwargs)
        self.dropOut = dropOut
        self.u = Uniform(0.5, 1.)
    def build(self, input):
        self.D = theano.shared((self.u.generate(self.rng, (self.num_out,))>= self.dropOut) + 0,name=self.name +" Dropout") 
        print self.D.get_value()
        return input * self.D
    def getOutputDim(self):
        shape = self.num_out 
        print "maxPool", shape
        return shape
    def update(self, foo, alpha):
        return  [(self.D,
            (self.u.generate(self.rng, (self.num_out,)) >= self.dropOut)+0)]
    
class FeedForwardNet(object):
    def __init__(self, layers=None, lamb = 0.1):
        if layers is None:
            layers = []
        self.layers = layers
        self.lamb = lamb
    def add(self, layer):
        self.layers.append(layer)
    
    @property
    def parameters(self):
        params = []
        for layer in self.layers:
            params += layer.parameters
        return params
    
    @parameters.setter
    def parameters(self, values):
        for ownP, newP in zip(self.parameters, values):
            ownP[...] = newP
    
    @property
    def parameter_names(self):
        param_names = []
        for layer in self.layers:
            param_names += layer.parameter_names
        return param_names
    
    def build(self, inputDim):
        x = T.tensor4("x")
        y = T.vector("y", dtype='int64')
        cost = 0
        moments = []
        params = []
        updates = []
        X = x

    
        
        alpha = theano.tensor.scalar('alpha',dtype='float32')
        lamb = theano.tensor.scalar('alpha',dtype='float32')
        gamma = theano.tensor.scalar('alpha',dtype='float32')
        
        
        for layer, i in zip(self.layers, range(len(self.layers))):
            layer.setInputDim(inputDim)
            moments += layer.moments
            params += layer.parameters
            inputDim = layer.getOutputDim()
            X = layer.build(X)
            cost += layer.cost(gamma)
            
        pred = np.argmax(X, 1)
        error_rate = theano.tensor.neq(pred, y.ravel()).mean()
        nll = -theano.tensor.log(X[theano.tensor.arange(y.shape[0]), y.ravel()]).mean() 

        self.costFoo = nll + cost
            
        grads = theano.grad(self.costFoo, params)
        # for some reasons i have to get moments this way. 
        moments = [theano.shared(np.zeros_like(p.get_value()), name='V_%s' %(p.name, )) for p in params]

        for p,g,v in zip(params, grads, moments):
            print g
            print v
            v_new = lamb*v - alpha*g
            print v_new
            p_new = p + v_new
            updates += [(v,v_new), (p,p_new)]

        self.train = theano.function(inputs=[x,y, alpha, lamb, gamma], 
                                    outputs=[pred, self.costFoo, alpha],
                                    updates=updates)
        self.predict  = theano.function(inputs=[x], 
                                    outputs=pred)
        
    
    @property
    def trainFunction(self):
        return self.train
    
    @property
    def predictFunction(self):
        return self.predict
    @property
    def costFunction(self):
        return self.costFoo


In [135]:
def compute_er(net, stream):
    num_errs = 0.0
    num_examples = 0
    for X, Y in stream.get_epoch_iterator():
        predictions = net.predictFunction(X)
        #print predictions != Y.ravel()
        num_errs += (predictions != Y.ravel()).sum()
        #print Y.shape[0], num_errs
        num_examples += Y.shape[0]
    return num_errs/num_examples


In [140]:
# conv improved smaler filter - one affine + bigger othera

img_size = (32, 32)
c1_i = 3
c1_o = 50
c1_f = 3
p1   = 3
p2   = 2
c2_o = 50
c2_f = 3
hidden1 = 1000
hidden2 = 500
hidden3 = 900
hidden4 = 2000
hidden5 = 1500
outs = 10
gamma = 0.0005
lamb = 0.9
initC = 10.
initW = 10.
num_epochs  = 100

net = FeedForwardNet([
                      Conv(c1_o, c1_f),
                      MaxPoolLayer(p1),
                      Conv(c2_o, c2_f),
                      MaxPoolLayer(p2),
                      Flatten("Flatten"),
                      AffineLayer(hidden1, initW, gamma, "tA"),
                      DropOutLayer(0.3),
                      ReLULayer("ReLu"),
                      AffineLayer(outs, initW, gamma, "tA"), 
                      SoftMaxLayer("fSoftMax")],lamb)
net.build((c1_i, ) + img_size)
print "Start"
print "gamma: ", gamma
i = 0
e = 0

#Noise
u = Uniform(width=0.05)
while e < num_epochs:
    t0 = time.time()
    for X, Y in cifar10_train_stream.get_epoch_iterator():
        alpha = 1e-2 * 10000 / np.maximum(10000, i)
        pr ,c,a = net.trainFunction(X, Y.ravel(),alpha,lamb, gamma) #
        i+=1
        if i % 100 == 0:
            print a, c, (pr  != Y.ravel()).mean()
    t1 = time.time()
    print "After epoch: ", e, compute_er(net, cifar10_validation_stream), "time: ", t1-t0
    e+=1

 Conv filter (50, 3, 3, 3)
maxPool (50, 10, 10)
Conv filter (50, 50, 3, 3)
maxPool (50, 4, 4)
AffineLayer:  (800, 1000)
AffineLayerVel :  ((800, 1000), Shape.0)
maxPool 1000
[0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0
 1 1 1 0 0 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 0 1 0 1 1 0 0 1 1 1 0
 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 1 1
 0 1 1 1 0 1 0 1 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 1 0 1 1 1 0 1 1 0
 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 0 0 1 0 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 0 0 0 1 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 0 1 1 1 1 1
 1 1 1 0 0 1 1 1 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 1 1
 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1
 1 1 0 1 1 0 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1
 

KeyboardInterrupt: 

In [141]:
compute_er(net, cifar10_test_stream)

0.2264