# lasagne
* lasagne - это библиотека для написания нейронок произвольной формы на theano
* библиотека низкоуровневая, границы между theano и lasagne практически нет

В качестве демо-задачи выберем то же распознавание чисел, но на большем масштабе задачи
* картинки 28x28
* 10 цифр

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [1]:
from mnist import load_dataset
X_train,y_train,X_val,y_val,X_test,y_test = load_dataset()

print X_train.shape,y_train.shape

(50000, 1, 28, 28) (50000,)


In [None]:
plt.imshow(X_train[0,0])

In [2]:
import lasagne
from theano import tensor as T
from lasagne.nonlinearities import *

input_X = T.tensor4("X")
target_y = T.vector("target Y integer",dtype='int32')

Так задаётся архитектура нейронки

In [None]:
#входной слой (вспомогательный)
input_layer = lasagne.layers.InputLayer(shape=(None,1,28,28), input_var=input_X)

#полносвязный слой, который принимает на вход input layer и имеет 100 нейронов.
# нелинейная функция - сигмоида как в логистической регрессии
# слоям тоже можно давать имена, но это необязательно
dense_1 = lasagne.layers.DenseLayer(input_layer,num_units=256, nonlinearity=sigmoid, name="hidden_dense_layer0")
drop_1=lasagne.layers.DropoutLayer(dense_1, p=0.5)
dense_1 = lasagne.layers.DenseLayer(drop_1,num_units=1000, nonlinearity=sigmoid, name="hidden_dense_layer1")
#lasagne.layers.DropoutLayer(предыдущий_слой, p=вероятность_занулить)
#ВЫХОДНОЙ полносвязный слой, который принимает на вход dense_1 и имеет 10 нейронов -по нейрону на цифру
#нелинейность - softmax - чтобы вероятности всех цифр давали в сумме 1
dense_output = lasagne.layers.DenseLayer(dense_1,num_units = 10, nonlinearity=softmax)

In [None]:
#предсказание нейронки (theano-преобразование)
y_predicted = lasagne.layers.get_output(dense_output)

In [None]:
#все веса нейронки (shared-переменные)
all_weights = lasagne.layers.get_all_params(dense_output)
print all_weights

### дальше вы могли бы просто
* задать функцию ошибки вручную
* посчитать градиент ошибки по all_weights
* написать updates
* но это долго, а простой шаг по градиенту - не самый лучший смособ оптимизировать веса

Вместо этого, опять используем lasagne

In [None]:
#функция ошибки - средняя кроссэнтропия
loss = lasagne.objectives.categorical_crossentropy(y_predicted,target_y).mean()

accuracy = lasagne.objectives.categorical_accuracy(y_predicted,target_y).mean()

#сразу посчитать словарь обновлённых значений с шагом по градиенту, как раньше
updates_sgd = lasagne.updates.rmsprop(loss, all_weights,learning_rate=0.01)

In [None]:
#функция, которая обучает сеть на 1 шаг и возвращащет значение функции потерь и точности
train_fun = theano.function([input_X,target_y],[loss,accuracy],updates= updates_sgd)

#функция, которая считает точность
accuracy_fun = theano.function([input_X,target_y],accuracy)

### Вот и всё, пошли её учить
* данных теперь много, поэтому лучше учиться стохастическим градиентным спуском
* для этого напишем функцию, которая бьёт выпорку на мини-батчи (в обычном питоне, не в theano)

In [3]:
# вспомогательная функция, которая возвращает список мини-батчей для обучения нейронки

#на вход
# inputs - тензор из картинок размером (много, 1, 28, 28), например - X_train
# targets - вектор из чиселок - ответов для каждой картинки из X; например - Y_train
# batchsize - одно число - желаемый размер группы

#что нужно сделать
# 1) перемешать данные
# - важно перемешать targets и targets одним и тем же образом, чтобы сохранить соответствие картинки ответу на неё
# 3) побить данные на подгруппы так, чтобы в каждой подгруппе было batchsize картинок и ответов
# - если число картинок не делится на batchsize, одну подгруппу можно вернуть другого размера
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

# Процесс обучения

In [None]:
import time

num_epochs = 10 #количество проходов по данным

batch_size = 50 #размер мини-батча

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train,batch_size):
        inputs, targets = batch
        train_err_batch, train_acc_batch= train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    
    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))

    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    print("  train accuracy:\t\t{:.2f} %".format(
        train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

In [None]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 99:
    print "Achievement unlocked: колдун 80 уровня"
else:
    print "Нужно больше магии!"

# Нейронка твоей мечты


* Задача - сделать нейронку, которая получит точность 99% на валидации (validation accuracy)
 * Вариант "is fine too" - 97.5%. 
 * Чем выше, тем лучше.
 
__ В конце есть мини-отчётик, который имеет смысл прочитать вначале и заполнять по ходу работы. __
 

## Что можно улучшить:



 * размер сети
   * бОльше нейронов, 
   * бОльше слоёв, 
   
 * регуляризация - чтобы не переобучалось
   * приплюсовать к функции ошибки какую-нибудь сумму квадратов весов
   * можно сделать вручную, а можно - http://lasagne.readthedocs.org/en/latest/modules/regularization.html
   * Dropout - для борьбы с переобучением
       * `lasagne.layers.DropoutLayer(предыдущий_слой, p=вероятность_занулить)`
   
 * Метод оптимизации - adam, rmsprop, nesterov_momentum, adadelta, adagrad и т.п.
   * сходятся быстрее и иногда - к лучшему оптимуму
   * имеет смысл поиграть с размером батча, количеством эпох и скоростью обучения
   
 * Наконец, можно поиграть с нелинейностями в скрытых слоях
   * tanh, relu, leaky relu, etc

In [4]:
from mnist import load_dataset
X_train,y_train,X_val,y_val,X_test,y_test = load_dataset()

print X_train.shape,y_train.shape

(50000, 1, 28, 28) (50000,)


In [5]:
import lasagne

input_X = T.tensor4("X")

#размерность входа (None означает "может изменяться")
input_shape = (None,1,28,28)

target_y = T.vector("target Y integer",dtype='int32')

In [6]:
#входной слой (вспомогательный)
input_layer = lasagne.layers.InputLayer(shape = input_shape,input_var=input_X)
drop_1=lasagne.layers.DropoutLayer(input_layer, p=0.2)
layer_1 = lasagne.layers.DenseLayer(drop_1,num_units=1500, nonlinearity=sigmoid, name="hidden_dense_layer0")
drop_2=lasagne.layers.DropoutLayer(layer_1, p=0.5)
layer_2 = lasagne.layers.DenseLayer(drop_2,num_units=1000, nonlinearity=sigmoid, name="hidden_dense_layer1")
drop_3=lasagne.layers.DropoutLayer(layer_2, p=0.5)
layer_2 = lasagne.layers.DenseLayer(drop_3,num_units=500, nonlinearity=sigmoid, name="hidden_dense_layer2")
#lasagne.layers.DropoutLayer(предыдущий_слой, p=вероятность_занулить)
#ВЫХОДНОЙ полносвязный слой, который принимает на вход dense_1 и имеет 10 нейронов -по нейрону на цифру
#нелинейность - softmax - чтобы вероятности всех цифр давали в сумме 1
dense_output = lasagne.layers.DenseLayer(layer_2,num_units = 10, nonlinearity=softmax)


In [7]:
#предсказание нейронки (theano-преобразование)
y_predicted = lasagne.layers.get_output(dense_output)

In [8]:
#все веса нейронки (shared-переменные)
all_weights = lasagne.layers.get_all_params(dense_output)
print all_weights

[hidden_dense_layer0.W, hidden_dense_layer0.b, hidden_dense_layer1.W, hidden_dense_layer1.b, hidden_dense_layer2.W, hidden_dense_layer2.b, W, b]


In [9]:
from lasagne.regularization import regularize_layer_params_weighted, l2, l1
from lasagne.regularization import regularize_layer_params
#функция ошибки - средняя кроссэнтропия
#loss = lasagne.objectives.categorical_crossentropy(y_predicted,target_y).mean()
loss = T.mean(T.nnet.categorical_crossentropy(y_predicted, target_y))
print loss
#lnorm=LA.norm(all_weights,ord=2)
#<возможно добавить регуляризатор>
#layers = {layer_1: 0.001, layer_2: 0.001, dense_output: 0.5}
#l2_penalty = regularize_layer_params_weighted(layers, l2)
loss = loss #+ l2_penalty
accuracy = lasagne.objectives.categorical_accuracy(y_predicted,target_y).mean()

#сразу посчитать словарь обновлённых значений с шагом по градиенту, как раньше
#updates_sgd = lasagne.updates.rmsprop(loss, all_weights,learning_rate=0.01)
#updates_sgd = lasagne.updates.nesterov_momentum(loss, all_weights, learning_rate=0.01, momentum=0.9)
updates_sgd = lasagne.updates.adam(loss, all_weights)

mean


In [10]:
#функция, которая обучает сеть на 1 шаг и возвращащет значение функции потерь и точности
train_fun = theano.function([input_X,target_y],[loss,accuracy],updates= updates_sgd)

#функция, которая считает точность
accuracy_fun = theano.function([input_X,target_y],accuracy)

In [11]:
#итерации обучения
import time
from tqdm import tqdm
num_epochs = 300 #количество проходов по данным

batch_size = 250 #размер мини-батча

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in tqdm(iterate_minibatches(X_train, y_train,batch_size)):
        inputs, targets = batch
        train_err_batch, train_acc_batch= train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    
    # Then we print the results for this epoch:
    #print("Epoch {} of {} took {:.3f}s".format(
     #   epoch + 1, num_epochs, time.time() - start_time))

    #print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    if epoch%10==0:
        print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

200it [00:46,  4.38it/s]
0it [00:00, ?it/s]

  train accuracy:		71.09 %
  validation accuracy:		88.30 %


200it [00:46,  4.37it/s]
200it [00:46,  4.33it/s]
200it [00:46,  4.39it/s]
200it [00:47,  3.52it/s]
200it [00:48,  4.26it/s]
200it [00:48,  4.21it/s]
200it [00:45,  4.50it/s]
200it [00:46,  4.16it/s]
200it [00:50,  3.05it/s]
200it [00:46,  4.42it/s]
0it [00:00, ?it/s]

  train accuracy:		96.36 %
  validation accuracy:		96.39 %


200it [00:45,  4.30it/s]
200it [00:45,  4.35it/s]
200it [00:46,  4.32it/s]
200it [00:46,  4.33it/s]
200it [00:45,  4.44it/s]
200it [00:45,  4.00it/s]
200it [00:45,  4.55it/s]
200it [00:45,  4.41it/s]
200it [00:48,  2.75it/s]
200it [00:44,  4.49it/s]
0it [00:00, ?it/s]

  train accuracy:		97.81 %
  validation accuracy:		96.91 %


200it [00:44,  4.47it/s]
200it [00:45,  4.38it/s]
200it [00:45,  4.28it/s]
200it [00:45,  4.35it/s]
200it [00:45,  4.50it/s]
200it [00:45,  4.46it/s]
200it [00:45,  4.30it/s]
200it [00:45,  4.43it/s]
200it [00:48,  4.43it/s]
200it [00:44,  4.45it/s]
0it [00:00, ?it/s]

  train accuracy:		98.36 %
  validation accuracy:		97.18 %


200it [00:44,  4.65it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.52it/s]
200it [00:43,  4.49it/s]
200it [00:43,  4.64it/s]
200it [00:44,  4.47it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.64it/s]
200it [00:44,  4.57it/s]
200it [00:44,  4.61it/s]
0it [00:00, ?it/s]

  train accuracy:		98.90 %
  validation accuracy:		97.33 %


200it [00:44,  4.60it/s]
200it [00:44,  4.66it/s]
200it [00:43,  4.70it/s]
200it [00:44,  4.62it/s]
200it [00:44,  4.50it/s]
200it [00:44,  4.43it/s]
200it [00:44,  4.47it/s]
200it [00:44,  4.49it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.31it/s]
0it [00:00, ?it/s]

  train accuracy:		99.11 %
  validation accuracy:		97.60 %


200it [00:44,  4.62it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.54it/s]
200it [00:44,  4.31it/s]
200it [00:44,  4.57it/s]
200it [00:44,  4.63it/s]
200it [00:43,  4.65it/s]
200it [00:44,  4.66it/s]
200it [00:43,  4.60it/s]
200it [00:44,  4.42it/s]
0it [00:00, ?it/s]

  train accuracy:		99.19 %
  validation accuracy:		97.76 %


200it [00:44,  4.63it/s]
200it [00:43,  4.65it/s]
200it [00:43,  4.41it/s]
200it [00:43,  4.59it/s]
200it [00:44,  4.39it/s]
200it [00:43,  4.63it/s]
200it [00:43,  4.70it/s]
200it [00:44,  4.56it/s]
200it [00:43,  4.66it/s]
200it [00:43,  4.62it/s]
0it [00:00, ?it/s]

  train accuracy:		99.36 %
  validation accuracy:		97.79 %


200it [00:44,  4.52it/s]
200it [00:43,  4.45it/s]
200it [00:43,  4.48it/s]
200it [00:44,  4.71it/s]
200it [00:44,  4.62it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.59it/s]
200it [00:44,  4.58it/s]
200it [00:43,  4.46it/s]
200it [00:44,  4.59it/s]
0it [00:00, ?it/s]

  train accuracy:		99.47 %
  validation accuracy:		97.87 %


200it [00:43,  4.67it/s]
200it [00:43,  4.64it/s]
200it [00:43,  4.59it/s]
200it [00:44,  4.45it/s]
200it [00:44,  4.46it/s]
200it [00:44,  4.46it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.47it/s]
200it [00:44,  4.61it/s]
200it [00:43,  4.64it/s]
0it [00:00, ?it/s]

  train accuracy:		99.50 %
  validation accuracy:		97.59 %


200it [00:44,  4.44it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.62it/s]
200it [00:44,  4.56it/s]
200it [00:44,  4.48it/s]
200it [00:43,  4.63it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.67it/s]
200it [00:43,  4.57it/s]
0it [00:00, ?it/s]

  train accuracy:		99.59 %
  validation accuracy:		97.59 %


200it [00:43,  4.61it/s]
200it [00:44,  4.33it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.43it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.58it/s]
200it [00:43,  4.36it/s]
200it [00:43,  4.60it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.40it/s]
0it [00:00, ?it/s]

  train accuracy:		99.56 %
  validation accuracy:		97.96 %


200it [00:43,  4.52it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.29it/s]
200it [00:43,  4.61it/s]
200it [00:43,  4.62it/s]
200it [00:43,  4.55it/s]
200it [00:43,  4.49it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.66it/s]
200it [00:43,  4.61it/s]
0it [00:00, ?it/s]

  train accuracy:		99.62 %
  validation accuracy:		98.05 %


200it [00:43,  4.68it/s]
200it [00:44,  4.46it/s]
200it [00:44,  4.58it/s]
200it [00:43,  4.64it/s]
200it [00:43,  4.57it/s]
200it [00:44,  4.62it/s]
200it [00:44,  4.34it/s]
200it [00:43,  4.63it/s]
200it [00:44,  4.66it/s]
200it [00:43,  4.61it/s]
0it [00:00, ?it/s]

  train accuracy:		99.64 %
  validation accuracy:		97.83 %


200it [00:44,  4.34it/s]
200it [00:44,  4.40it/s]
200it [00:43,  4.42it/s]
200it [00:44,  4.47it/s]
200it [00:43,  4.59it/s]
200it [00:44,  4.40it/s]
200it [00:44,  4.61it/s]
200it [00:43,  4.53it/s]
200it [00:43,  4.57it/s]
200it [00:43,  4.68it/s]
0it [00:00, ?it/s]

  train accuracy:		99.68 %
  validation accuracy:		97.95 %


200it [00:44,  4.64it/s]
200it [00:43,  4.61it/s]
200it [00:44,  4.54it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.12it/s]
200it [00:43,  4.68it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.60it/s]
200it [00:43,  4.61it/s]
200it [00:44,  4.59it/s]
0it [00:00, ?it/s]

  train accuracy:		99.64 %
  validation accuracy:		98.12 %


200it [00:43,  4.65it/s]
200it [00:43,  4.64it/s]
200it [00:44,  4.61it/s]
200it [00:44,  4.24it/s]
200it [00:43,  4.51it/s]
200it [00:43,  4.42it/s]
200it [00:44,  4.52it/s]
200it [00:44,  4.54it/s]
200it [00:44,  4.42it/s]
200it [00:44,  4.58it/s]
0it [00:00, ?it/s]

  train accuracy:		99.70 %
  validation accuracy:		97.90 %


200it [00:44,  4.57it/s]
200it [00:44,  4.48it/s]
200it [00:44,  4.49it/s]
200it [00:44,  4.64it/s]
200it [00:44,  4.67it/s]
200it [00:43,  4.61it/s]
200it [00:44,  4.60it/s]
200it [00:43,  4.49it/s]
200it [00:44,  4.57it/s]
200it [00:44,  4.69it/s]
0it [00:00, ?it/s]

  train accuracy:		99.71 %
  validation accuracy:		98.07 %


200it [00:43,  4.61it/s]
200it [00:44,  4.63it/s]
200it [00:43,  4.63it/s]
200it [00:44,  4.51it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.42it/s]
200it [00:44,  4.35it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.47it/s]
200it [00:43,  4.72it/s]
0it [00:00, ?it/s]

  train accuracy:		99.68 %
  validation accuracy:		97.90 %


200it [00:44,  4.59it/s]
200it [00:44,  4.41it/s]
200it [00:44,  4.47it/s]
200it [00:44,  4.43it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.61it/s]
200it [00:43,  4.62it/s]
200it [00:43,  4.47it/s]
200it [00:43,  4.51it/s]
200it [00:44,  4.61it/s]
0it [00:00, ?it/s]

  train accuracy:		99.72 %
  validation accuracy:		97.97 %


200it [00:44,  4.60it/s]
200it [00:44,  4.61it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.66it/s]
200it [00:44,  4.46it/s]
200it [00:44,  4.59it/s]
200it [00:44,  4.55it/s]
200it [00:43,  4.60it/s]
200it [00:44,  4.55it/s]
200it [00:43,  4.60it/s]
0it [00:00, ?it/s]

  train accuracy:		99.75 %
  validation accuracy:		98.00 %


200it [00:44,  4.61it/s]
200it [00:44,  4.34it/s]
200it [00:43,  4.64it/s]
200it [00:44,  4.62it/s]
200it [00:43,  4.43it/s]
200it [00:44,  4.40it/s]
200it [00:44,  4.52it/s]
200it [00:43,  4.63it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.54it/s]
0it [00:00, ?it/s]

  train accuracy:		99.74 %
  validation accuracy:		97.97 %


200it [00:43,  4.50it/s]
200it [00:43,  4.36it/s]
200it [00:44,  4.43it/s]
200it [00:44,  4.58it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.55it/s]
200it [00:44,  4.59it/s]
200it [00:43,  4.45it/s]
200it [00:44,  4.65it/s]
200it [00:44,  4.50it/s]
0it [00:00, ?it/s]

  train accuracy:		99.76 %
  validation accuracy:		97.92 %


200it [00:44,  4.35it/s]
200it [00:43,  4.56it/s]
200it [00:44,  4.55it/s]
200it [00:44,  4.65it/s]
200it [00:44,  4.54it/s]
200it [00:43,  4.58it/s]
200it [00:44,  4.59it/s]
200it [00:44,  4.71it/s]
200it [00:44,  4.40it/s]
200it [00:44,  4.62it/s]
0it [00:00, ?it/s]

  train accuracy:		99.75 %
  validation accuracy:		97.74 %


200it [00:43,  4.51it/s]
200it [00:44,  4.59it/s]
200it [00:43,  4.57it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.61it/s]
200it [00:43,  4.70it/s]
200it [00:44,  4.54it/s]
200it [00:44,  4.50it/s]
200it [00:44,  4.45it/s]
200it [00:44,  4.61it/s]
0it [00:00, ?it/s]

  train accuracy:		99.78 %
  validation accuracy:		98.01 %


200it [00:43,  4.57it/s]
200it [00:43,  4.50it/s]
200it [00:44,  4.68it/s]
200it [00:44,  4.32it/s]
200it [00:44,  4.26it/s]
200it [00:44,  4.46it/s]
200it [00:43,  4.62it/s]
200it [00:44,  4.61it/s]
200it [00:44,  4.37it/s]
200it [00:44,  4.56it/s]
0it [00:00, ?it/s]

  train accuracy:		99.79 %
  validation accuracy:		97.90 %


200it [00:44,  4.45it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.44it/s]
200it [00:43,  4.67it/s]
200it [00:44,  4.56it/s]
200it [00:44,  4.54it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.40it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.46it/s]
0it [00:00, ?it/s]

  train accuracy:		99.79 %
  validation accuracy:		98.04 %


200it [00:43,  4.56it/s]
200it [00:44,  4.49it/s]
200it [00:44,  4.48it/s]
200it [00:44,  4.55it/s]
200it [00:44,  4.59it/s]
200it [00:44,  4.50it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.53it/s]
0it [00:00, ?it/s]

  train accuracy:		99.78 %
  validation accuracy:		98.14 %


200it [00:43,  4.55it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.59it/s]
200it [00:43,  4.49it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.39it/s]
200it [00:44,  4.41it/s]
200it [00:44,  4.60it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.50it/s]
0it [00:00, ?it/s]

  train accuracy:		99.81 %
  validation accuracy:		98.11 %


200it [00:44,  4.26it/s]
200it [00:43,  4.56it/s]
200it [00:44,  4.44it/s]
200it [00:43,  4.49it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.52it/s]
200it [00:44,  4.58it/s]
200it [00:44,  4.51it/s]
200it [00:44,  4.51it/s]
200it [00:44,  4.33it/s]
0it [00:00, ?it/s]

  train accuracy:		99.82 %
  validation accuracy:		97.88 %


200it [00:44,  4.53it/s]
200it [00:44,  4.39it/s]
200it [00:44,  4.50it/s]
200it [00:44,  4.65it/s]
200it [00:44,  4.27it/s]
200it [00:44,  4.38it/s]
200it [00:44,  4.53it/s]
200it [00:44,  4.63it/s]
200it [00:44,  4.44it/s]


In [12]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

Final results:
  test accuracy:		98.20 %


In [None]:
import lasagne
import theano.tensor as T
import theano
from lasagne.nonlinearities import softmax
from lasagne.layers import InputLayer, DenseLayer, get_output
from lasagne.regularization import regularize_layer_params_weighted, l2, l1
from lasagne.regularization import regularize_layer_params
layer_in = InputLayer((100, 20))
layer1 = DenseLayer(layer_in, num_units=3)
layer2 = DenseLayer(layer1, num_units=5, nonlinearity=softmax)
x = T.matrix('x')  # shp: num_batch x num_features
y = T.ivector('y') # shp: num_batch
l_out = get_output(layer2, x)
loss = T.mean(T.nnet.categorical_crossentropy(l_out, y))
layers = {layer1: 0.1, layer2: 0.5}
l2_penalty = regularize_layer_params_weighted(layers, l2)
l1_penalty = regularize_layer_params(layer2, l1) * 1e-4
loss = loss + l2_penalty + l1_penalty