In [1]:
import numpy as np
import matplotlib.pyplot as plt
import lasagne.layers.dnn
import random

%matplotlib inline

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5110)


In [2]:
from cifar import load_CIFAR10
plt.rcParams['figure.figsize'] = (10.0, 8.0) 

cifar10_dir = './cifar10/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

In [3]:
X_train.shape

(50000, 3, 32, 32)

In [4]:
def cifar_to_augmentation(img):
    return np.transpose(img, (1, 2, 0))

def augmentation_to_cifar(img):
    return np.transpose(img, (2, 0, 1))

In [5]:
from imgaug import augmenters as iaa

seq = iaa.Sequential([
#     iaa.Crop(px=(0, 1)), # crop images from each side by 0 to 16px (randomly chosen)
    iaa.Sometimes(0.1, iaa.Crop(px=(0, 1))),
    iaa.Fliplr(0.5),
    iaa.Sometimes(0.5, iaa.AdditiveGaussianNoise(scale=0.1*255)),
    iaa.Sometimes(0.5, iaa.Affine(rotate=5))
])

In [6]:
import lasagne
from theano import tensor as T
from lasagne.nonlinearities import *
import lasagne.layers.dnn

input_X = T.tensor4("X")
target_y = T.vector("target Y integer",dtype='int32')

In [7]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if augment:
            to_aug_x = map(cifar_to_augmentation, inputs[excerpt])
            to_aug_x = np.array(list(to_aug_x))
            to_aug_x = map(augmentation_to_cifar, seq.augment_images(to_aug_x))
            inp = np.array(list(to_aug_x))
        else:
            inp = inputs[excerpt]
        yield inp, targets[excerpt]

In [8]:
val_ind = random.sample(range(len(X_train)), len(X_train) // 5)
X_val = X_train[val_ind]
y_val = y_train[val_ind]
X_train = np.delete(X_train, val_ind, axis=0)
y_train = np.delete(y_train, val_ind, axis=0)

In [9]:
def send(text):
    import requests
    telegram_token = '224136417:AAGJ2kKaoPiiksBVJ-8AslDTavFDL6btqqE'
    requests.get('https://api.telegram.org/bot{}/sendMessage?chat_id=277146928&text={}'.format(telegram_token, text))

In [10]:
from copy import deepcopy
import time

In [11]:
def iter_epoch(X_train, y_train, batch_size, train_fun, augment):
    f = open('nn-output.txt', 'a')
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    cnt = 0
    for batch in iterate_minibatches(X_train, y_train, batch_size, augment):
        cnt += 1
        if cnt % (len(X_train) // batch_size // 2) == 0:
            print(epoch, cnt * batch_size / len(X_train))
        inputs, targets = batch
        train_err_batch, train_acc_batch= train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time), file=f)
    send("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches), file=f)
    print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100), file=f)
    send("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100), file=f)
    send("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
    f.close()
    
def test():
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500):
        inputs, targets = batch
        acc = accuracy_fun(inputs, targets)
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
    return  test_acc / test_batches * 100
    
def reset_net():
    def add_normalized_conv(net, num_filters):
        net = lasagne.layers.dnn.Conv2DDNNLayer(net, num_filters=num_filters, filter_size=(3, 3), pad='same', nonlinearity=lasagne.nonlinearities.identity)
        net = lasagne.layers.dnn.BatchNormDNNLayer(net, epsilon=1e-3)
        net = lasagne.layers.NonlinearityLayer(net)
        return net

    net = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input_X)

    net = add_normalized_conv(net, 64)
    net = lasagne.layers.dropout_channels(net, p=0.3)
    net = add_normalized_conv(net, 64)
    net = lasagne.layers.dnn.MaxPool2DDNNLayer(net, pool_size=(2, 2))

    net = add_normalized_conv(net, 128)
    net = lasagne.layers.dropout_channels(net, p=0.4)
    net = add_normalized_conv(net, 128)

    net = add_normalized_conv(net, 256)
    net = lasagne.layers.dropout_channels(net, p=0.4)
    net = add_normalized_conv(net, 256)
    net = lasagne.layers.dnn.MaxPool2DDNNLayer(net, pool_size=(2, 2))

    net = add_normalized_conv(net, 512)
    net = lasagne.layers.dropout_channels(net, p=0.4)
    net = add_normalized_conv(net, 512)
    net = lasagne.layers.dnn.MaxPool2DDNNLayer(net, pool_size=(2, 2))

    net = lasagne.layers.dropout_channels(net, p=0.5)
    net = lasagne.layers.DenseLayer(net, 512)
    net = lasagne.layers.dnn.BatchNormDNNLayer(net)
    net = lasagne.layers.NonlinearityLayer(net)

    net = lasagne.layers.DenseLayer(net, num_units = 10, nonlinearity=softmax)
    
    y_predicted = lasagne.layers.get_output(net)
    all_weights = lasagne.layers.get_all_params(net, trainable=True)
    return all_weights, y_predicted, net

In [None]:
weights, y_predicted, net = reset_net()
loss = lasagne.objectives.categorical_crossentropy(predictions=y_predicted, targets=target_y).mean()
accuracy = lasagne.objectives.categorical_accuracy(y_predicted, target_y).mean()

learning_rate = theano.shared(lasagne.utils.floatX(1e-4))
updates = lasagne.updates.adam(loss, weights, learning_rate=learning_rate) 
train_fun = theano.function([input_X, target_y],[loss, accuracy], updates=updates, allow_input_downcast=True)
accuracy_fun = theano.function([input_X, target_y],
                               accuracy, 
                               allow_input_downcast=True)
augment_each_iter_net = {}

num_epochs = 200 #количество проходов по данным
batch_size = 20 #размер мини-батча
send('NEW NETWORK')
f = open('nn-output.txt', 'a')
print('NEW NETWORK', file=f)
for epoch in range(num_epochs):
    iter_epoch(X_train, y_train, batch_size, train_fun, augment=True)
    if epoch % 4 == 0:
        send("  test accuracy:\t\t{:.2f} %".format(test()))

0 0.5
0 1.0
Final results:
  test accuracy:		38.98 %
1 0.5
1 1.0
2 0.5
2 1.0
3 0.5
3 1.0
4 0.5
4 1.0
Final results:
  test accuracy:		65.74 %
5 0.5
5 1.0
6 0.5
6 1.0
7 0.5
7 1.0
8 0.5
8 1.0
Final results:
  test accuracy:		72.57 %
9 0.5
9 1.0
10 0.5
10 1.0
11 0.5
11 1.0
12 0.5
12 1.0
Final results:
  test accuracy:		76.50 %
13 0.5
13 1.0
14 0.5
14 1.0
15 0.5
15 1.0
16 0.5
16 1.0
Final results:
  test accuracy:		78.54 %
17 0.5
17 1.0
18 0.5
18 1.0
19 0.5
19 1.0
20 0.5
20 1.0
Final results:
  test accuracy:		79.34 %
21 0.5
21 1.0
22 0.5
22 1.0
23 0.5
23 1.0
24 0.5
24 1.0
Final results:
  test accuracy:		78.97 %
25 0.5
25 1.0
26 0.5
26 1.0
27 0.5
27 1.0
28 0.5
28 1.0
Final results:
  test accuracy:		79.89 %
29 0.5
29 1.0
30 0.5
30 1.0
31 0.5
31 1.0
32 0.5
32 1.0
Final results:
  test accuracy:		80.59 %
33 0.5
33 1.0
34 0.5
34 1.0
35 0.5
35 1.0
36 0.5
36 1.0
Final results:
  test accuracy:		80.56 %
37 0.5
37 1.0
38 0.5
38 1.0
39 0.5
39 1.0
40 0.5
40 1.0
Final results:
  test accuracy:		80.

На нормальном количестве эпох получилось на 3% больше :) А ещё была другая сетка:
![title](nn2.png)