In [10]:
'''Train a simple deep CNN on the CIFAR10 small images dataset.

GPU run command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py

It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
(it's still underfitting at that point, though).

Note: the data was pickled with Python 2, and some encoding issues might prevent you
from loading it in Python 3. You might have to load it in Python 2,
save it in a different format, load it in Python 3 and repickle it.
'''

from __future__ import print_function
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras import backend as K
from keras.activations import relu
from keras.layers.advanced_activations import Quorum

# Hyper-parameters for the experiment.
#   batch_size - mini-batch size
#   nb_classes - number of target classes (used for the one-hot label encoding
#                and for the width of the final Dense layer)
#   nb_epoch   - number of training epochs
# NOTE(review): batch_size and nb_epoch are not referenced in the visible cells.
batch_size, nb_classes, nb_epoch = 32, 10, 200

def step(X):
    """Step activation built on the backend's ``switch``.

    Forwards the predicate ``X < 0`` to ``K.switch`` with ``0`` as the
    "then" value and ``1`` as the "else" value, and returns whatever the
    backend produces.
    """
    is_negative = X < 0
    return K.switch(is_negative, 0, 1)

# CIFAR10 images: three colour channels (RGB), 32 x 32 pixels each
img_channels = 3
img_rows, img_cols = 32, 32

# fetch the dataset, already shuffled and split into train / test parts
print("Loading data...")
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# turn the integer class vectors into binary (one-hot) class matrices
Y_train, Y_test = (np_utils.to_categorical(labels, nb_classes)
                   for labels in (y_train, y_test))

# one convolutional stage followed by a fully connected classifier
model = Sequential([
    Convolution2D(8, 3, 3, border_mode='same', init='he_normal',
                  input_shape=(img_channels, img_rows, img_cols)),
    Activation('relu'),
    # optional second convolution stage, currently disabled:
    #   Convolution2D(16, 3, 3),
    #   Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512, init="he_normal"),
    Activation('relu'),
    Dropout(0.5),
    Dense(nb_classes),
    Activation('softmax'),
])

# optimizer: SGD with Nesterov momentum and a small learning-rate decay
sgd = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True)
print("Compiling model...")
model.compile(optimizer=sgd, loss='categorical_crossentropy')
print("Finished compiling!")

# cast the pixel data to float32 and rescale it by 1/255
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

Loading data...
Compiling model...
Finished compiling!


In [11]:
import numpy as np
from sys import stdout as std

def plot_random_weights(model, X_test, Y_test, min_val=-0.01, max_val=1.01, interval=0.1):
    """Sweep two randomly chosen scalar weights over a grid and record the loss.

    One scalar weight is picked at random from every layer whose class name
    contains ``"Dense"`` or ``"Conv"``. Two of those picks (always from two
    different layers) are then set to every combination of values in
    ``np.arange(min_val, max_val, interval)``, and ``model.evaluate`` is
    called for each combination.

    Parameters
    ----------
    model :
        Object exposing ``layers`` (each weighted layer has a ``W`` with
        ``get_value()`` / ``set_value()``) and
        ``evaluate(X, Y, verbose=0)``.
    X_test, Y_test :
        Passed through unchanged to ``model.evaluate``.
    min_val, max_val, interval :
        Start, (exclusive) stop and step of the grid used for both weights.

    Returns
    -------
    (x_plot, y_plot, z_plot) : tuple of lists
        Value of the first weight, value of the second weight and the
        corresponding ``model.evaluate`` result, one entry per grid point.

    Raises
    ------
    ValueError
        If fewer than two layers with a non-empty ``W`` are found (two
        distinct layers are needed for the sweep).

    Notes
    -----
    The weights that were in place before the call are written back when
    the sweep finishes or fails, so the model is not left modified.
    """
    # One random *scalar* coordinate per weighted layer. The coordinate covers
    # every axis of W, so for a 4-D convolution kernel exactly one weight is
    # varied instead of a whole kernel slice.
    candidates = []
    for index, layer in enumerate(model.layers):
        ln = layer.__class__.__name__
        if "Dense" in ln or "Conv" in ln:
            shape = np.shape(layer.W.get_value())
            if 0 in shape:
                # nothing to pick from in an empty weight tensor
                continue
            coord = tuple(np.random.randint(dim) for dim in shape)
            candidates.append((index, coord))

    if len(candidates) < 2:
        raise ValueError(
            "plot_random_weights needs at least two Dense/Conv layers with "
            "weights, found {}".format(len(candidates)))

    # Draw two distinct candidates without a retry loop.
    first, second = np.random.choice(len(candidates), size=2, replace=False)
    layer_one, coord_one = candidates[first]
    layer_two, coord_two = candidates[second]

    var_one = model.layers[layer_one].W
    var_two = model.layers[layer_two].W
    original_one = var_one.get_value()
    original_two = var_two.get_value()
    # Work on copies so the originals can be restored afterwards.
    weights_one = np.array(original_one, copy=True)
    weights_two = np.array(original_two, copy=True)

    x_plot = []
    y_plot = []
    z_plot = []
    grid = np.arange(min_val, max_val, interval)

    print("Starting analysis...")
    try:
        for I in grid:
            for J in grid:
                std.write("Currently calculating i={}, j={}\n".format(I, J))
                std.flush()
                weights_one[coord_one] = I
                weights_two[coord_two] = J
                var_one.set_value(weights_one)
                var_two.set_value(weights_two)
                loss = model.evaluate(X_test, Y_test, verbose=0)
                x_plot.append(I)
                y_plot.append(J)
                z_plot.append(loss)
    finally:
        # Put the untouched weights back even if evaluation raised.
        var_one.set_value(original_one)
        var_two.set_value(original_two)

    return x_plot, y_plot, z_plot

# Run the two-weight sweep on the test set, convert the three result lists to
# arrays and persist them as x.npy / y.npy / z.npy for the plotting cell.
x_plot, y_plot, z_plot = plot_random_weights(model, X_test, Y_test)
np_x, np_y, np_z = (np.array(axis_values) for axis_values in (x_plot, y_plot, z_plot))
np.save(file="x", arr=np_x)
np.save(file="y", arr=np_y)
np.save(file="z", arr=np_z)

Starting analysis...
Currently calculating i=-0.01, j=-0.01
Currently calculating i=-0.01, j=0.09
Currently calculating i=-0.01, j=0.19
Currently calculating i=-0.01, j=0.29
Currently calculating i=-0.01, j=0.39
Currently calculating i=-0.01, j=0.49
Currently calculating i=-0.01, j=0.59
Currently calculating i=-0.01, j=0.69
Currently calculating i=-0.01, j=0.79
Currently calculating i=-0.01, j=0.89
Currently calculating i=-0.01, j=0.99
Currently calculating i=0.09, j=-0.01
Currently calculating i=0.09, j=0.09
Currently calculating i=0.09, j=0.19
Currently calculating i=0.09, j=0.29
Currently calculating i=0.09, j=0.39
Currently calculating i=0.09, j=0.49
Currently calculating i=0.09, j=0.59
Currently calculating i=0.09, j=0.69
Currently calculating i=0.09, j=0.79
Currently calculating i=0.09, j=0.89
Currently calculating i=0.09, j=0.99
Currently calculating i=0.19, j=-0.01
Currently calculating i=0.19, j=0.09
Currently calculating i=0.19, j=0.19
Currently calculating i=0.19, j=0.29
Cur

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Reload the sweep results written by the previous cell so this cell can be
# run without repeating the evaluation.
x_plot = np.load("x.npy")
y_plot = np.load("y.npy")
z_plot = np.load("z.npy")

# Single 3D panel, twice as wide as it is tall.
fig = plt.figure(figsize=plt.figaspect(0.5))
fig.suptitle("Loss function as a function of weights")
ax = fig.add_subplot(1, 1, 1, projection='3d')
# Hide the 3D axes through the public API instead of assigning the private
# `_axis3don` attribute directly.
ax.set_axis_off()
# Triangulated surface over the (weight one, weight two) grid, coloured by loss.
ax.plot_trisurf(x_plot, y_plot, z_plot, cmap=cm.jet, linewidth=0.2)
plt.show()

##### 