In [1]:
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
# Plot defaults applied to every figure drawn later in the notebook.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),  # set default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     cifar10_dir='cs231n/datasets/cifar-10-batches-py'):
  """
  Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
  it for the linear classifier. These are the same steps as we used for the
  SVM, but condensed to a single function.

  Parameters:
  - num_training: number of leading training examples kept for training.
  - num_validation: number of examples, taken right after the training
    range of the loaded training set, kept for validation.
  - num_test: number of leading test examples kept.
  - cifar10_dir: directory handed to load_CIFAR10.

  Returns a tuple (X_train, y_train, X_val, y_val, X_test, y_test). Each X
  is mean-centred (using the training mean image), has a trailing row of
  ones for the bias, and is laid out with one example per COLUMN, i.e.
  shape (D + 1, N). Each y is the matching slice of the loaded labels.
  """
  # Load the raw CIFAR-10 data
  X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

  # Subsample the data. The validation split must be taken before X_train
  # is narrowed, because it is carved out of the same loaded array.
  val_idx = np.arange(num_training, num_training + num_validation)
  train_idx = np.arange(num_training)
  test_idx = np.arange(num_test)
  X_val, y_val = X_train[val_idx], y_train[val_idx]
  X_train, y_train = X_train[train_idx], y_train[train_idx]
  X_test, y_test = X_test[test_idx], y_test[test_idx]

  # Preprocessing: reshape the image data into rows
  X_train = np.reshape(X_train, (X_train.shape[0], -1))
  X_val = np.reshape(X_val, (X_val.shape[0], -1))
  X_test = np.reshape(X_test, (X_test.shape[0], -1))

  # Normalize the data: subtract the mean image of the TRAINING split from
  # every split. The subtraction is done out-of-place so it does not rely
  # on the loader having returned a floating-point array.
  mean_image = np.mean(X_train, axis=0)
  X_train = X_train - mean_image
  X_val = X_val - mean_image
  X_test = X_test - mean_image

  # add bias dimension and transform into columns
  X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
  X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
  X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

  return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape

Train data shape:  (3073, 49000)
Train labels shape:  (49000,)
Validation data shape:  (3073, 1000)
Validation labels shape:  (1000,)
Test data shape:  (3073, 1000)
Test labels shape:  (1000,)


In [3]:
def one_hot(x, n):
    """
    Encode integer class labels as one-hot rows.

    Parameters:
    - x: integer labels as any array-like (list, tuple, scalar or ndarray of
      any shape); it is flattened before encoding.
    - n: number of classes, i.e. the width of each encoded row.

    Returns a float array of shape (number_of_labels, n) with a single 1 per
    row, placed in the column given by that row's label. Empty input yields
    an array of shape (0, n).
    """
    # np.asarray accepts every sequence type, not only `list`.
    labels = np.asarray(x).ravel()
    if labels.size == 0:
        # An empty sequence converts to a float array, which cannot be used
        # as an index; there is nothing to encode anyway.
        return np.zeros((0, n))
    o_h = np.zeros((labels.size, n))
    o_h[np.arange(labels.size), labels] = 1
    return o_h

In [4]:
y_train_o_h = one_hot(y_train,10)
y_val_o_h = one_hot(y_val,10)
y_test_o_h = one_hot(y_test,10)
print 'Train labels shape: ', y_train_o_h.shape
print 'Train labels shape: ', y_val_o_h.shape
print 'Train labels shape: ', y_test_o_h.shape

X_train_t = np.transpose(X_train)
X_val_t = np.transpose(X_val)
X_test_t = np.transpose(X_test)
print 'Train data shape: ', X_train_t.shape
print 'Validation data shape: ', X_val_t.shape
print 'Test data shape: ', X_test_t.shape

Train labels shape:  (49000, 10)
Train labels shape:  (1000, 10)
Train labels shape:  (1000, 10)
Train data shape:  (49000, 3073)
Validation data shape:  (1000, 3073)
Test data shape:  (1000, 3073)


In [5]:
import theano
from theano import tensor as T

INFO (theano.gof.compilelock): Waiting for existing lock by process '29464' (I am process '31414')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/naoki/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/lock_dir
INFO (theano.gof.compilelock): Waiting for existing lock by process '29464' (I am process '31414')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/naoki/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/lock_dir
Using gpu device 2: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5105)


In [6]:
def floatX(X):
    """Return X as a NumPy array whose dtype is Theano's configured `floatX`."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def init_weights(shape, factor=0.00001):
    """
    Create a Theano shared variable of the given shape, filled with standard
    normal samples multiplied by `factor`.
    """
    scaled_noise = np.random.randn(*shape) * factor
    initial_value = floatX(scaled_noise)
    return theano.shared(initial_value)

def sgd(cost, params, lr=8.910000e-07):
    """
    Build plain gradient-descent updates.

    Returns a list with one [parameter, new_value] pair per entry of
    `params`, where new_value is the parameter minus `lr` times the gradient
    of `cost` with respect to it.
    """
    gradients = T.grad(cost=cost, wrt=params)
    return [[param, param - gradient * lr]
            for param, gradient in zip(params, gradients)]

def model(X, w_h, w_o):
    """
    Two-layer network: a hard-sigmoid hidden layer (weights `w_h`) followed
    by a softmax output layer (weights `w_o`). Returns the softmax output.
    """
    hidden_input = T.dot(X, w_h)
    hidden = T.nnet.hard_sigmoid(hidden_input)
    output_input = T.dot(hidden, w_o)
    return T.nnet.softmax(output_input)

In [7]:
# Symbolic float matrices for a batch of inputs and its one-hot targets.
X, Y = T.fmatrix(), T.fmatrix()

# 3073 input features -> 70 hidden units -> 10 classes.
w_h = init_weights(shape=(3073, 70))
w_o = init_weights(shape=(70, 10))

# Network output and, per row, the index of its largest entry.
py_x = model(X=X, w_h=w_h, w_o=w_o)
y_pred = T.argmax(py_x, axis=1)


In [8]:
# Parameters to train and the loss to minimise: the mean categorical
# cross-entropy between the network output and the targets.
params = [w_h, w_o]
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
# Disabled variant that adds an L2 penalty on both weight matrices:
#cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) + 0.5 * 2.610000e+01 * ((w_h * w_h).sum() + (w_o * w_o).sum())
updates = sgd(cost=cost, params=params, lr=0.01)

In [9]:
# Compile the graph: `train` applies the SGD updates and returns the cost of
# the batch it was given; `predict` returns the predicted class indices.
train = theano.function(
    inputs=[X, Y],
    outputs=cost,
    updates=updates,
    allow_input_downcast=True,
)
predict = theano.function(
    inputs=[X],
    outputs=y_pred,
    allow_input_downcast=True,
)

In [12]:
print "Initial Error:", np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))
for i in range(20):
    for start, end in zip(range(0, len(X_train_t), 128), range(128, len(X_train_t), 128)):
        cost = train(X_train_t[start:end], y_train_o_h[start:end])
    print i, np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))

Initial Error: 0.414
0 0.398
1 0.39
2 0.403
3 0.41
4 0.391
5 0.397
6 0.4
7 0.389
8 0.397
9 0.397
10 0.397
11 0.398
12 0.395
13 0.4
14 0.4
15 0.405
16 0.404
17 0.412
18 0.391
19 0.396


In [13]:
def evaluate(n_hidden_units, learning_rate, num_epochs, regularization_strength, batch_size=128):
    """
    Train a fresh two-layer network and return its validation accuracy.

    The network is trained with mini-batch SGD on the module-level
    `X_train_t` / `y_train_o_h` and scored on `X_val_t` / `y_val_o_h`.

    Parameters:
    - n_hidden_units: width of the hidden layer.
    - learning_rate: step size passed to `sgd`.
    - num_epochs: number of full passes over the training set.
    - regularization_strength: coefficient of the L2 penalty
      0.5 * strength * (sum(w_h ** 2) + sum(w_o ** 2)) added to the loss.
    - batch_size: number of examples per SGD step (default 128).

    Returns the fraction of validation examples classified correctly.
    """
    X = T.fmatrix()
    Y = T.fmatrix()

    # Layer sizes follow the data instead of being hard-coded.
    n_features = X_train_t.shape[1]
    n_classes = y_train_o_h.shape[1]
    w_h = init_weights((n_features, n_hidden_units))
    w_o = init_weights((n_hidden_units, n_classes))

    py_x = model(X, w_h, w_o)
    y_pred = T.argmax(py_x, axis=1)

    data_loss = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    l2_penalty = 0.5 * regularization_strength * ((w_h * w_h).sum() + (w_o * w_o).sum())
    cost = data_loss + l2_penalty

    params = [w_h, w_o]
    updates = sgd(cost, params, lr=learning_rate)
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

    num_examples = len(X_train_t)
    for _ in range(num_epochs):
        # Slicing clamps at the end of the array, so the trailing partial
        # batch is trained on as well instead of being dropped.
        for start in range(0, num_examples, batch_size):
            stop = start + batch_size
            train(X_train_t[start:stop], y_train_o_h[start:stop])

    return np.mean(np.argmax(y_val_o_h, axis=1) == predict(X_val_t))

In [14]:
# Bookkeeping for the hyper-parameter search.
# best_val starts below any possible accuracy, so the first trial always wins.
best_val = -1
# Hyper-parameter lists, keyed by the validation accuracy they achieved.
settings = dict()
results = dict()

In [15]:
cnt = 0
while cnt < 10:
    # Best Settings: # hidden units = 300, lr = 0.004318, # epochs = 6, reg = 0.1245 -->> 0.428
    # Best Settings: # hidden units = 500, lr = 0.00517, # epochs = 6, reg = 0.0835 -->> 0.434
    cnt += 1
    n_hidden_units = np.random.choice(range(300,650,50))
    #n_hidden_units = 250
    learning_rate = np.random.uniform(0.002, 0.0075)
    #learning_rate = 0.008
    #num_epochs = np.random.choice(range(10,60,10))
    num_epochs = 6
    regularization_strength = np.random.uniform(0.075, 0.15)
    #regularization_strength = 0.04

    print "%d. evaluating: # hidden units = %d, lr = %0.4g, # epochs = %d, reg = %0.4g" % (cnt, n_hidden_units, learning_rate, num_epochs, regularization_strength),
    val = evaluate(n_hidden_units, learning_rate, num_epochs, regularization_strength)
    if val > best_val:
        best_val = val
        settings[best_val] = [n_hidden_units, learning_rate, num_epochs, regularization_strength]
    print "--->>>", val
best = settings[best_val]
print "\n-----\nBest Settings: # hidden units = %d, lr = %0.4g, # epochs = %d, reg = %0.4g -->> %0.3f" % (
    best[0], best[1], best[2], best[3], best_val)

1. evaluating: # hidden units = 500, lr = 0.002589, # epochs = 6, reg = 0.1392 --->>> 0.429
2. evaluating: # hidden units = 450, lr = 0.005034, # epochs = 6, reg = 0.116 --->>> 0.427
3. evaluating: # hidden units = 500, lr = 0.004449, # epochs = 6, reg = 0.07981 --->>> 0.443
4. evaluating: # hidden units = 500, lr = 0.007192, # epochs = 6, reg = 0.1459 --->>> 0.431
5. evaluating: # hidden units = 450, lr = 0.004753, # epochs = 6, reg = 0.1209 --->>> 0.434
6. evaluating: # hidden units = 500, lr = 0.007277, # epochs = 6, reg = 0.1327 --->>> 0.421
7. evaluating: # hidden units = 550, lr = 0.004006, # epochs = 6, reg = 0.1478 --->>> 0.441
8. evaluating: # hidden units = 350, lr = 0.006646, # epochs = 6, reg = 0.08071 --->>> 0.419
9. evaluating: # hidden units = 550, lr = 0.004749, # epochs = 6, reg = 0.1207 --->>> 0.442
10. evaluating: # hidden units = 500, lr = 0.005215, # epochs = 6, reg = 0.1484 --->>> 0.437

-----
Best Settings: # hidden units = 500, lr = 0.004449, # epochs = 6, reg = 0.07981 -->> 0.443

In [27]:
val = evaluate(500, 0.006, 100, 0.1)
print val

0.447
