In [None]:
%matplotlib inline
from pylab import *
import dataset
import fullyconn
from fullyconn import FullyConnLayer, InputLayer
from IPython import display

# Loading data

In [None]:
# Load the images and split them in half: the first half is used for
# training, the second half for testing. Each image is flattened to a
# vector of 3 * 40 * 40 values.
data = dataset.load_dataset()

# Floor division keeps the split index an integer on both Python 2 and 3
# (a float slice index raises TypeError under true division).
split_index = len(data) // 2
flat_image_size = 3 * 40 * 40

train_data = data[:split_index]
train_data = np.reshape(train_data, (len(train_data), flat_image_size))
test_data = data[split_index:]
test_data = np.reshape(test_data, (len(test_data), flat_image_size))

# Model plan (fully connected autoencoder 4800-150-100-50-100-150-4800 with MSE loss)

In [None]:
def build_nn():
    """Build the fully connected autoencoder.

    The network is a `fullyconn.MLP` made of an input layer of
    40 * 40 * 3 units followed by fully connected layers of
    150, 100, 50, 100, 150 and 40 * 40 * 3 units.

    Returns:
        The `fullyconn.MLP` instance with all layers added.
    """
    print("Building NN")
    nn = fullyconn.MLP()

    image_size = 40 * 40 * 3
    nn.add_layer(InputLayer(name="input", dimension=image_size))

    # (layer name, number of units), in the order the layers are stacked.
    # NOTE(review): "reconstruction2" lacks the hyphen used by the other
    # names; it is kept as-is because the name may be referenced elsewhere.
    layer_specs = [
        ("bottleneck-1", 150),
        ("bottleneck-2", 100),
        ("face-embedding", 50),
        ("reconstruction-1", 100),
        ("reconstruction2", 150),
        ("output", image_size),
    ]
    for layer_name, layer_dimension in layer_specs:
        nn.add_layer(FullyConnLayer(name=layer_name,
                                    dimension=layer_dimension))
    return nn

# Evaluation function (test set)

In [None]:
def precision(dataset, encode_decode):
    """Return the mean absolute reconstruction error over `dataset`.

    Despite the name this is an error measure: it is the mean of
    |original - reconstruction| over every element, so lower is better.

    Args:
        dataset: array of examples.
        encode_decode: callable mapping the array of examples to an array
            of reconstructions of the same shape.

    Returns:
        The mean absolute element-wise difference.
    """
    reconstruction = encode_decode(dataset)
    absolute_error = np.absolute(dataset - reconstruction)
    return absolute_error.mean()

# Reconstruction visualisation

In [None]:
import Image
def show_reconstruction(example, encode_decode):
    """Draw an example and its reconstruction side by side in a new figure.

    Args:
        example: a single flattened image that can be reshaped to
            (40, 40, 3).
        encode_decode: callable taking a batch of examples and returning
            the batch of reconstructions.
    """
    # encode_decode works on batches, so wrap the example in a batch of one.
    single_batch = np.array([example])
    decoded = encode_decode(single_batch)

    image_shape = (40, 40, 3)
    original_image = np.array(example.reshape(image_shape), dtype="float")
    decoded_image = np.array(decoded.reshape(image_shape), dtype="float")

    figure()
    # Left panel: the original; right panel: the reconstruction.
    for panel, image in ((121, original_image), (122, decoded_image)):
        subplot(panel)
        imshow(image, interpolation="nearest")

# Hack: average training face and Gaussian-shaped filter passed to training

In [None]:
# Average of the first 100 training examples.
# It is computed into a fresh array on purpose: accumulating in place through
# `train_data[0]` would go through a view and overwrite the first training
# example with the running sum.
av = np.mean(train_data[:100], axis=0)
show_reconstruction(av, lambda x:x)

# Build a 40x40 filter by sampling pixel positions from a 2-D normal
# distribution centred on the image and counting the hits per pixel.
samples = 500000
mean = [20, 20]
# Valid (symmetric, positive semi-definite) covariance: variance 95 along the
# row axis and 45 along the column axis. The matrix [[0, 45], [95, 0]] is not
# a valid covariance and NumPy documents the result as undefined in that case.
# NOTE(review): the orientation matches what NumPy's SVD-based sampler
# effectively produced for that matrix -- confirm this is the intended shape.
cov = [[95, 0], [0, 45]]

# Fixed seed so that the filter is identical on every run.
filter_rng = np.random.RandomState(42)
positions = np.rint(filter_rng.multivariate_normal(mean, cov, samples)).astype(int)
rows = positions[:, 0]
cols = positions[:, 1]

# Keep only positions inside the image. The lower bound matters: negative
# indices would otherwise wrap around to the opposite edge of the filter.
inside = (rows >= 0) & (rows < 40) & (cols >= 0) & (cols < 40)
hits = np.bincount(rows[inside] * 40 + cols[inside], minlength=40 * 40)
hits = hits.reshape(40, 40)

# Each hit adds 0.002 to the pixel, saturating at 1.0; the same weight is
# used for all three colour channels.
weights = np.minimum(1.0, hits * 0.002).astype("float32")
filter = np.repeat(weights[:, :, np.newaxis], 3, axis=2)
filter = filter.reshape(40*40*3)
show_reconstruction(filter, lambda x:x)
show_reconstruction(np.multiply(filter, av), lambda x:x)

# Model training

In [None]:
def train_network(nn, learning_rate, regularization, batch_size, steps,
                  eval_every=1):
    """Train the autoencoder and plot its progress while training.

    Reads the module-level `train_data`, `test_data` and `filter`.

    Args:
        nn: network exposing `build_train(learning_rate, regularization,
            filter)` and `build_eval()`.
        learning_rate: forwarded to `nn.build_train`.
        regularization: forwarded to `nn.build_train`.
        batch_size: number of training examples per update. Examples that
            do not fill a complete final batch are not used.
        steps: number of passes over the training data.
        eval_every: evaluate and redraw the plots every this many steps
            (must be >= 1). The default of 1 evaluates before every step.
    """
    test_epoch = []
    train_epoch = []
    train_step = nn.build_train(learning_rate, regularization, filter)
    reconstruct = nn.build_eval()

    # Floor division: `range` needs an integer on both Python 2 and 3.
    batch_count = len(train_data) // batch_size

    for step_id in range(steps):
        if step_id % eval_every == 0:
            display.clear_output(wait=True)
            test_precision = precision(test_data, reconstruct)
            train_precision = precision(train_data, reconstruct)

            # Errors are stored scaled by 100.
            train_epoch.append(train_precision * 100)
            test_epoch.append(test_precision * 100)

            # Evaluations are evenly spaced between step 0 and step_id.
            x = linspace(0, step_id, len(train_epoch))

            plot(x, test_epoch, 'r')
            plot(x, train_epoch, 'b')
            for i in range(10, 20):
                show_reconstruction(test_data[i], reconstruct)
            show()

        for batch_id in range(batch_count):
            batch_start = batch_id * batch_size
            ts = train_data[batch_start:batch_start + batch_size]

            # Autoencoder: the batch is both the input and the target.
            train_step(ts, ts)

In [None]:
# Build the autoencoder; the cells below train, save and query this instance.
nn = build_nn()

In [None]:
# Train for 200 steps with batches of 30 examples and no regularization.
train_network(nn,
              learning_rate=0.0001,
              regularization=0.00,
              batch_size=30,
              steps=200)

In [None]:
import pickle

# Save the trained network to disk.
# An explicit binary protocol is used because the default on Python 2 is the
# ASCII protocol 0, which is slow and bulky for objects holding large arrays.
# pickle.load detects the protocol automatically when reading the file back.
with open('first_embedding.dat', 'wb') as f:
    pickle.dump(nn, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import theano
from sklearn.neighbors import NearestNeighbors
from scipy import misc

# Compile a function mapping network inputs to the output of layer 2.
# NOTE(review): whether index 2 is the "face-embedding" layer depends on
# whether nn.layers counts the input layer -- confirm against fullyconn.MLP.
encode = theano.function(inputs  = [nn.input],
                         outputs = nn.layers[2].output)

# Encode the whole test set and index the codes for nearest-neighbour search.
encoded_test_data = np.array(encode(test_data))
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(encoded_test_data)

# For the first 10 test images, show the query followed by its neighbours.
for query_index in range(10):
    query = test_data[query_index]
    encoded_query = encode([query.reshape(40*40*3)])
    _, neighbor = nbrs.kneighbors(encoded_query)

    figure()
    subplot(1, 5, 1)
    imshow(np.reshape(query, (40, 40, 3)))
    # The first returned neighbour is skipped; the remaining four fill
    # panels 2 to 5.
    for panel, neighbor_index in enumerate(neighbor[0][1:], 2):
        subplot(1, 5, panel)
        imshow(np.reshape(test_data[neighbor_index], (40, 40, 3)))