- AE 설명 (autoencoder overview) : http://solarisailab.com/archives/113
- VAE : http://nolsigan.com/blog/what-is-variational-autoencoder/
- Auto-Encoding Variational Bayes : https://arxiv.org/abs/1312.6114  [[code](https://github.com/ikostrikov/TensorFlow-VAE-GAN-DRAW)]
- Tutorial on Variational Autoencoders : https://arxiv.org/abs/1606.05908
- GAN : http://aliensunmin.github.io/project/accv16tutorial/media/generative.pdf

In [50]:
import numpy as np
from sklearn.utils import shuffle

## Download Data

In [45]:
# Download the four MNIST archives (train/test images and labels) listed at
# http://yann.lecun.com/exdb/mnist/ into data/.
# `-P data/` sets the target directory; `-c` continues a partially downloaded file.
!wget -P data/ -c http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
!wget -P data/ -c http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
!wget -P data/ -c http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
!wget -P data/ -c http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz  
# Decompress each archive; the extension-less files in data/ are the ones
# load_mnist() opens.
!gzip -d data/train-images-idx3-ubyte.gz
!gzip -d data/train-labels-idx1-ubyte.gz
!gzip -d data/t10k-images-idx3-ubyte.gz
!gzip -d data/t10k-labels-idx1-ubyte.gz  

--2017-04-13 03:14:16--  http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Resolving yann.lecun.com... 216.165.22.6
Connecting to yann.lecun.com|216.165.22.6|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9912422 (9.5M) [application/x-gzip]
Saving to: 'data/train-images-idx3-ubyte.gz'


2017-04-13 03:14:19 (3.53 MB/s) - 'data/train-images-idx3-ubyte.gz' saved [9912422/9912422]

--2017-04-13 03:14:19--  http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Resolving yann.lecun.com... 216.165.22.6
Connecting to yann.lecun.com|216.165.22.6|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28881 (28K) [application/x-gzip]
Saving to: 'data/train-labels-idx1-ubyte.gz'


2017-04-13 03:14:20 (92.0 KB/s) - 'data/train-labels-idx1-ubyte.gz' saved [28881/28881]

--2017-04-13 03:14:20--  http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Resolving yann.lecun.com... 216.165.22.6
Connecting to yann.lecun.com|216.165.22.6|:80...

In [51]:
def load_mnist(data_dir='data'):
    """Load the decompressed MNIST files and return shuffled, scaled arrays.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing ``train-images-idx3-ubyte``,
        ``train-labels-idx1-ubyte``, ``t10k-images-idx3-ubyte`` and
        ``t10k-labels-idx1-ubyte``. Defaults to ``'data'``.

    Returns
    -------
    X_train, y_train, X_test, y_test
        ``X_*`` are float32 arrays of shape (60000, 784) / (10000, 784) with
        values divided by 255; ``y_*`` are uint8 label vectors of length
        60000 / 10000. Images and labels are shuffled together.

    Raises
    ------
    IOError / OSError
        If one of the files cannot be opened.
    ValueError
        If a file does not hold the expected number of bytes (from reshape).
    """
    # local import: the notebook only imports `os` in a later cell
    import os

    def read_payload(filename, header_bytes):
        # read the whole file as raw bytes and drop the leading header
        with open(os.path.join(data_dir, filename), 'rb') as f:
            raw = np.fromfile(file=f, dtype=np.uint8)
        return raw[header_bytes:]

    # image files: 16 header bytes are skipped, then one 28*28 row per sample
    X_train = read_payload('train-images-idx3-ubyte', 16).reshape(
        60000, 28 * 28).astype(np.float32)
    # label files: 8 header bytes are skipped, then one byte per sample
    y_train = read_payload('train-labels-idx1-ubyte', 8).reshape(
        60000).astype(np.uint8)

    X_test = read_payload('t10k-images-idx3-ubyte', 16).reshape(
        10000, 28 * 28).astype(np.float32)
    y_test = read_payload('t10k-labels-idx1-ubyte', 8).reshape(
        10000).astype(np.uint8)

    # shuffle images and labels with the same permutation
    X_train, y_train = shuffle(X_train, y_train)
    X_test, y_test = shuffle(X_test, y_test)

    # scale pixel values from [0, 255] to [0, 1]
    X_train /= 255.
    X_test /= 255.

    return X_train, y_train, X_test, y_test

In [52]:
# Load the images only (labels are discarded) and print basic sanity statistics.
X_train, _, X_test, _ = load_mnist()
print(X_train.shape, X_test.shape)
# min / mean / max of the train and test pixels, one line per statistic
for stat_name in ('min', 'mean', 'max'):
    print(getattr(X_train, stat_name)(), getattr(X_test, stat_name)())

(60000, 784) (10000, 784)
0.0 0.0
0.130661 0.132515
1.0 1.0


In [53]:
# Display the first training sample returned by load_mnist() (one flattened image).
X_train[0]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [17]:
def get_batch_idx(N, batch_size):
    """Yield consecutive slices that split ``N`` items into mini-batches.

    Parameters
    ----------
    N : int
        Total number of items.
    batch_size : int
        Number of items per batch.

    Yields
    ------
    slice
        ``slice(i * batch_size, (i + 1) * batch_size)`` for each batch. The
        last slice may end past ``N``; indexing a sequence with it simply
        returns the remaining items.

    Raises
    ------
    ZeroDivisionError
        If ``batch_size`` is 0 (raised when the generator is first advanced).
    """
    # Ceiling division done with integers: the float form int(x / y) rounds
    # up for very large values and would produce a spurious extra batch.
    num_batches = int((N + batch_size - 1) // batch_size)
    for i in range(num_batches):
        start = i * batch_size
        yield slice(start, start + batch_size)

In [31]:
# Demo: split a random 14x4 matrix into row batches of size 4
# (the final batch holds the 2 leftover rows).
X = np.random.random((14, 4))
print('Original data')
print(X)
print('As batches')
for batch_slice in get_batch_idx(len(X), 4):
    print(X[batch_slice])

Original data
[[ 0.20202756  0.71674543  0.40515169  0.26908579]
 [ 0.99560066  0.83913098  0.79367377  0.43882338]
 [ 0.11479176  0.58573877  0.87067218  0.29892666]
 [ 0.95999942  0.11536383  0.16154735  0.5996127 ]
 [ 0.54871874  0.00921023  0.00923285  0.58679924]
 [ 0.55599164  0.55512009  0.51649735  0.41297811]
 [ 0.76172963  0.24646588  0.55570662  0.33067711]
 [ 0.39736591  0.62373095  0.1030668   0.80245893]
 [ 0.29214797  0.08662856  0.00884072  0.87150083]
 [ 0.07755372  0.43004725  0.44463644  0.44727887]
 [ 0.35454402  0.48810394  0.6811334   0.74935368]
 [ 0.79277212  0.4468647   0.00539177  0.60968035]
 [ 0.50616836  0.19045664  0.71723208  0.21069559]
 [ 0.82841097  0.576699    0.7192957   0.41491335]]
As batches
[[ 0.20202756  0.71674543  0.40515169  0.26908579]
 [ 0.99560066  0.83913098  0.79367377  0.43882338]
 [ 0.11479176  0.58573877  0.87067218  0.29892666]
 [ 0.95999942  0.11536383  0.16154735  0.5996127 ]]
[[ 0.54871874  0.00921023  0.00923285  0.58679924]
 [ 0

In [57]:
from lasagne.layers import get_all_param_values
import os
import pickle
from lasagne.layers import InputLayer
from lasagne.layers import ConcatLayer
from lasagne.layers import DenseLayer
from lasagne.layers import get_all_layers
from lasagne.layers import get_all_params
from lasagne.nonlinearities import linear, rectify, sigmoid
import theano
import theano.tensor as T
from lasagne.layers import get_output
from lasagne.layers import get_all_params
from lasagne.updates import nesterov_momentum

In [58]:
# forward pass for the encoder, q(z|x)
def create_encoder_func(layers):
    """Compile a Theano function mapping a batch of inputs to encoder codes.

    ``layers`` is the name -> layer dictionary; only ``'l_encoder_out'`` is
    read. The output is computed with ``deterministic=True``.
    """
    symbolic_input = T.fmatrix('X')
    batch_input = T.fmatrix('X_batch')

    codes = get_output(
        layers['l_encoder_out'], symbolic_input, deterministic=True)

    # the compiled function takes the batch and substitutes it for the
    # symbolic network input
    return theano.function(
        inputs=[theano.In(batch_input)],
        outputs=codes,
        givens={symbolic_input: batch_input},
    )


# forward pass for the decoder, p(x|z)
def create_decoder_func(layers):
    """Compile a Theano function mapping a batch of codes to reconstructions.

    The code batch is injected as the output of ``'l_encoder_out'`` and the
    result is read from ``'l_decoder_out'`` with ``deterministic=True``.
    """
    symbolic_code = T.fmatrix('Z')
    batch_code = T.fmatrix('Z_batch')

    # feed Z in place of the encoder's output so only the decoder is evaluated
    code_substitution = {layers['l_encoder_out']: symbolic_code}
    reconstruction = get_output(
        layers['l_decoder_out'],
        inputs=code_substitution,
        deterministic=True
    )

    return theano.function(
        inputs=[theano.In(batch_code)],
        outputs=reconstruction,
        givens={symbolic_code: batch_code},
    )


# forward/backward (optional) pass for the encoder/decoder pair
def create_encoder_decoder_func(layers, apply_updates=False):
    """Compile a function returning the reconstruction loss for a batch.

    The loss is the mean over samples of the per-sample mean squared error
    between the input and the output of ``'l_decoder_out'``. With
    ``apply_updates=True`` every call also applies one Nesterov-momentum
    step (learning rate 0.01, momentum 0.9) to all trainable parameters
    feeding ``'l_decoder_out'``.
    """
    symbolic_input = T.fmatrix('X')
    batch_input = T.fmatrix('X_batch')

    reconstruction = get_output(
        layers['l_decoder_out'], symbolic_input, deterministic=False)

    # reconstruction loss: mean squared error per sample, then batch mean
    per_sample_error = T.mean(T.sqr(symbolic_input - reconstruction), axis=1)
    reconstruction_loss = T.mean(per_sample_error)

    updates = None
    if apply_updates:
        # all layers that participate in the forward pass should be updated
        trainable_params = get_all_params(
            layers['l_decoder_out'], trainable=True)
        updates = nesterov_momentum(
            reconstruction_loss, trainable_params, 0.01, 0.9)

    return theano.function(
        inputs=[theano.In(batch_input)],
        outputs=reconstruction_loss,
        updates=updates,
        givens={symbolic_input: batch_input},
    )


# forward/backward (optional) pass for discriminator
def create_discriminator_func(layers, apply_updates=False):
    """Compile a function returning the discriminator loss for a batch pair.

    The compiled function takes a batch of inputs and a batch of prior
    samples and returns the mean binary cross-entropy of
    ``'l_discriminator_out'``. With ``apply_updates=True`` each call also
    applies one Nesterov-momentum step (learning rate 0.1, momentum 0.0) to
    the trainable parameters tagged ``discriminator``.
    """
    encoder_input = T.fmatrix('X')
    prior_samples = T.fmatrix('pz')

    encoder_input_batch = T.fmatrix('X_batch')
    prior_samples_batch = T.fmatrix('pz_batch')

    # the discriminator receives samples from q(z|x) and p(z)
    # and should predict to which distribution each sample belongs
    network_inputs = {
        layers['l_prior_in']: prior_samples,
        layers['l_encoder_in']: encoder_input,
    }
    predictions = get_output(
        layers['l_discriminator_out'],
        inputs=network_inputs,
        deterministic=False,
    )

    # label samples from q(z|x) as 1 and samples from p(z) as 0
    encoder_labels = T.ones((encoder_input_batch.shape[0], 1))
    prior_labels = T.zeros((prior_samples_batch.shape[0], 1))
    targets = T.vertical_stack(encoder_labels, prior_labels)

    loss = T.mean(T.nnet.binary_crossentropy(predictions, targets))

    updates = None
    if apply_updates:
        # only layers that are part of the discriminator should be updated
        discriminator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, discriminator=True)
        updates = nesterov_momentum(loss, discriminator_params, 0.1, 0.0)

    return theano.function(
        inputs=[
            theano.In(encoder_input_batch),
            theano.In(prior_samples_batch),
        ],
        outputs=loss,
        updates=updates,
        givens={
            encoder_input: encoder_input_batch,
            prior_samples: prior_samples_batch,
        },
    )


# forward/backward (optional) pass for the generator
# note that the generator is the same network as the encoder,
# but updated separately
def create_generator_func(layers, apply_updates=False):
    """Compile a function returning the generator (encoder) adversarial loss.

    Parameters
    ----------
    layers : dict
        Name -> layer dictionary; reads ``'l_encoder_out'``,
        ``'l_prior_encoder_concat'`` and ``'l_discriminator_out'``.
    apply_updates : bool, optional
        When True, each call also applies one Nesterov-momentum step
        (learning rate 0.1, momentum 0.0) to the trainable parameters
        tagged ``generator``.

    Returns
    -------
    A compiled Theano function taking one batch of inputs and returning the
    mean binary cross-entropy between the discriminator's predictions on
    the encoder codes and an all-zero target.
    """
    X = T.fmatrix('X')
    X_batch = T.fmatrix('X_batch')

    # no need to pass an input to l_prior_in here
    generator_outputs = get_output(
        layers['l_encoder_out'], X, deterministic=False)

    # so pass the output of the generator as the output of the concat layer
    discriminator_outputs = get_output(
        layers['l_discriminator_out'],
        inputs={
            layers['l_prior_encoder_concat']: generator_outputs,
        },
        deterministic=False
    )

    # the discriminator learns to predict 1 for q(z|x),
    # so the generator should fool it into predicting 0.
    # Build an explicit (batch, 1) column of zeros, like the targets in
    # create_discriminator_func; zeros_like(X_batch.shape[0]) was only an
    # integer scalar that happened to broadcast.
    generator_targets = T.zeros((X_batch.shape[0], 1))

    # so the generator needs to push the discriminator's output to 0
    generator_loss = T.mean(
        T.nnet.binary_crossentropy(
            discriminator_outputs,
            generator_targets,
        )
    )

    if apply_updates:
        # only layers that are part of the generator (i.e., encoder)
        # should be updated
        generator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, generator=True)

        generator_updates = nesterov_momentum(
            generator_loss, generator_params, 0.1, 0.0)
    else:
        generator_updates = None

    generator_func = theano.function(
        inputs=[
            theano.In(X_batch),
        ],
        outputs=generator_loss,
        updates=generator_updates,
        givens={
            X: X_batch,
        },
    )

    return generator_func

In [67]:
def save_weights(weights, filename):
    """Pickle ``weights`` into ``filename`` using the highest pickle protocol.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(filename, 'wb') as sink:
        pickler = pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL)
        pickler.dump(weights)


def load_weights(layer, filename):
    """Read pickled parameter values from ``filename`` into the model.

    Values are paired, in order, with the parameters that
    ``get_all_params(layer)`` returns; pairing stops at the shorter of the
    two sequences.
    """
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file -- only load weight files from a trusted source.
    with open(filename, 'rb') as source:
        stored_values = pickle.load(source)

    model_params = get_all_params(layer)
    # assign the parameter values stored on disk to the model
    for stored_value, model_param in zip(stored_values, model_params):
        model_param.set_value(stored_value)


def build_model():
    """Build the adversarial autoencoder and return its layers by name.

    The network has three parts:

    * encoder/generator: 784 -> 1000 -> 1000 -> 2 (parameters tagged
      ``generator``),
    * decoder: 2 -> 1000 -> 1000 -> 784 with a sigmoid output,
    * discriminator: 2 -> 1000 -> 1000 -> 1 with a sigmoid output, fed by
      the encoder codes concatenated (along axis 0) with samples from the
      prior input layer (parameters tagged ``discriminator``).

    Returns
    -------
    dict
        Maps each layer's ``name`` to the layer, covering every layer that
        feeds ``l_decoder_out`` or ``l_discriminator_out``.
    """
    num_input = 28 * 28
    # should really use more dimensions, but this is nice for visualization
    num_code = 2
    num_hidden = 1000

    def tag_params(layer, tag):
        # tag W and b so get_all_params(..., <tag>=True) can select them
        layer.params[layer.W].add(tag)
        layer.params[layer.b].add(tag)

    l_encoder_in = InputLayer((None, num_input), name='l_encoder_in')

    # first layer of the encoder/generator
    l_dense1 = DenseLayer(
        l_encoder_in, num_units=num_hidden, nonlinearity=rectify,
        name='l_encoder_dense1',
    )
    tag_params(l_dense1, 'generator')

    # second layer of the encoder/generator
    l_dense2 = DenseLayer(
        l_dense1, num_units=num_hidden, nonlinearity=rectify,
        name='l_encoder_dense2',
    )
    tag_params(l_dense2, 'generator')

    # output of the encoder/generator: q(z|x)
    l_encoder_out = DenseLayer(
        l_dense2, num_units=num_code, nonlinearity=linear,
        name='l_encoder_out',
    )
    tag_params(l_encoder_out, 'generator')

    # first layer of the decoder
    l_decoder_in = DenseLayer(
        l_encoder_out, num_units=num_hidden, nonlinearity=rectify,
        name='l_decoder_dense1',
    )
    # second layer of the decoder
    l_dense5 = DenseLayer(
        l_decoder_in, num_units=num_hidden, nonlinearity=rectify,
        name='l_decoder_dense2',
    )

    # output of the decoder: p(x|z)
    l_decoder_out = DenseLayer(
        l_dense5, num_units=num_input, nonlinearity=sigmoid,
        name='l_decoder_out',
    )

    # input layer providing samples from p(z)
    l_prior = InputLayer((None, num_code), name='l_prior_in')

    # concatenate samples from q(z|x) to samples from p(z)
    # NOTE(review): both inputs have an unspecified (None) batch size on the
    # concatenation axis; the "'int' and 'NoneType'" TypeError recorded in
    # this notebook presumably comes from a Lasagne version whose ConcatLayer
    # cannot sum None sizes -- confirm the installed version.
    l_concat = ConcatLayer(
        [l_encoder_out, l_prior], axis=0, name='l_prior_encoder_concat',
    )

    # first layer of the discriminator
    l_dense6 = DenseLayer(
        l_concat, num_units=num_hidden, nonlinearity=rectify,
        name='l_discriminator_dense1',
    )
    tag_params(l_dense6, 'discriminator')

    # second layer of the discriminator
    l_dense7 = DenseLayer(
        l_dense6, num_units=num_hidden, nonlinearity=rectify,
        name='l_discriminator_dense2',
    )
    tag_params(l_dense7, 'discriminator')

    # output layer of the discriminator
    l_discriminator_out = DenseLayer(
        l_dense7, num_units=1, nonlinearity=sigmoid,
        name='l_discriminator_out',
    )
    tag_params(l_discriminator_out, 'discriminator')

    model_layers = get_all_layers([l_decoder_out, l_discriminator_out])

    # put all layers in a dictionary for convenience
    return {layer.name: layer for layer in model_layers}

In [68]:
# Build the network once to check that the layer graph can be constructed.
build_model()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [20]:
def train_autoencoder():
    """Train the adversarial autoencoder on MNIST and save its weights.

    For every mini-batch of 128 samples three steps run in order:

    1. update encoder/decoder to minimise the reconstruction loss,
    2. update the discriminator to separate q(z|x) from samples of p(z),
    3. update the generator (encoder) so q(z|x) mimics p(z).

    After each epoch the same three losses are evaluated (without updates)
    on the test set. Training runs for up to 5000 epochs and can be stopped
    with Ctrl-C; in both cases the parameter values are written to
    ``weights/weights_train_val.pickle`` (the directory is created if
    missing).
    """
    print('building model')
    layers = build_model()

    max_epochs = 5000
    batch_size = 128
    weightsfile = os.path.join('weights', 'weights_train_val.pickle')

    print('compiling theano functions for training')
    print('  encoder/decoder')
    encoder_decoder_update = create_encoder_decoder_func(
        layers, apply_updates=True)
    print('  discriminator')
    discriminator_update = create_discriminator_func(
        layers, apply_updates=True)
    print('  generator')
    generator_update = create_generator_func(
        layers, apply_updates=True)

    print('compiling theano functions for validation')
    print('  encoder/decoder')
    encoder_decoder_func = create_encoder_decoder_func(layers)
    print('  discriminator')
    discriminator_func = create_discriminator_func(layers)
    print('  generator')
    generator_func = create_generator_func(layers)

    print('loading data')
    X_train, _, X_test, _ = load_mnist()

    def sample_prior(num_samples):
        # sample from p(z): uniform on [-2, 2) in each of the 2 code dimensions
        return np.random.uniform(
            low=-2, high=2, size=(num_samples, 2)).astype(np.float32)

    def run_epoch(X, reconstruction_fn, discriminator_fn, generator_fn):
        # one pass over X in mini-batches; returns the mean of each loss
        reconstruction_losses = []
        discriminative_losses = []
        generative_losses = []
        for batch_idx in get_batch_idx(X.shape[0], batch_size):
            X_batch = X[batch_idx]
            # 1.) encoder/decoder: reconstruction loss
            reconstruction_losses.append(reconstruction_fn(X_batch))
            # 2.) discriminator: q(z|x) vs. p(z); draw exactly as many prior
            # samples as there are inputs in this batch
            pz_batch = sample_prior(X_batch.shape[0])
            discriminative_losses.append(discriminator_fn(X_batch, pz_batch))
            # 3.) generator: make q(z|x) mimic p(z)
            generative_losses.append(generator_fn(X_batch))

        # average over minibatches
        return (
            np.mean(reconstruction_losses),
            np.mean(discriminative_losses),
            np.mean(generative_losses),
        )

    try:
        for epoch in range(1, max_epochs + 1):
            print('epoch %d' % (epoch))

            # compute loss on training data and apply gradient updates
            train_means = run_epoch(
                X_train, encoder_decoder_update,
                discriminator_update, generator_update)
            print('  train: rec = %.6f, dis = %.6f, gen = %.6f' % train_means)

            # compute loss on test data
            test_means = run_epoch(
                X_test, encoder_decoder_func,
                discriminator_func, generator_func)
            print('  test: rec = %.6f, dis = %.6f, gen = %.6f' % test_means)

    except KeyboardInterrupt:
        print('caught ctrl-c, stopped training')

    # save whether training ran to completion or was interrupted
    weights = get_all_param_values([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ])
    weights_dir = os.path.dirname(weightsfile)
    if weights_dir and not os.path.isdir(weights_dir):
        os.makedirs(weights_dir)
    print('saving weights to %s' % (weightsfile))
    save_weights(weights, weightsfile)

In [None]:
# Start training; runs for up to 5000 epochs and can be interrupted with Ctrl-C.
train_autoencoder()

In [5]:
import numpy
import pickle
import os
import theano

In [6]:
def load_mnist_dataset():
    """Load ``data/mnist.pkl`` and merge its first two splits.

    The pickle is expected to hold three ``(x, y)`` pairs. The first two are
    concatenated along axis 0 into one training set; the third is returned
    unchanged.

    Returns
    -------
    tuple
        ``((train_x, train_y), (test_x, test_y))``.
    """
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file -- only use an mnist.pkl from a trusted source.
    # Use a context manager so the file handle is closed deterministically.
    with open('data/mnist.pkl', 'rb') as f:
        dataset = pickle.load(f)
    train_set_x = numpy.concatenate((dataset[0][0], dataset[1][0]), axis=0)
    train_set_y = numpy.concatenate((dataset[0][1], dataset[1][1]), axis=0)
    return ((train_set_x, train_set_y), (dataset[2][0], dataset[2][1]))

In [7]:
def _shared_dataset(data_xy):
    """Wrap an ``(x, y)`` pair in Theano shared variables.

    ``x`` is converted to ``theano.config.floatX`` and ``y`` to ``int32``;
    both shared variables are created with ``borrow=True``.
    """
    features, labels = data_xy
    features_floatx = numpy.asarray(features, dtype=theano.config.floatX)
    labels_int32 = numpy.asarray(labels, dtype='int32')
    return (
        theano.shared(features_floatx, borrow=True),
        theano.shared(labels_int32, borrow=True),
    )

In [8]:
def load_mnist_full():
    """Return the full train and test splits as Theano shared variables.

    Returns
    -------
    list
        ``[(train_x, train_y), (test_x, test_y)]`` where every element is a
        shared variable produced by ``_shared_dataset``.
    """
    splits = load_mnist_dataset()
    train_pair = _shared_dataset(splits[0])
    test_pair = _shared_dataset(splits[1])
    return [train_pair, test_pair]

In [9]:
def load_mnist_for_validation(n_v=10000):
    """Randomly hold out ``n_v`` training samples as a validation set.

    The training split from ``load_mnist_dataset`` is permuted at random;
    the first ``n_v`` permuted samples form the validation set and the rest
    remain for training.

    Returns
    -------
    list
        ``[(train_x, train_y), (valid_x, valid_y)]`` as Theano shared
        variables.
    """
    all_x, all_y = load_mnist_dataset()[0]

    order = numpy.random.permutation(all_x.shape[0])
    held_out, kept = order[:n_v], order[n_v:]

    training_pair = _shared_dataset((all_x[kept], all_y[kept]))
    validation_pair = _shared_dataset((all_x[held_out], all_y[held_out]))

    return [training_pair, validation_pair]

In [10]:
def load_mnist_for_semi_sup(n_l=1000, n_v=1000):
    """Build labelled / unlabelled / validation splits for semi-supervised use.

    Parameters
    ----------
    n_l : int, optional
        Number of labelled training samples; ``n_l // 10`` samples are taken
        for each of the labels 0..9. If ``n_l`` is not a multiple of 10 the
        trailing rows of the labelled set stay zero-filled.
    n_v : int, optional
        Number of validation samples, taken from the samples left after the
        labelled set has been removed.

    Returns
    -------
    list
        ``[(train_x, train_y, train_ul_x), (valid_x, valid_y)]`` as Theano
        shared variables. ``train_ul_x`` is the shuffled union of the
        labelled samples and all remaining non-validation samples.
    """
    dataset = load_mnist_dataset()

    _train_set_x, _train_set_y = dataset[0]

    # shuffle the pool so the per-class picks below are random
    rand_ind = numpy.random.permutation(_train_set_x.shape[0])
    _train_set_x = _train_set_x[rand_ind]
    _train_set_y = _train_set_y[rand_ind]

    # samples per class; must be an int because it is used as a slice bound
    # (the former float value n_l / 10.0 is rejected as an index)
    s_c = int(n_l // 10)
    train_set_x = numpy.zeros((n_l, 28 ** 2))
    train_set_y = numpy.zeros(n_l)
    for i in range(10):
        # positions of the remaining samples labelled i; take the first s_c
        ind = numpy.where(_train_set_y == i)[0]
        picked = ind[0:s_c]
        rows = slice(i * s_c, (i + 1) * s_c)
        train_set_x[rows, :] = _train_set_x[picked, :]
        train_set_y[rows] = _train_set_y[picked]
        # remove the picked samples from the pool
        _train_set_x = numpy.delete(_train_set_x, picked, 0)
        _train_set_y = numpy.delete(_train_set_y, picked)

    # shuffle the labelled set so it is no longer grouped by class
    rand_ind = numpy.random.permutation(train_set_x.shape[0])
    train_set_x = train_set_x[rand_ind]
    train_set_y = train_set_y[rand_ind]
    valid_set_x = _train_set_x[:n_v]
    valid_set_y = _train_set_y[:n_v]
    # unlabelled set = labelled inputs + everything left after validation
    train_set_ul_x = numpy.concatenate((train_set_x, _train_set_x[n_v:]), axis=0)
    train_set_ul_x = train_set_ul_x[numpy.random.permutation(train_set_ul_x.shape[0])]
    ul_train_set_y = _train_set_y[n_v:]  # dummy

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    train_set_ul_x, ul_train_set_y = _shared_dataset((train_set_ul_x, ul_train_set_y))
    valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))

    return [(train_set_x, train_set_y, train_set_ul_x), (valid_set_x, valid_set_y)]