In [1]:
-

Using Theano backend.


Loaded MNIST


In [9]:
import tensorflow as tf

# Utils 

class AttributeDict(dict):
    """Dictionary whose items can also be read and written as attributes.

    ``d.key`` reads ``d['key']`` and ``d.key = value`` stores ``d['key'] = value``.
    """

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails. Unknown names must
        # raise AttributeError (not KeyError) so that hasattr() and
        # getattr(obj, name, default) work as they do for ordinary objects.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, a, b):
        # Attribute assignment is stored as a dictionary item.
        self.__setitem__(a, b)


def weight_variable(shape):
    """Return a `tf.Variable` of the given shape.

    The initial value is drawn from `tf.truncated_normal` with stddev 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a `tf.Variable` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, a stride of 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def resize_images(X, height_factor, width_factor):
    """Enlarge axes 1 and 2 of a 4-D tensor with nearest-neighbour resizing.

    Args:
        X: 4-D tensor; axes 1 and 2 are treated as height and width
            (assumes the NHWC layout used by the rest of this file).
        height_factor: integer multiplier applied to axis 1.
        width_factor: integer multiplier applied to axis 2.

    Returns:
        The resized tensor. Its static height/width are set to the scaled
        values whenever the corresponding input dimension is statically known.
    """
    # as_list() yields plain ints, or None for unknown dimensions, without
    # calling the __int__ dunder on each dimension object.
    static_shape = X.get_shape().as_list()

    # Build the multiplier directly as an int32 tensor; int() truncates the
    # same way the former numpy astype('int32') conversion did.
    factors = tf.constant([int(height_factor), int(width_factor)], dtype=tf.int32)
    new_size = tf.shape(X)[1:3] * factors
    X = tf.image.resize_nearest_neighbor(X, new_size)

    def _scaled(dim, factor):
        # Unknown static dimensions stay unknown.
        return None if dim is None else dim * factor

    X.set_shape((None,
                 _scaled(static_shape[1], height_factor),
                 _scaled(static_shape[2], width_factor),
                 None))
    return X

def up_sample_2x2(X):
    """Double the height and width of `X` via `resize_images`."""
    height_factor = width_factor = 2
    return resize_images(X, height_factor, width_factor)

def conv2d(x, W):
    """Convolve `x` with the filters `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def create_network(input_shape, filters, filters_size, dense, dropout):
    """Build a stack of conv/ReLU/max-pool layers followed by dense/ReLU layers.

    Args:
        input_shape: (height, width, channels) of a single input example.
        filters: number of output channels of each convolutional layer.
        filters_size: side length of the square kernel of each conv layer.
        dense: number of units of each dense layer; the last entry is also
            the width of the target placeholder `y`.
        dropout: one dropout rate per layer (conv layers first, then dense
            layers). A rate of 0 disables dropout for that layer.

    Returns:
        An AttributeDict holding:
          * `x`, `y`: input and target placeholders;
          * `input_shape`, `filters`, `filters_size`, `dense`, `do_tr`: the
            arguments as passed in (`do_tr` is `dropout`);
          * `W`, `b`: weight and bias variables, one per layer;
          * `do`: per layer, the dropout-rate placeholder or None;
          * `h`: layer outputs, starting with `x`;
          * `mp_h`: an empty list (not populated here).
    """
    d = AttributeDict()

    d.x = tf.placeholder(tf.float32, shape=[None] + list(input_shape))
    d.y = tf.placeholder(tf.float32, shape=[None, dense[-1]])

    d.input_shape = input_shape
    d.do_tr = dropout
    d.filters = filters
    d.filters_size = filters_size
    d.dense = dense

    d.W = []
    d.b = []
    d.do = []

    d.h = [d.x]
    d.mp_h = []

    L_conv = len(filters)
    channels = [input_shape[-1]] + list(filters)

    # Spatial size of the current feature map, tracked so the flattened size
    # fed to the first dense layer is exact for any height/width.
    height, width = input_shape[0], input_shape[1]

    for i in range(L_conv):
        # Convolution
        wi = weight_variable((filters_size[i], filters_size[i], channels[i], channels[i+1]))
        bi = bias_variable((channels[i+1],))
        hi = tf.nn.relu(conv2d(d.h[-1], wi) + bi)

        if dropout[i] != .0:
            # The rate is fed at run time so it can be switched off for evaluation.
            doi = tf.placeholder(tf.float32)
            hi = tf.nn.dropout(hi, keep_prob=1.0 - doi)
            d.do.append(doi)
        else:
            d.do.append(None)

        hi = max_pool_2x2(hi)
        # 'SAME' convolution with stride 1 keeps the size; 2x2 pooling with
        # stride 2 and 'SAME' padding rounds up: out = ceil(in / 2).
        height = (height + 1) // 2
        width = (width + 1) // 2

        d.W.append(wi)
        d.b.append(bi)
        d.h.append(hi)

    L_dense = len(dense)
    units = [channels[-1] * height * width] + list(dense)

    print(d.h)
    for i in range(L_dense):
        if i == 0:
            # Flatten the last feature map for the first dense layer.
            hi = tf.reshape(d.h[-1], shape=(-1, units[0]))
        wi = weight_variable((units[i], units[i+1]))
        bi = bias_variable((units[i+1],))
        # NOTE(review): ReLU is applied to every dense layer, including the
        # last one, whose output the notebook passes to softmax cross-entropy
        # as logits -- confirm this is intended.
        hi = tf.nn.relu(tf.matmul(hi, wi) + bi)

        if dropout[L_conv + i] != .0:
            doi = tf.placeholder(tf.float32)
            hi = tf.nn.dropout(hi, keep_prob=1.0 - doi)
            d.do.append(doi)
        else:
            d.do.append(None)

        d.W.append(wi)
        d.b.append(bi)
        d.h.append(hi)
    return d

def fit(sess, net, loss, data, metrics=None, iterations=500):
    """Minimise `loss` with Adam on mini-batches, then report on the test split.

    Args:
        sess: session used for every run call.
        net: network description as returned by `create_network`; this
            function reads `x`, `y`, `do` and `do_tr`.
        loss: tensor to minimise; always reported under the label 'loss'.
        data: object providing `next_batch(n)` returning (inputs, targets)
            and the attributes `X_test` and `Y_test`.
        metrics: optional dict mapping a label to an additional tensor to
            report. The dict passed in is not modified.
        iterations: number of training steps; values are printed every 100
            steps.

    Note:
        Every call runs `tf.global_variables_initializer()`, which
        re-initialises all variables in the graph, including any that were
        trained by an earlier call.
    """
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    sess.run(tf.global_variables_initializer())

    # Copy so that neither a shared default nor the caller's dict is mutated.
    metrics = dict(metrics) if metrics else {}
    metrics['loss'] = loss
    to_fetch_labels = list(metrics)
    to_fetch = [metrics[label] for label in to_fetch_labels]

    # Dropout rates used while training.
    feed_dict = {}
    for placeholder, rate in zip(net.do, net.do_tr):
        if placeholder is not None:
            feed_dict[placeholder] = rate

    print("Training stage")
    for step in range(iterations):
        batch = data.next_batch(50)
        feed_dict[net.x] = batch[0]
        feed_dict[net.y] = batch[1]
        if step % 100 == 0:
            fetched = sess.run(to_fetch, feed_dict=feed_dict)
            print(''.join("{} {}  ".format(label, value)
                          for label, value in zip(to_fetch_labels, fetched)))

        # Run through the session that was passed in rather than relying on
        # it also being the default session.
        sess.run(train_step, feed_dict=feed_dict)

    print("Testing stage")
    feed_dict = {net.x: data.X_test, net.y: data.Y_test}
    # Dropout is disabled for evaluation.
    for placeholder in net.do:
        if placeholder is not None:
            feed_dict[placeholder] = .0

    fetched = sess.run(to_fetch, feed_dict=feed_dict)

    for label, value in zip(to_fetch_labels, fetched):
        print("{} {}".format(label, value))
    

def fit_semisupervised(sess, net, loss, data, metrics=None):
    """Build and train, layer by layer, a decoder that reconstructs the input.

    For each convolutional layer i a new decoding step (2x up-sampling followed
    by a convolution) is created that maps the encoder output `net.h[i+1]`
    back to the shape of layer i. The previously created decoding steps are
    then chained down to the input, and the squared reconstruction error
    against `net.x` is minimised with `fit`.

    Args:
        sess: session handed to `fit`.
        net: network description as returned by `create_network`. A new
            entry `net.d` with the lists `W`, `b` and `h` of the decoder is
            added to it.
        loss: currently unused.
        data: data object handed to `fit`.
        metrics: currently unused.

    Returns:
        `net`, with the decoder stored under `net.d`.
    """
    L_conv = len(net.filters)

    net.d = AttributeDict()
    net.d.W = [None] * L_conv
    net.d.b = [None] * L_conv
    net.d.h = [None] * (L_conv + 1)

    def decode(h_above, w, b, layer):
        # Up-sample, convolve, and apply ReLU everywhere except on the
        # reconstruction of the input itself (layer 0), which stays linear.
        h = conv2d(up_sample_2x2(h_above), w) + b
        return tf.nn.relu(h) if layer > 0 else h

    # Layerwise initialization
    # Use the shape stored on the network rather than a notebook-level global.
    filters = [net.input_shape[-1]] + list(net.filters)

    for i in range(L_conv):
        print(i)
        net.d.h[i+1] = net.h[i+1]
        print(net.d.h[i+1])

        # New decoding step: channels of layer i+1 back to channels of layer i.
        wi = weight_variable((net.filters_size[i], net.filters_size[i], filters[i+1], filters[i]))
        bi = bias_variable((filters[i],))

        net.d.W[i] = wi
        net.d.b[i] = bi
        net.d.h[i] = decode(net.d.h[i+1], wi, bi, i)

        # Chain the decoding steps created in earlier iterations down to the input.
        for j in reversed(range(i)):
            net.d.h[j] = decode(net.d.h[j+1], net.d.W[j], net.d.b[j], j)

        # fit
        layerwise_loss = tf.reduce_mean(tf.reduce_sum(tf.square(net.d.h[0] - net.x), axis=(1, 2)))
        fit(sess, net, layerwise_loss, data, iterations=500)

    # Decoding pathway training

    # Encoding/decoding finetuning

    return net

# 2. Difference between calling run() on an op/variable vs. calling run() on the session
# 3. Fit SWWAE

# Experiment set-up. SemiSupervisedData is defined in an earlier cell; the
# meaning of its argument is not visible here.
data = SemiSupervisedData(50000)
input_shape = (28, 28, 1)
filters = [32, 64]
filters_size = [5, 5]
dense = [1024, 10]
dropout = [.0, .0, .5, .0]

# Single source of truth for the architecture: built from the values above
# instead of repeating the literals.
architecture = {'input_shape': input_shape, 'filters': filters, 'filters_size': filters_size,
                'dense': dense, 'dropout': dropout}

sess = tf.InteractiveSession()
try:
    net = create_network(**architecture)
    # Keyword arguments: TensorFlow >= 1.0 rejects positional logits/labels.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net.h[-1], labels=net.y))
    correct_prediction = tf.equal(tf.argmax(net.h[-1], 1), tf.argmax(net.y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    fit(sess, net, loss, data, metrics={'accuracy': accuracy}, iterations=1)

    fit_semisupervised(sess, net, loss, data)
finally:
    # Release the session even if graph construction or training fails.
    sess.close()


'''
aug_net = augment_network(net, **architecture)
loss = ...
fit(sess, aug_net, loss)
'''

# Fit 
''' 
How to implement s_loss + u_loss?

Papers such as SWWAE[1] mention that the u and s objectives should be calculated in the same phase. I assumed
that the labeled data used in an SGD iteration should be used for the class/rec phase, and unlabeled data should be
used just for the rec phase. However, the ladder networks implementation[2] shows that labeled data could be used for
classification only and the unlabeled stream could include labeled and unlabeled data, computing only the 
reconstruction loss. So we have two options:

1) s_loss(labeled) + u_loss(unlabeled + labeled)
2) s_loss(labeled) + u_loss(unlabeled*) # unlabeled* iterated over the whole dataset (labeled+unlabeled)

# The difference between both is that in 2) u_loss will not necessarily consider the same labeled images used
to compute s_loss

[1] https://arxiv.org/pdf/1506.02351v8.pdf
[2] https://github.com/rinuboney/ladder/blob/master/input_data.py
'''
'''
for i in range(L):
    diff_i = augmented_network['h'][i] - augmented_network['ih'][i]
    loss_i = tf.reduce_mean(tf.reduce_sum(tf.square(diff_i), axis=(1, 2)))
    fit(sess, data, loss_i)
'''
'''
s_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
u_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x - x_reconstructed), axis=(1, 2)))
loss = s_loss + u_loss
'''
    

[<tf.Tensor 'Placeholder_21:0' shape=(?, 28, 28, 1) dtype=float32>, <tf.Tensor 'MaxPool_14:0' shape=(?, 14, 14, 32) dtype=float32>, <tf.Tensor 'MaxPool_15:0' shape=(?, 7, 7, 64) dtype=float32>]
Training stage
loss 5.71879720688  accuracy 0.119999997318  
Testing stage
loss 2.55711627007
accuracy 0.17950001359
0
Tensor("MaxPool_14:0", shape=(?, 14, 14, 32), dtype=float32)
Training stage
loss 764.595581055  
loss 82.1227416992  
loss 37.47706604  
loss 24.4473075867  
loss 20.0887870789  
Testing stage
loss 17.7117424011
1
Tensor("MaxPool_15:0", shape=(?, 7, 7, 64), dtype=float32)
Training stage
loss 37217.7265625  
loss 487.566833496  
loss 317.579833984  
loss 225.838027954  
loss 191.764831543  
Testing stage
loss 162.297546387


'\ns_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))\nu_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x - x_reconstructed), axis=(1, 2)))\nloss = s_loss + u_loss\n'

In [None]:
# 