In [3]:
import random
import numpy as np
import time
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data
import math
import pdb

#mnist = input_data.read_data_sets("/tmp/data",one_hot=False)
mnist = input_data.read_data_sets('data/fashion')

Extracting data/fashion/train-images-idx3-ubyte.gz
Extracting data/fashion/train-labels-idx1-ubyte.gz
Extracting data/fashion/t10k-images-idx3-ubyte.gz
Extracting data/fashion/t10k-labels-idx1-ubyte.gz


In [4]:
def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)

In [5]:
def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w',[input_dim,output_dim],tf.float32,tf.random_normal_initializer(mean = 0.001,stddev=0.02))
        return tf.nn.relu(tf.matmul(input_,w))

In [6]:
def build_model_mlp(X_, _dropout, model_type='m1'):
    if model_type == 'm1':
        l1 = mlp(X_,784,128,name='l1')
        l1 = tf.nn.dropout(l1,_dropout)
        l2 = mlp(l1,128,128,name='l2')
        l2 = tf.nn.dropout(l2,_dropout)
        l3 = mlp(l2,128,128,name='l3')
        return l3
    elif model_type == 'm2':
        l1 = mlp(X_,784,128,name='l1')
        l1 = tf.nn.dropout(l1,_dropout)
        l2 = mlp(l1,128,128,name='l2')
        l2 = tf.nn.dropout(l2,_dropout)
        l3 = mlp(l2,128,128,name='l3')
        l3 = tf.nn.dropout(l3,_dropout)
        l4 = mlp(l3,128,128,name='l4')
        return l4

In [7]:
def contrastive_loss(y, d, batch_size):
    tmp = y *tf.square(d)
    #tmp= tf.mul(y,tf.square(d))
    tmp2 = (1-y) * tf.square(tf.maximum((1 - d),0))
    return tf.reduce_sum(tmp +tmp2)/batch_size/2

In [8]:
def compute_accuracy(prediction, labels):
    return labels[prediction.ravel() < 0.5].mean()
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])

In [9]:
def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y= np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y

In [10]:
def train_and_test(model1, model2, images_L, images_R):
    X_train = mnist.train._images
    y_train = mnist.train._labels
    X_test = mnist.test._images
    y_test = mnist.test._labels
    batch_size = 128
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.001
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 10, 0.1,  staircase=True)

    # create training+test positive and negative pairs:
    digit_indices = [np.where(y_train == i)[0] for i in range(10)]
    tr_pairs, tr_y = create_pairs(X_train, digit_indices)
    digit_indices = [np.where(y_test == i)[0] for i in range(10)]
    te_pairs, te_y = create_pairs(X_test, digit_indices)
    labels = tf.placeholder(tf.float32,shape=([None,1]),name='gt')

    # set distance layer
    distance  = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(model1,model2),2),1,keep_dims=True))
    
    loss = contrastive_loss(labels, distance, batch_size)
    # contrastice loss:
    t_vars = tf.trainable_variables()
    d_vars  = [var for var in t_vars if 'l' in var.name]
    batch = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(learning_rate = 0.0001).minimize(loss)

    # Create a summary to monitor cost tensor
    tf.summary.scalar("loss", loss)
    # Create a summary to monitor accuracy tensor
    #tf.summary.scalar("accuracy", acc)
    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()
    
    # Launch the graph:
    with tf.Session() as sess:
        #sess.run(init)
        tf.initialize_all_variables().run()
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        # Training cycle:
        for epoch in range(10):
            avg_loss = 0.
            avg_acc = 0.
            total_batch = int(X_train.shape[0]/batch_size)
            start_time = time.time()
            # Loop over all batches:
            for i in range(total_batch):
                s = i * batch_size
                e = (i+1) * batch_size
                # Fit training using batch data:
                input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
                _, loss_value, predict, summary = sess.run([optimizer, loss, distance, merged_summary_op], 
                                                           feed_dict={images_L:input1, images_R:input2, labels:y, dropout_f:0.9})
                feature1 = model1.eval(feed_dict={images_L:input1, dropout_f:0.9})
                feature2 = model2.eval(feed_dict={images_R:input2, dropout_f:0.9})
                tr_acc = compute_accuracy(predict, y)
                if math.isnan(tr_acc) and epoch != 0:
                    print('tr_acc %0.2f' % tr_acc)
                    
                avg_loss += loss_value
                avg_acc += tr_acc*100
            #print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))
            duration = time.time() - start_time
            summary_writer.add_summary(summary, epoch)
            print('Epoch %d  time: %f loss %0.5f acc %0.2f' %(epoch, duration, avg_loss/(total_batch), avg_acc/total_batch))
            
        y = np.reshape(tr_y, (tr_y.shape[0], 1))
        predict = distance.eval(feed_dict={images_L:tr_pairs[:, 0], images_R:tr_pairs[:, 1], labels:y, dropout_f:1.0})
        tr_acc = compute_accuracy(predict, y)
        print('Accuracy on training set %0.2f' % (100 * tr_acc))

        # Test model:
        predict = distance.eval(feed_dict={images_L:te_pairs[:, 0], images_R:te_pairs[:, 1], labels:y, dropout_f:1.0})
        y = np.reshape(te_y, (te_y.shape[0], 1))
        te_acc = compute_accuracy(predict, y)
    print('Accuracy on test set %0.2f' % (100 * te_acc))

In [11]:
# Initializing the variables:
init = tf.global_variables_initializer()
logs_path = "./siamese-logs/"

images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
images_R = tf.placeholder(tf.float32,shape=([None,784]),name='R')
dropout_f = tf.placeholder("float")

In [12]:
with tf.variable_scope("siamese_model1") as scope:
    m1_net_1 = build_model_mlp(images_L, dropout_f, 'm1')
    scope.reuse_variables()
    m1_net_2 = build_model_mlp(images_R, dropout_f, 'm1')
train_and_test(m1_net_1, m1_net_2, images_L, images_R)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Epoch 0  time: 11.759466 loss 0.09413 acc 70.20
Epoch 1  time: 11.815649 loss 0.06534 acc 80.72
Epoch 2  time: 11.868871 loss 0.05345 acc 86.05
Epoch 3  time: 11.806733 loss 0.04554 acc 89.41
Epoch 4  time: 12.173197 loss 0.03922 acc 91.60
Epoch 5  time: 11.856660 loss 0.03446 acc 93.25
Epoch 6  time: 11.631263 loss 0.03084 acc 94.24
Epoch 7  time: 11.995389 loss 0.02775 acc 95.09
Epoch 8  time: 11.895437 loss 0.02522 acc 95.87
Epoch 9  time: 11.898722 loss 0.02324 acc 96.48
Accuracy on training set 90.48
Accuracy on test set 90.48


In [10]:
with tf.variable_scope("siamese_model2") as scope:
    m2_net_1 = build_model_mlp(images_L, dropout_f, 'm2')
    scope.reuse_variables()
    m2_net_2 = build_model_mlp(images_R, dropout_f, 'm2')
train_and_test(m2_net_1, m2_net_2, images_L, images_R)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Epoch 0  time: 13.246230 loss 0.11343 acc 65.29
Epoch 1  time: 13.486138 loss 0.07500 acc 76.06
Epoch 2  time: 13.503401 loss 0.05956 acc 81.92
Epoch 3  time: 13.640658 loss 0.05192 acc 84.77
Epoch 4  time: 13.366747 loss 0.04579 acc 87.22
Epoch 5  time: 13.340147 loss 0.04150 acc 88.71
Epoch 6  time: 13.224046 loss 0.03855 acc 89.68
Epoch 7  time: 13.370544 loss 0.03569 acc 90.56
Epoch 8  time: 13.281561 loss 0.03354 acc 91.44
Epoch 9  time: 13.340416 loss 0.03152 acc 92.00
Accuracy on training set 80.09
Accuracy on test set 80.00
