In [1]:
import random
import numpy as np
import time
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data
import math
import pdb

#mnist = input_data.read_data_sets("/tmp/data",one_hot=False)
mnist = input_data.read_data_sets('data/fashion')

Extracting data/fashion/train-images-idx3-ubyte.gz
Extracting data/fashion/train-labels-idx1-ubyte.gz
Extracting data/fashion/t10k-images-idx3-ubyte.gz
Extracting data/fashion/t10k-labels-idx1-ubyte.gz


In [2]:
def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i+1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)

In [3]:
def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w',[input_dim,output_dim],tf.float32,tf.random_normal_initializer(mean = 0.001,stddev=0.02))
        return tf.nn.relu(tf.matmul(input_,w))

In [4]:
def mlpnet(image, _dropout):
    l1 = mlp(image,784,128,name='l1')
    l1 = tf.nn.dropout(l1,_dropout)
    l2 = mlp(l1,128,128,name='l2')
    l2 = tf.nn.dropout(l2,_dropout)
    l3 = mlp(l2,128,128,name='l3')
    return l3

In [5]:
def build_model_mlp(X_, _dropout):
    model = mlpnet(X_, _dropout)
    return model

In [6]:
def contrastive_loss(y, d):
    tmp= y *tf.square(d)
    #tmp= tf.mul(y,tf.square(d))
    tmp2 = (1-y) *tf.square(tf.maximum((1 - d),0))
    return tf.reduce_sum(tmp +tmp2)/batch_size/2

In [7]:
def compute_accuracy(prediction, labels):
    return labels[prediction.ravel() < 0.5].mean()
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])

In [8]:
def accuracy(prediction, labels):
    return labels[tf.Session().run(tf.reshape(prediction, [-1]) < 0.5)].mean()
    
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])

In [9]:
def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y= np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y

In [10]:
# Initializing the variables:
init = tf.global_variables_initializer()
logs_path = "./siamese-logs/"

# the data, shuffled and split between train and test sets:
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.test._images
y_test = mnist.test._labels
batch_size = 128
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.001
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 10, 0.1,  staircase=True)

# create training+test positive and negative pairs:
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)

images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
images_R = tf.placeholder(tf.float32,shape=([None,784]),name='R')
labels = tf.placeholder(tf.float32,shape=([None,1]),name='gt')
dropout_f = tf.placeholder("float")

with tf.variable_scope("siamese") as scope:
    model1 = build_model_mlp(images_L, dropout_f)
    scope.reuse_variables()
    model2 = build_model_mlp(images_R, dropout_f)

distance  = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(model1,model2),2),1,keep_dims=True))
loss = contrastive_loss(labels, distance)
acc = accuracy(distance, np.reshape(tr_y, (tr_y.shape[0], 1)))



# contrastice loss:
t_vars = tf.trainable_variables()
d_vars  = [var for var in t_vars if 'l' in var.name]
batch = tf.Variable(0)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.0001).minimize(loss)
#optimizer = tf.train.RMSPropOptimizer(0.0001,momentum=0.9,epsilon=1e-6).minimize(loss)

# Create a summary to monitor cost tensor
tf.summary.scalar("loss", loss)
# Create a summary to monitor accuracy tensor
tf.summary.scalar("accuracy", acc)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

# Launch the graph:
with tf.Session() as sess:
    #sess.run(init)
    tf.initialize_all_variables().run()
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    # Training cycle:
    for epoch in range(30):
        avg_loss = 0.
        avg_acc = 0.
        total_batch = int(X_train.shape[0]/batch_size)
        start_time = time.time()
        # Loop over all batches:
        for i in range(total_batch):
            s = i * batch_size
            e = (i+1) * batch_size
            # Fit training using batch data:
            input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
            _, loss_value, predict, summary = sess.run([optimizer, loss, distance, merged_summary_op], feed_dict={images_L:input1, images_R:input2, labels:y, dropout_f:0.9})
            feature1 = model1.eval(feed_dict={images_L:input1, dropout_f:0.9})
            feature2 = model2.eval(feed_dict={images_R:input2, dropout_f:0.9})
            tr_acc = compute_accuracy(predict, y)
            if math.isnan(tr_acc) and epoch != 0:
                print('tr_acc %0.2f' % tr_acc)
                pdb.set_trace()
            avg_loss += loss_value
            avg_acc += tr_acc*100
        #print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))
        duration = time.time() - start_time
        summary_writer.add_summary(summary, epoch)
        print('Epoch %d  time: %f loss %0.5f acc %0.2f' %(epoch, duration, avg_loss/(total_batch), avg_acc/total_batch))
    y = np.reshape(tr_y, (tr_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L:tr_pairs[:, 0], images_R:tr_pairs[:, 1], labels:y, dropout_f:1.0})
    tr_acc = compute_accuracy(predict, y)
    print('Accuracy on training set %0.2f' % (100 * tr_acc))

    # Test model:
    predict = distance.eval(feed_dict={images_L:te_pairs[:, 0], images_R:te_pairs[:, 1], labels:y, dropout_f:1.0})
    y = np.reshape(te_y, (te_y.shape[0], 1))
    te_acc = compute_accuracy(predict, y)
print('Accuracy on test set %0.2f' % (100 * te_acc))

InvalidArgumentError: You must feed a value for placeholder tensor 'L' with dtype float and shape [?,784]
	 [[Node: L = Placeholder[dtype=DT_FLOAT, shape=[?,784], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'L', defined at:
  File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/opt/conda/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/opt/conda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-89475c4634a4>", line 21, in <module>
    images_L = tf.placeholder(tf.float32,shape=([None,784]),name='L')
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1548, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2094, in _placeholder
    name=name)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'L' with dtype float and shape [?,784]
	 [[Node: L = Placeholder[dtype=DT_FLOAT, shape=[?,784], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
mnist.train._labels