In [1]:
import tensorflow as tf
from sklearn.svm import LinearSVC
import time

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Baseline functions
Helpful for checking that standard classification has high accuracy

In [3]:
def baseline_loss(features, classes):
    """Sparse softmax cross-entropy for plain classification.

    Args:
        features: network outputs, used as the logits.
        classes: integer class labels, used as the sparse targets.

    Returns:
        The loss produced by `tf.losses.sparse_softmax_cross_entropy`.
    """
    xent = tf.losses.sparse_softmax_cross_entropy(labels=classes, logits=features)
    return xent
    
def baseline_acc(features, classes):
    """Fraction of rows whose arg-max over axis 1 equals the given label.

    Args:
        features: per-row scores; the predicted class is the arg-max on axis 1.
        classes: labels compared element-wise against the predictions.

    Returns:
        Mean of the float32-cast equality mask.
    """
    predicted = tf.argmax(features, axis=1)
    hits = tf.cast(tf.equal(predicted, classes), tf.float32)
    return tf.reduce_mean(hits)

### Pairwise functions

In [4]:
def pair_split(features, classes):
    """Halve a batch so that row i of the first half is paired with row i of the second.

    Both tensors are split into two equal parts with `tf.split`, so their
    leading dimension must be even.

    Returns:
        A 4-tuple (features_a, features_b, classes_a, classes_b).
    """
    return (*tf.split(features, 2), *tf.split(classes, 2))

def pair_hinge_loss(features, classes):
    """Hinge loss on the inner product of paired embeddings.

    The batch is halved with `pair_split`; each pair gets a target of +1 when
    the two labels are equal and -1 otherwise, and contributes
    max(1 - target * <f1, f2>, 0) to the mean.
    """
    left, right, left_cls, right_cls = pair_split(features, classes)
    same_class = tf.cast(tf.equal(left_cls, right_cls), tf.float32)
    targets = tf.sign(same_class - 0.5)  # 1 -> +1, 0 -> -1
    dots = tf.reduce_sum(tf.multiply(left, right), axis=1)
    margins = 1.0 - tf.multiply(targets, dots)
    return tf.reduce_mean(tf.maximum(margins, 0))
    
def pair_log_loss(features, classes):
    """Logistic loss on the inner product of paired embeddings.

    The batch is halved with `pair_split`; each pair gets a target of +1 when
    the two labels are equal and -1 otherwise. With score = target * <f1, f2>,
    every pair contributes log(1 + exp(-score)).

    Args:
        features: embeddings; the leading dimension must be even.
        classes: labels aligned with `features`.

    Returns:
        Mean logistic loss over the pairs.
    """
    f1, f2, c1, c2 = pair_split(features, classes)
    inner_products = tf.reduce_sum(tf.multiply(f1, f2), axis=1)
    similarities = tf.sign(tf.cast(tf.equal(c1, c2), tf.float32) - 0.5)
    scores = tf.multiply(similarities, inner_products)
    # softplus(-s) == log(1 + exp(-s)). The previous log1p(scores) was NaN for
    # scores below -1 and decreased as scores became more negative, i.e. it
    # rewarded the wrong sign.
    return tf.reduce_mean(tf.nn.softplus(-scores))

def pair_acc(features, classes):
    """Fraction of pairs whose inner-product sign matches the same/different target.

    The target is +1 when the paired labels are equal and -1 otherwise. An
    inner product of exactly zero has sign 0 and therefore never matches.
    """
    left, right, left_cls, right_cls = pair_split(features, classes)
    targets = tf.sign(tf.cast(tf.equal(left_cls, right_cls), tf.float32) - 0.5)
    dots = tf.reduce_sum(tf.multiply(left, right), axis=1)
    agreement = tf.equal(tf.sign(dots), targets)
    return tf.reduce_mean(tf.cast(agreement, tf.float32))

### Triplet functions

In [5]:
def triplet_hinge_loss(features, classes):
    """Hinge loss over triplets formed by cutting the batch into thirds.

    With (f1, f2, f3) the three equal slices of `features`, each triplet
    contributes max(1 - <f1, f2 - f3>, 0); the loss is zero once <f1, f2>
    exceeds <f1, f3> by at least 1. `classes` is accepted for signature
    parity with the other losses and is not used.
    """
    first, second, third = tf.split(features, 3)
    gap = tf.reduce_sum(first * (second - third), axis=1)
    hinge = tf.maximum(1.0 - gap, 0)
    return tf.reduce_mean(hinge)

def triplet_log_loss(features, classes):
    """Logistic loss over triplets formed by cutting the batch into thirds.

    With (f1, f2, f3) the three equal slices of `features` and
    margin = <f1, f2 - f3>, each triplet contributes log(1 + exp(-margin)).

    Args:
        features: embeddings; the leading dimension must be divisible by 3.
        classes: unused; accepted for signature parity with the other losses.

    Returns:
        Mean logistic loss over the triplets.
    """
    f1, f2, f3 = tf.split(features, 3)
    inner_products = tf.reduce_sum(tf.multiply(f1, f2 - f3), axis=1)
    # softplus(-m) == log(1 + exp(-m)). The previous log1p(-inner_products)
    # became -inf/NaN as soon as a margin reached 1, i.e. exactly when a
    # triplet was well separated.
    return tf.reduce_mean(tf.nn.softplus(-inner_products))

def triplet_acc(features, classes):
    """Fraction of triplets with a strictly positive margin <f1, f2 - f3>.

    (f1, f2, f3) are the three equal slices of `features`; `classes` is not used.
    """
    first, second, third = tf.split(features, 3)
    gap = tf.reduce_sum(first * (second - third), axis=1)
    satisfied = tf.cast(tf.greater(gap, 0), tf.float32)
    return tf.reduce_mean(satisfied)

### Helper functions for network

In [6]:
def conv2d(name, l_input, w, b, s):
    """2-D convolution ('SAME' padding, stride `s` on both spatial axes) + bias + ReLU.

    Args:
        name: name given to the final ReLU op.
        l_input: input tensor passed to `tf.nn.conv2d`.
        w: convolution filter.
        b: bias added to the convolution output.
        s: stride used for both middle entries of the strides list.
    """
    convolved = tf.nn.conv2d(l_input, w, strides=[1, s, s, 1], padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased, name=name)

def max_pool(name, l_input, k):
    """Non-overlapping k x k max pooling with 'SAME' padding.

    The same [1, k, k, 1] shape is used for both the window and the strides.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(l_input, ksize=window, strides=list(window),
                          padding='SAME', name=name)

def norm(name, l_input, lsize=4):
    """Local response normalization via `tf.nn.lrn`.

    `lsize` is forwarded as the second positional argument of `tf.nn.lrn`;
    bias, alpha and beta are fixed at 1.0, 0.001 / 9.0 and 0.75.
    """
    lrn_settings = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)
    return tf.nn.lrn(l_input, lsize, **lrn_settings)

def fc_batch_relu(x, W, b, training=False):
    """Fully connected layer followed by batch normalization and ReLU.

    Args:
        x: input activations.
        W: weight matrix multiplied on the right of `x`.
        b: bias added before normalization.
        training: forwarded to `tf.layers.batch_normalization`. The default
            (False) keeps the previous behaviour, in which the layer always
            normalizes with its moving statistics and creates no update ops.
            Pass True (or a boolean tensor) while training so that batch
            statistics are used; the resulting ops in
            `tf.GraphKeys.UPDATE_OPS` must then run with the train step.

    Returns:
        relu(batch_norm(x @ W + b)).
    """
    pre_activation = tf.matmul(x, W) + b
    normalized = tf.layers.batch_normalization(pre_activation, training=training)
    return tf.nn.relu(normalized)

def reg_loss_fn(W):
    """Sum of `tf.nn.l2_loss` over the 'wd1', 'wd2' and 'out' entries of `W`.

    Other entries of the weight dictionary are not penalized.
    """
    penalty = tf.nn.l2_loss(W['wd1'])
    for key in ('wd2', 'out'):
        penalty = penalty + tf.nn.l2_loss(W[key])
    return penalty

### Model

In [7]:
class ConvModel(object):
    """Three-convolution embedding network for 28x28 single-channel inputs.

    Layout: conv 3x3x16 -> conv 3x3x32 + 2x2 pool -> conv 3x3x64 + 2x2 pool
    -> dense 1024 -> dense 128 -> linear `num_features`. All convolutions use
    stride 1, and dropout is applied after each convolutional stage.

    Attributes:
        features: output of the final linear layer.
        acc: `acc_fn(features, y)`.
        loss: `loss_fn(features, y) + reg * reg_loss_fn(weights)`.
        optimizer: the Adam optimizer used to build `optimize`.
        optimize: the training op minimizing `loss`.
    """

    def __init__(self, x, y, num_features, loss_fn, acc_fn, lr, reg, dropout):
        """Create the variables and build the feature, loss, accuracy and train ops.

        Args:
            x: input tensor; reshaped to [-1, 28, 28, 1] in `feature_model`.
            y: label tensor handed to `loss_fn` and `acc_fn`.
            num_features: width of the output embedding.
            loss_fn: callable `(features, classes) -> loss tensor`.
            acc_fn: callable `(features, classes) -> accuracy tensor`.
            lr: Adam learning rate.
            reg: multiplier applied to the L2 penalty from `reg_loss_fn`.
            dropout: value passed as the second argument of `tf.nn.dropout`
                (the keep probability in the TF 1.x API).
        """
        self.x = x
        self.y = y
        self.num_features = num_features
        self.loss_fn = loss_fn
        self.acc_fn = acc_fn
        self.dropout = dropout

        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        self.weights = {
            'wc1': tf.Variable(initializer([3, 3, 1, 16])),
            'wc2': tf.Variable(initializer([3, 3, 16, 32])),
            'wc3': tf.Variable(initializer([3, 3, 32, 64])),
            'wd1': tf.Variable(initializer([7*7*64, 1024])),
            'wd2': tf.Variable(initializer([1024, 128])),
            'out': tf.Variable(initializer([128, num_features]))
        }
        self.biases = {
            'bc1': tf.Variable(initializer([16])),
            'bc2': tf.Variable(initializer([32])),
            'bc3': tf.Variable(initializer([64])),
            'bd1': tf.Variable(initializer([1024])),
            'bd2': tf.Variable(initializer([128])),
            'out': tf.Variable(initializer([num_features]))
        }

        self.features = self.feature_model()
        self.acc = acc_fn(self.features, self.y)
        self.loss = loss_fn(self.features, self.y) + reg * reg_loss_fn(self.weights)

        # One optimizer only. Previously a second, unused AdamOptimizer was
        # constructed inside a control_dependencies block, which had no effect
        # because constructing an optimizer adds no ops to the graph.
        self.optimizer = tf.train.AdamOptimizer(lr)
        # Collected after the network is built so that any update ops the
        # layers registered are executed together with the train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimize = self.optimizer.minimize(self.loss)

    def feature_model(self):
        """Build the forward pass and return the `num_features`-wide output tensor."""
        # Flat input vectors -> NHWC image batch.
        images = tf.reshape(self.x, shape=[-1, 28, 28, 1])

        conv1 = conv2d('conv1', images, self.weights['wc1'], self.biases['bc1'], 1)
        conv1 = tf.nn.dropout(conv1, self.dropout)

        conv2 = conv2d('conv2', conv1, self.weights['wc2'], self.biases['bc2'], 1)
        pool2 = max_pool('pool2', conv2, k=2)
        pool2 = tf.nn.dropout(pool2, self.dropout)

        conv3 = conv2d('conv3', pool2, self.weights['wc3'], self.biases['bc3'], 1)
        pool3 = max_pool('pool3', conv3, k=2)
        pool3 = tf.nn.dropout(pool3, self.dropout)

        # Flatten to the input width expected by the first dense layer.
        flat_width = self.weights['wd1'].get_shape().as_list()[0]
        dense1 = tf.reshape(pool3, [-1, flat_width])
        dense1 = fc_batch_relu(dense1, self.weights['wd1'], self.biases['bd1'])
        dense2 = fc_batch_relu(dense1, self.weights['wd2'], self.biases['bd2'])

        out = tf.matmul(dense2, self.weights['out']) + self.biases['out']
        return out

In [8]:
class ConvModelSmall(object):
    """Two-convolution embedding network for 28x28 single-channel inputs.

    Layout: conv 3x3x16 (stride 2) -> conv 3x3x32 (stride 2) -> dense 512
    -> dense 128 -> linear `num_features`. Downsampling is done by the strided
    convolutions (no pooling), and dropout follows each convolution.

    Attributes:
        features: output of the final linear layer.
        acc: `acc_fn(features, y)`.
        loss: `loss_fn(features, y) + reg * reg_loss_fn(weights)`.
        optimizer: the Adam optimizer used to build `optimize`.
        optimize: the training op minimizing `loss`.
    """

    def __init__(self, x, y, num_features, loss_fn, acc_fn, lr, reg, dropout):
        """Create the variables and build the feature, loss, accuracy and train ops.

        Args:
            x: input tensor; reshaped to [-1, 28, 28, 1] in `feature_model`.
            y: label tensor handed to `loss_fn` and `acc_fn`.
            num_features: width of the output embedding.
            loss_fn: callable `(features, classes) -> loss tensor`.
            acc_fn: callable `(features, classes) -> accuracy tensor`.
            lr: Adam learning rate.
            reg: multiplier applied to the L2 penalty from `reg_loss_fn`.
            dropout: value passed as the second argument of `tf.nn.dropout`
                (the keep probability in the TF 1.x API).
        """
        self.x = x
        self.y = y
        self.num_features = num_features
        self.loss_fn = loss_fn
        self.acc_fn = acc_fn
        self.dropout = dropout

        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        self.weights = {
            'wc1': tf.Variable(initializer([3, 3, 1, 16])),
            'wc2': tf.Variable(initializer([3, 3, 16, 32])),
            'wd1': tf.Variable(initializer([7*7*32, 512])),
            'wd2': tf.Variable(initializer([512, 128])),
            'out': tf.Variable(initializer([128, num_features]))
        }
        self.biases = {
            'bc1': tf.Variable(initializer([16])),
            'bc2': tf.Variable(initializer([32])),
            'bd1': tf.Variable(initializer([512])),
            'bd2': tf.Variable(initializer([128])),
            'out': tf.Variable(initializer([num_features]))
        }

        self.features = self.feature_model()
        self.acc = acc_fn(self.features, self.y)
        self.loss = loss_fn(self.features, self.y) + reg * reg_loss_fn(self.weights)

        # One optimizer only. Previously a second, unused AdamOptimizer was
        # constructed inside a control_dependencies block, which had no effect
        # because constructing an optimizer adds no ops to the graph.
        self.optimizer = tf.train.AdamOptimizer(lr)
        # Collected after the network is built so that any update ops the
        # layers registered are executed together with the train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimize = self.optimizer.minimize(self.loss)

    def feature_model(self):
        """Build the forward pass and return the `num_features`-wide output tensor."""
        # Flat input vectors -> NHWC image batch.
        images = tf.reshape(self.x, shape=[-1, 28, 28, 1])

        conv1 = conv2d('conv1', images, self.weights['wc1'], self.biases['bc1'], 2)
        conv1 = tf.nn.dropout(conv1, self.dropout)
        conv2 = conv2d('conv2', conv1, self.weights['wc2'], self.biases['bc2'], 2)
        conv2 = tf.nn.dropout(conv2, self.dropout)

        # Flatten to the input width expected by the first dense layer.
        flat_width = self.weights['wd1'].get_shape().as_list()[0]
        dense1 = tf.reshape(conv2, [-1, flat_width])
        dense1 = fc_batch_relu(dense1, self.weights['wd1'], self.biases['bd1'])
        dense2 = fc_batch_relu(dense1, self.weights['wd2'], self.biases['bd2'])

        out = tf.matmul(dense2, self.weights['out']) + self.biases['out']
        return out

In [9]:
class ConvModelSmaller(object):
    """Smallest embedding network: three stride-2 convolutions and narrow dense layers.

    Layout: conv 3x3x16 -> conv 3x3x16 -> conv 3x3x32 (all stride 2)
    -> dense 256 -> dense 64 -> linear `num_features`. Dropout follows each
    convolution.

    Attributes:
        features: output of the final linear layer.
        acc: `acc_fn(features, y)`.
        loss: `loss_fn(features, y) + reg * reg_loss_fn(weights)`.
        optimizer: the Adam optimizer used to build `optimize`.
        optimize: the training op minimizing `loss`.
    """

    def __init__(self, x, y, num_features, loss_fn, acc_fn, lr, reg, dropout):
        """Create the variables and build the feature, loss, accuracy and train ops.

        Args:
            x: input tensor; reshaped to [-1, 28, 28, 1] in `feature_model`.
            y: label tensor handed to `loss_fn` and `acc_fn`.
            num_features: width of the output embedding.
            loss_fn: callable `(features, classes) -> loss tensor`.
            acc_fn: callable `(features, classes) -> accuracy tensor`.
            lr: Adam learning rate.
            reg: multiplier applied to the L2 penalty from `reg_loss_fn`.
            dropout: value passed as the second argument of `tf.nn.dropout`
                (the keep probability in the TF 1.x API).
        """
        self.x = x
        self.y = y
        self.num_features = num_features
        self.loss_fn = loss_fn
        self.acc_fn = acc_fn
        self.dropout = dropout

        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        self.weights = {
            'wc1': tf.Variable(initializer([3, 3, 1, 16])),
            'wc2': tf.Variable(initializer([3, 3, 16, 16])),
            'wc3': tf.Variable(initializer([3, 3, 16, 32])),
            'wd1': tf.Variable(initializer([4*4*32, 256])),
            'wd2': tf.Variable(initializer([256, 64])),
            'out': tf.Variable(initializer([64, num_features]))
        }
        self.biases = {
            'bc1': tf.Variable(initializer([16])),
            'bc2': tf.Variable(initializer([16])),
            'bc3': tf.Variable(initializer([32])),
            'bd1': tf.Variable(initializer([256])),
            'bd2': tf.Variable(initializer([64])),
            'out': tf.Variable(initializer([num_features]))
        }

        self.features = self.feature_model()
        self.acc = acc_fn(self.features, self.y)
        self.loss = loss_fn(self.features, self.y) + reg * reg_loss_fn(self.weights)

        # One optimizer only. Previously a second, unused AdamOptimizer was
        # constructed inside a control_dependencies block, which had no effect
        # because constructing an optimizer adds no ops to the graph.
        self.optimizer = tf.train.AdamOptimizer(lr)
        # Collected after the network is built so that any update ops the
        # layers registered are executed together with the train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimize = self.optimizer.minimize(self.loss)

    def feature_model(self):
        """Build the forward pass and return the `num_features`-wide output tensor."""
        # Flat input vectors -> NHWC image batch.
        images = tf.reshape(self.x, shape=[-1, 28, 28, 1])

        conv1 = conv2d('conv1', images, self.weights['wc1'], self.biases['bc1'], 2)
        conv1 = tf.nn.dropout(conv1, self.dropout)
        conv2 = conv2d('conv2', conv1, self.weights['wc2'], self.biases['bc2'], 2)
        conv2 = tf.nn.dropout(conv2, self.dropout)
        conv3 = conv2d('conv3', conv2, self.weights['wc3'], self.biases['bc3'], 2)
        conv3 = tf.nn.dropout(conv3, self.dropout)

        # Flatten to the input width expected by the first dense layer.
        flat_width = self.weights['wd1'].get_shape().as_list()[0]
        dense1 = tf.reshape(conv3, [-1, flat_width])
        dense1 = fc_batch_relu(dense1, self.weights['wd1'], self.biases['bd1'])
        dense2 = fc_batch_relu(dense1, self.weights['wd2'], self.biases['bd2'])

        out = tf.matmul(dense2, self.weights['out']) + self.biases['out']
        return out

In [10]:
def _linear_svc_accuracy(sess, model, x, dropout):
    """Fit a LinearSVC on train-set embeddings and return its accuracy on test-set embeddings."""
    svc = LinearSVC(random_state=0)
    train_features = sess.run(model.features, feed_dict={x: mnist.train.images, dropout: 1.0})
    svc.fit(train_features, mnist.train.labels)
    test_features = sess.run(model.features, feed_dict={x: mnist.test.images, dropout: 1.0})
    return svc.score(test_features, mnist.test.labels)


def test_model(model, loss_fn, acc_fn, num_features, num_steps, lr, keep_prob, reg,
               batch_size=200, eval_batch_size=1000, eval_every=1000):
    """Train a model on MNIST batches and periodically report its quality.

    Every `eval_every` steps this prints the loss/error on the current training
    batch and on one test batch, followed by the accuracy of a LinearSVC fitted
    on the model's embeddings of the full training set. After training it
    prints the error on one more test batch and a final LinearSVC accuracy.

    Args:
        model: class (or callable) invoked as
            `model(x, y, num_features, loss_fn, acc_fn, lr, reg, dropout)`;
            the result must expose `optimize`, `loss`, `acc` and `features`.
        loss_fn: loss callable forwarded to `model`.
        acc_fn: accuracy callable forwarded to `model`.
        num_features: embedding width forwarded to `model`.
        num_steps: number of training steps.
        lr: learning rate forwarded to `model`.
        keep_prob: value fed to the dropout placeholder during training
            (1.0 is fed during evaluation).
        reg: regularization multiplier forwarded to `model`.
        batch_size: training batch size. The pair losses split the batch in
            two and the triplet losses in three, so it must be divisible
            accordingly.
        eval_batch_size: size of each test batch used for the error estimates;
            the same divisibility requirement applies.
        eval_every: number of steps between progress reports.
    """
    # Build every run in its own graph. Previously each call added another
    # model to the shared default graph, so the graph kept growing and the
    # initializer re-initialized the variables of all earlier models too.
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.int64, shape=[None])
        dropout = tf.placeholder(tf.float32)
        net = model(x, y, num_features, loss_fn, acc_fn, lr, reg, dropout)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            print("begin training // num_features: %g, lr: %g, reg: %g"%(num_features, lr, reg))
            train_time = time.time()
            for step in range(num_steps):
                x_, y_ = mnist.train.next_batch(batch_size)
                sess.run(net.optimize, feed_dict={x:x_, y:y_, dropout:keep_prob})

                if step % eval_every == 0:
                    # Loss and accuracy share a forward pass, so fetch them together.
                    train_loss, train_acc = sess.run(
                        [net.loss, net.acc], feed_dict={x:x_, y:y_, dropout: 1.0})
                    train_error = 1 - train_acc

                    x_, y_ = mnist.test.next_batch(eval_batch_size)
                    test_loss, test_acc = sess.run(
                        [net.loss, net.acc], feed_dict={x:x_, y:y_, dropout: 1.0})
                    test_error = 1 - test_acc
                    print("\tstep %d: train loss %g, train error %g, test loss %g, test error %g"%
                          (step, train_loss, train_error, test_loss, test_error))

                    print("\tclassification accuracy: {:.4f}".format(
                        _linear_svc_accuracy(sess, net, x, dropout)))
            train_time = time.time() - train_time
            print("end training // time elapsed: %.4f s"%(train_time))

            # NOTE: despite the label, this is measured on a single test batch
            # of `eval_batch_size` examples, not on the whole test set.
            eval_test_time = time.time()
            x_, y_ = mnist.test.next_batch(eval_batch_size)
            test_error = 1 - sess.run(net.acc, feed_dict={x:x_, y:y_, dropout:1.0})
            eval_test_time = time.time() - eval_test_time
            print("test set error: %.4f // time elapsed: %.4f s"%(test_error, eval_test_time))

            print("classification accuracy: {:.4f}".format(
                _linear_svc_accuracy(sess, net, x, dropout)))

# Current experiments (uniform)

In [11]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training // num_features: 10, lr: 0.0001, reg: 0
	step 0: train loss 3.07616, train error 0.88, test loss 3.18369, test error 0.91
	classification accuracy: 0.4628
	step 1000: train loss 1.00625, train error 0.92, test loss 1.00607, test error 0.926
	classification accuracy: 0.4631
	step 2000: train loss 1.00508, train error 0.8, test loss 1.00558, test error 0.818
	classification accuracy: 0.5554
	step 3000: train loss 1.04824, train error 0.53, test loss 0.89831, test error 0.42
	classification accuracy: 0.7421
	step 4000: train loss 0.74638, train error 0.3, test loss 0.776152, test error 0.332
	classification accuracy: 0.7948
	step 5000: train loss 0.63737, train error 0.23, test loss 0.590983, test error 0.198
	classification accuracy: 0.8082
	step 6000: train loss 0.701635, train error 0.2, test loss 0.620889, test error 0.194
	classification accuracy: 0.8058
	step 7000: train loss 0.634198, train error 0.23, test loss 0.53412, test error 0.168
	classification accuracy: 0.8

In [13]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training // num_features: 20, lr: 0.0001, reg: 0
	step 0: train loss 2.70419, train error 0.9, test loss 2.58948, test error 0.864
	classification accuracy: 0.6301
	step 1000: train loss 1.01106, train error 0.86, test loss 1.01179, test error 0.91
	classification accuracy: 0.6387
	step 2000: train loss 1.00765, train error 0.89, test loss 1.00763, test error 0.888
	classification accuracy: 0.6924
	step 3000: train loss 0.997378, train error 0.52, test loss 0.916495, test error 0.444
	classification accuracy: 0.7816
	step 4000: train loss 0.756259, train error 0.28, test loss 0.853314, test error 0.384
	classification accuracy: 0.7976
	step 5000: train loss 0.776865, train error 0.29, test loss 0.620479, test error 0.246
	classification accuracy: 0.8369
	step 6000: train loss 0.541543, train error 0.21, test loss 0.64115, test error 0.226
	classification accuracy: 0.8626
	step 7000: train loss 0.518648, train error 0.14, test loss 0.563742, test error 0.22
	classification accurac

# ConvModelSmaller experiments

Halving the learning rate isn't helpful. Smaller convnet gets reasonably good results: 0.9146 accuracy on 10 features, 0.9508 on 20 features.

In [14]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training // num_features: 10, lr: 0.0001, reg: 0
	step 0: train loss 3.13961, train error 0.86, test loss 3.36935, test error 0.92
	classification accuracy: 0.4942
	step 1000: train loss 1.00971, train error 0.88, test loss 1.00981, test error 0.894
	classification accuracy: 0.5608
	step 2000: train loss 1.00426, train error 0.73, test loss 1.00526, test error 0.714
	classification accuracy: 0.6266
	step 3000: train loss 0.920731, train error 0.36, test loss 0.83678, test error 0.366
	classification accuracy: 0.7437
	step 4000: train loss 0.601978, train error 0.22, test loss 0.763397, test error 0.316
	classification accuracy: 0.7906
	step 5000: train loss 0.761131, train error 0.28, test loss 0.642095, test error 0.248
	classification accuracy: 0.8248
	step 6000: train loss 0.464199, train error 0.16, test loss 0.572754, test error 0.204
	classification accuracy: 0.8437
	step 7000: train loss 0.649026, train error 0.23, test loss 0.495997, test error 0.168
	classification accur

In [12]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.00005, keep_prob=0.8, reg=0)

begin training // num_features: 10, lr: 5e-05, reg: 0
	step 0: train loss 2.18399, train error 0.87, test loss 2.20302, test error 0.878
	classification accuracy: 0.5234
	step 1000: train loss 1.00991, train error 0.89, test loss 1.01056, test error 0.904
	classification accuracy: 0.5251
	step 2000: train loss 1.006, train error 0.81, test loss 1.00606, test error 0.812
	classification accuracy: 0.5679
	step 3000: train loss 0.997809, train error 0.45, test loss 0.924546, test error 0.424
	classification accuracy: 0.6992
	step 4000: train loss 0.831077, train error 0.37, test loss 0.868367, test error 0.426
	classification accuracy: 0.7383
	step 5000: train loss 0.778174, train error 0.34, test loss 0.882081, test error 0.374
	classification accuracy: 0.7817
	step 6000: train loss 0.890469, train error 0.34, test loss 0.845914, test error 0.364
	classification accuracy: 0.8015
	step 7000: train loss 0.548851, train error 0.22, test loss 0.724886, test error 0.282
	classification accura

In [None]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training // num_features: 20, lr: 0.0001, reg: 0
	step 0: train loss 1.88371, train error 0.9, test loss 1.82962, test error 0.878
	classification accuracy: 0.5797
	step 1000: train loss 1.01715, train error 0.92, test loss 1.01732, test error 0.906
	classification accuracy: 0.6039
	step 2000: train loss 0.903325, train error 0.4, test loss 0.918699, test error 0.48
	classification accuracy: 0.7637
	step 3000: train loss 0.688052, train error 0.29, test loss 0.802041, test error 0.336
	classification accuracy: 0.8151
	step 4000: train loss 0.692119, train error 0.29, test loss 0.570496, test error 0.244
	classification accuracy: 0.8355
	step 5000: train loss 0.418868, train error 0.17, test loss 0.575287, test error 0.212
	classification accuracy: 0.8603
	step 6000: train loss 0.43912, train error 0.19, test loss 0.541096, test error 0.148
	classification accuracy: 0.8616
	step 7000: train loss 0.335555, train error 0.1, test loss 0.42836, test error 0.152
	classification accurac

In [None]:
test_model(model=ConvModelSmaller, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.00005, keep_prob=0.8, reg=0)

begin training // num_features: 20, lr: 5e-05, reg: 0
	step 0: train loss 2.1085, train error 0.93, test loss 2.05052, test error 0.902
	classification accuracy: 0.5347
	step 1000: train loss 1.0293, train error 0.95, test loss 1.0279, test error 0.916
	classification accuracy: 0.5777
	step 2000: train loss 1.02165, train error 0.89, test loss 1.02285, test error 0.904
	classification accuracy: 0.5938


# ConvModelSmall experiments

It appears that dropout rates matter much less. With a smaller model, we also seem to learn more quickly or learn better.
Keep prob: 0.9
    20 features: 0.9785, 0.0100
    10 features: 0.9754, 0.0280
Keep prob: 0.8
    10 features: 0.9559, 0.0380
    20 features: 0.9783, 0.0240

In [12]:
test_model(model=ConvModelSmall, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.0001, keep_prob=0.9, reg=0)

begin training
	step 0: train loss 1.43393, train error 0.89, test loss 1.42729, test error 0.886
	classification accuracy: 0.4344
	step 1000: train loss 0.765165, train error 0.31, test loss 0.861398, test error 0.364
	classification accuracy: 0.7969
	step 2000: train loss 0.761658, train error 0.27, test loss 0.636762, test error 0.22
	classification accuracy: 0.8368
	step 3000: train loss 0.383273, train error 0.16, test loss 0.48594, test error 0.164
	classification accuracy: 0.8262
	step 4000: train loss 0.486939, train error 0.18, test loss 0.428506, test error 0.15
	classification accuracy: 0.8262
	step 5000: train loss 0.386481, train error 0.09, test loss 0.341318, test error 0.106
	classification accuracy: 0.8561
	step 6000: train loss 0.272224, train error 0.07, test loss 0.474743, test error 0.1
	classification accuracy: 0.8584
	step 7000: train loss 0.247179, train error 0.1, test loss 0.330533, test error 0.1
	classification accuracy: 0.8831
	step 8000: train loss 0.23922

In [11]:
test_model(model=ConvModelSmall, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.9, reg=0)

begin training
	step 0: train loss 2.45935, train error 0.9, test loss 2.44679, test error 0.89
	classification accuracy: 0.6239
	step 1000: train loss 0.822141, train error 0.25, test loss 0.701273, test error 0.27
	classification accuracy: 0.8474
	step 2000: train loss 0.536236, train error 0.16, test loss 0.559991, test error 0.176
	classification accuracy: 0.8884
	step 3000: train loss 0.451835, train error 0.13, test loss 0.387406, test error 0.116
	classification accuracy: 0.8964
	step 4000: train loss 0.287364, train error 0.09, test loss 0.384703, test error 0.128
	classification accuracy: 0.9106
	step 5000: train loss 0.351404, train error 0.09, test loss 0.254328, test error 0.078
	classification accuracy: 0.9162
	step 6000: train loss 0.159584, train error 0.07, test loss 0.254165, test error 0.084
	classification accuracy: 0.9254
	step 7000: train loss 0.143335, train error 0.04, test loss 0.201902, test error 0.056
	classification accuracy: 0.9370
	step 8000: train loss 0.

In [13]:
test_model(model=ConvModelSmall, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training
	step 0: train loss 2.01089, train error 0.9, test loss 1.93519, test error 0.866
	classification accuracy: 0.4117
	step 1000: train loss 1.0546, train error 0.53, test loss 0.917702, test error 0.436
	classification accuracy: 0.6805
	step 2000: train loss 0.658991, train error 0.24, test loss 0.788234, test error 0.284
	classification accuracy: 0.8004
	step 3000: train loss 0.73328, train error 0.28, test loss 0.644832, test error 0.236
	classification accuracy: 0.8276
	step 4000: train loss 0.587281, train error 0.17, test loss 0.455392, test error 0.138
	classification accuracy: 0.8088
	step 5000: train loss 0.453873, train error 0.17, test loss 0.359427, test error 0.098
	classification accuracy: 0.8352
	step 6000: train loss 0.3686, train error 0.15, test loss 0.309451, test error 0.094
	classification accuracy: 0.8547
	step 7000: train loss 0.326878, train error 0.12, test loss 0.392441, test error 0.116
	classification accuracy: 0.8711
	step 8000: train loss 0.377

In [17]:
test_model(model=ConvModelSmall, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training
	step 0: train loss 1.88991, train error 0.93, test loss 1.84976, test error 0.912
	classification accuracy: 0.5970
	step 1000: train loss 0.983534, train error 0.44, test loss 0.848343, test error 0.354
	classification accuracy: 0.8087
	step 2000: train loss 0.708855, train error 0.25, test loss 0.659009, test error 0.228
	classification accuracy: 0.8661
	step 3000: train loss 0.443177, train error 0.15, test loss 0.475632, test error 0.156
	classification accuracy: 0.8837
	step 4000: train loss 0.354823, train error 0.09, test loss 0.30488, test error 0.112
	classification accuracy: 0.9022
	step 5000: train loss 0.228264, train error 0.08, test loss 0.331379, test error 0.092
	classification accuracy: 0.9243
	step 6000: train loss 0.241761, train error 0.07, test loss 0.268142, test error 0.086
	classification accuracy: 0.9345
	step 7000: train loss 0.279377, train error 0.08, test loss 0.307102, test error 0.068
	classification accuracy: 0.9404
	step 8000: train loss 

# ConvModel experiments

Regularization is harming both accuracy and loss; we were probably using too much before.
20 features:
    keep_prob 0.5: 79.56% classification accuracy and 7.80% similarity error
    keep_prob 0.8: 98.83% classification accuracy and 0.4% similarity error
10 features:
    keep_prob 0.8: 90.48% classification accuracy and 3.0% similarity error

In [9]:
test_model(model=ConvModel, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=10, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training
	step 0: train loss 1.43683, train error 0.91, test loss 1.42038, test error 0.896
	classification accuracy: 0.5842
	step 1000: train loss 0.773272, train error 0.3, test loss 0.830149, test error 0.304
	classification accuracy: 0.7427
	step 2000: train loss 0.448876, train error 0.14, test loss 0.50523, test error 0.184
	classification accuracy: 0.8478
	step 3000: train loss 0.284996, train error 0.1, test loss 0.426906, test error 0.122
	classification accuracy: 0.8194
	step 4000: train loss 0.471221, train error 0.19, test loss 0.356038, test error 0.126
	classification accuracy: 0.8103
	step 5000: train loss 0.267139, train error 0.1, test loss 0.274423, test error 0.094
	classification accuracy: 0.7827
	step 6000: train loss 0.282087, train error 0.11, test loss 0.27891, test error 0.104
	classification accuracy: 0.8253
	step 7000: train loss 0.245154, train error 0.1, test loss 0.279555, test error 0.1
	classification accuracy: 0.8371
	step 8000: train loss 0.20394

In [10]:
test_model(model=ConvModel, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.8, reg=0)

begin training
	step 0: train loss 1.53103, train error 0.93, test loss 1.50622, test error 0.912
	classification accuracy: 0.7057
	step 1000: train loss 0.853747, train error 0.44, test loss 0.860823, test error 0.392
	classification accuracy: 0.8366
	step 2000: train loss 0.686833, train error 0.16, test loss 0.514372, test error 0.182
	classification accuracy: 0.8904
	step 3000: train loss 0.38264, train error 0.14, test loss 0.339215, test error 0.114
	classification accuracy: 0.9215
	step 4000: train loss 0.469941, train error 0.17, test loss 0.417657, test error 0.094
	classification accuracy: 0.9448
	step 5000: train loss 0.184857, train error 0.07, test loss 0.244433, test error 0.052
	classification accuracy: 0.9541
	step 6000: train loss 0.130037, train error 0.05, test loss 0.145431, test error 0.062
	classification accuracy: 0.9670
	step 7000: train loss 0.0942735, train error 0.02, test loss 0.0868136, test error 0.026
	classification accuracy: 0.9717
	step 8000: train los

In [127]:
test_model(model=ConvModel, loss_fn=pair_hinge_loss, acc_fn=pair_acc, 
           num_features=20, num_steps=20000, lr=0.0001, keep_prob=0.5, reg=0)

begin training
	step 0: train loss 1.72427, train error 0.91, test loss 1.65946, test error 0.872
	classification accuracy: 0.7294
	step 1000: train loss 1.26688, train error 0.86, test loss 1.30334, test error 0.908
	classification accuracy: 0.6746
	step 2000: train loss 1.35572, train error 0.89, test loss 1.35587, test error 0.88
	classification accuracy: 0.7566
	step 3000: train loss 1.02514, train error 0.47, test loss 1.00683, test error 0.488
	classification accuracy: 0.8013
	step 4000: train loss 0.626289, train error 0.19, test loss 0.715968, test error 0.238
	classification accuracy: 0.7764
	step 5000: train loss 0.605368, train error 0.19, test loss 0.491022, test error 0.148
	classification accuracy: 0.7822
	step 6000: train loss 0.313229, train error 0.1, test loss 0.376109, test error 0.136
	classification accuracy: 0.8110
	step 7000: train loss 0.327111, train error 0.08, test loss 0.365875, test error 0.126
	classification accuracy: 0.8133
	step 8000: train loss 0.43922