In [None]:
# Root of the Caffe checkout, relative to the directory this notebook runs in.
# The notebook is expected to be started from {caffe_root}/examples; adjust otherwise.
caffe_root = '../'

import sys

# Make pycaffe and the triplet-loss Python layer module importable. Each entry is
# pushed to the front of sys.path, so the last one listed is searched first.
for subdir in ('python', 'examples/tripletloss'):
    sys.path.insert(0, caffe_root + subdir)
import caffe

In [None]:
%%writefile tripletloss/tripletloss_layer.py
import caffe
import numpy as np


class TripletLossLayer(caffe.Layer):
    """
    Triplet loss from Google's FaceNet paper, as a Caffe Python layer.

    Bottoms (exactly three, same batch size and element count):
        bottom[0] -- anchor embeddings
        bottom[1] -- positive embeddings
        bottom[2] -- negative embeddings
    Every sample is flattened to a vector and L2-normalised before the
    squared Euclidean distances are taken.

    Top:
        top[0] -- scalar: mean over the batch of
                  max(0, ||a - p||^2 - ||a - n||^2 + margin)

    param_str is a dict literal such as '{"margin": 1.0}'; margin defaults
    to 1.0 when it is missing or not convertible to float.

    NOTE: following the pycaffe example nets, a Python layer only counts as a
    loss when the net definition gives it `loss_weight: 1`. backward() scales
    its gradients by top[0].diff, like the built-in loss layers do.
    """

    def setup(self, bottom, top):
        """Check the number of bottoms and read `margin` from param_str.

        Raises:
            Exception: if the layer is not wired to exactly three bottoms.
            ValueError / SyntaxError: if param_str is not a valid literal.
        """
        # check if input pair is a triplet
        if len(bottom) != 3:
            raise Exception("Need three inputs to compute triplet loss. The bottom length was {}".format(len(bottom)))

        # Imported here so the layer file needs no extra module-level import.
        import ast

        self.margin = 1.0
        param_text = (self.param_str or '').strip()
        if param_text:
            # literal_eval only accepts literals, so a net definition cannot
            # smuggle executable code into the layer (plain eval could).
            params = ast.literal_eval(param_text)
            try:
                self.margin = float(params['margin'])
            except (KeyError, TypeError, ValueError):
                # best effort, as before: fall back to the default margin
                self.margin = 1.0

    def reshape(self, bottom, top):
        """Validate bottom shapes, size the scalar top and reset cached state.

        Raises:
            Exception: if the three bottoms differ in element count or in
                batch size.
        """
        # check input shapes match
        if bottom[0].count != bottom[1].count or bottom[1].count != bottom[2].count:
            raise Exception("Inputs must have the same dimension.")
        rows = int(bottom[0].data.shape[0])
        if bottom[1].data.shape[0] != rows or bottom[2].data.shape[0] != rows:
            raise Exception("Inputs must have the same dimension.")
        # The batch size is the leading axis. Dividing count by the number of
        # bottoms is only right when the feature dimension happens to be 3.
        self.batch_size = rows
        cols = bottom[0].count // rows if rows else 0

        # Neutral placeholders, so that a backward() issued before any
        # forward() yields zero gradients instead of failing.
        blank = np.zeros((rows, cols), dtype=np.float32)
        unit_len = np.ones((rows, 1), dtype=np.float32)
        self.diff_pos = blank.copy()
        self.diff_neg = blank.copy()
        self.norm_anc = blank.copy()
        self.norm_pos = blank.copy()
        self.norm_neg = blank.copy()
        self._len_anc = unit_len.copy()
        self._len_pos = unit_len.copy()
        self._len_neg = unit_len.copy()
        self.loss = np.zeros(rows, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    @staticmethod
    def _unit_and_length(array):
        """Return (rows of `array` scaled to unit L2 norm, the row norms).

        Zero norms are reported as 1 so that all-zero rows stay all-zero
        instead of producing a division by zero.
        """
        length = np.linalg.norm(array, ord=2, axis=1, keepdims=True)
        length[length == 0] = 1
        return array / length, length

    def normalize(self, array):
        """Scale `array` along axis 1 so that ||f(x)||_2 = 1 (zero rows stay zero)."""
        return self._unit_and_length(array)[0]

    def forward(self, bottom, top):
        """Compute the mean triplet loss over the batch.

        Loss = 1/N * SUM[i](max(0, Dpos_i - Dneg_i + margin)), N = batch size
        Dpos_i = ||a_i - p_i||^2, Dneg_i = ||a_i - n_i||^2
        where a_i, p_i, n_i are the L2-normalised anchor / positive /
        negative embeddings of triplet i.
        """
        rows = self.batch_size
        if rows == 0:
            top[0].data[...] = 0
            return

        # Normalise here rather than in reshape() so the values always belong
        # to the data of the current pass.
        self.norm_anc, self._len_anc = self._unit_and_length(bottom[0].data.reshape(rows, -1))
        self.norm_pos, self._len_pos = self._unit_and_length(bottom[1].data.reshape(rows, -1))
        self.norm_neg, self._len_neg = self._unit_and_length(bottom[2].data.reshape(rows, -1))

        self.diff_pos = self.norm_anc - self.norm_pos
        self.diff_neg = self.norm_anc - self.norm_neg
        dist_pos = np.sum(self.diff_pos ** 2, axis=1)
        dist_neg = np.sum(self.diff_neg ** 2, axis=1)

        # hinge per triplet; kept for backward() to find the active triplets
        self.loss = np.maximum(0, dist_pos - dist_neg + self.margin).astype(np.float32)
        top[0].data[...] = np.sum(self.loss) / rows

    def backward(self, top, propagate_down, bottom):
        """Compute the gradient of the loss w.r.t. each bottom.

        For a triplet with Loss_i > 0 the gradient w.r.t. the normalised
        embeddings is
            d/da = 2(n - p),  d/dp = -2(a - p),  d/dn = 2(a - n)
        and zero otherwise. Each is scaled by top[0].diff / N (loss weight
        and the averaging done in forward) and then pushed through the
        normalisation y = x / ||x||:
            dL/dx = (g - y * (y . g)) / ||x||
        """
        rows = self.batch_size
        if rows == 0:
            return

        scale = top[0].diff.flat[0] / rows
        # column of per-triplet factors: `scale` where the hinge is active, else 0
        active = (self.loss > 0).astype(np.float32)[:, np.newaxis] * scale

        per_bottom = (
            (2 * (self.norm_neg - self.norm_pos), self.norm_anc, self._len_anc),
            (-2 * self.diff_pos, self.norm_pos, self._len_pos),
            (2 * self.diff_neg, self.norm_neg, self._len_neg),
        )
        for idx, (grad_unit, unit, length) in enumerate(per_bottom):
            if not propagate_down[idx]:
                continue
            grad_unit = grad_unit * active
            # remove the component along the unit vector, then undo the scaling
            along = np.sum(unit * grad_unit, axis=1, keepdims=True)
            grad_raw = (grad_unit - unit * along) / length
            bottom[idx].diff[...] = grad_raw.reshape(bottom[idx].diff.shape)

# Hello Test Forward

In [None]:
import tempfile
import numpy as np
from caffe import layers as L

def load_net(net_proto):
    """Build a TEST-phase caffe.Net from a net definition.

    The definition is written (via str()) to a temporary file because
    caffe.Net is constructed from a file name. The file is removed again once
    the net has been built, or if building fails.

    Args:
        net_proto: net definition whose str() is the prototxt text.

    Returns:
        The constructed caffe.Net.
    """
    import os  # local import: this cell does not import os itself

    f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    try:
        f.write(str(net_proto))
        # close before Caffe opens the file by name
        f.close()
        return caffe.Net(f.name, caffe.TEST)
    finally:
        f.close()
        os.remove(f.name)

def example_network(batch_size, margin=1.0):
    """Describe a tiny net that feeds dummy embeddings into TripletLossLayer.

    A DummyData blob of shape (3 * batch_size, 3) is sliced along axis 0 into
    anchor / positive / negative blobs, which are the three bottoms of the
    Python triplet-loss layer.

    Args:
        batch_size: number of triplets.
        margin: margin passed to the layer through param_str.

    Returns:
        The net definition produced by NetSpec.to_proto().
    """
    n = caffe.NetSpec()

    # we use the dummy data layer to control the
    # shape of the inputs to the layer we are testing
    ip_dims = [3*batch_size, 3]
    label_dims = [batch_size]
    n.ip, n.label = L.DummyData(shape=[dict(dim=ip_dims), dict(dim=label_dims)],
                                ntop=2)

    n.slice_anc, n.slice_pos, n.slice_neg = L.Slice(n.ip, slice_param=dict(axis=0), ntop=3)

    # loss_weight marks the Python layer as a loss; Caffe does not infer that
    # for Python layers, and without it the layer takes no part in backward.
    n.triplet = L.Python(n.slice_anc, n.slice_pos, n.slice_neg,
                         python_param=dict(module='tripletloss_layer',
                                           layer='TripletLossLayer',
                                           param_str='{{"margin": {}}}'.format(float(margin))),
                         loss_weight=1)

    net_proto = n.to_proto()
    # The inputs are data blobs, so nothing would need a gradient; force the
    # backward pass so that net.backward() actually exercises the layer.
    net_proto.force_backward = True
    return net_proto


In [None]:
IMG_ANC = [1.0, 1.0, 1.0]
# ||f(IMG_ANC)||_2 = sqrt(1**2 + 1**2 + 1**2) = 1.73...
IMG_POS = [1.0, 1.0, 1.0]
# ||f(IMG_POS)||_2 = sqrt(1**2 + 1**2 + 1**2) = 1.73...
IMG_NEG = [0., 0., 0.]
# ||f(IMG_NEG)||_2 = sqrt(0**2 + 0**2 + 0**2) = 0
# Expected loss: Dpos = 0, Dneg = 1 (the zero vector stays zero after
# normalisation), so max(0, 0 - 1 + 1) = 0.
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float64)
print('ip_data shape = {}'.format(ip_data.shape))

net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))

net.backward()

In [None]:
# The positive is a scaled copy of the anchor, so both normalise to the same
# unit vector: Dpos = 0, Dneg = 1, expected loss max(0, 0 - 1 + 1) = 0.
IMG_ANC = [1.0, 1.0, 1.0]
IMG_POS = [0.5, 0.5, 0.5]
IMG_NEG = [0., 0., 0.]
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float64)

net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    


In [None]:
# Worst case: the negative equals the anchor and the positive is the zero
# vector: Dpos = 1, Dneg = 0, expected loss max(0, 1 - 0 + 1) = 2.
IMG_ANC = [1.0, 1.0, 1.0]
IMG_POS = [0., 0., 0.]
IMG_NEG = [1.0, 1.0, 1.0]
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float64)

net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    


In [None]:
# Positive and negative are identical, and both normalise to the anchor's unit
# vector: Dpos = Dneg = 0, so the expected loss is the margin, 1.
IMG_ANC = [1.0, 1.0, 1.0]
IMG_POS = [0.5, 0.5, 0.5]
IMG_NEG = [0.5, 0.5, 0.5]
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float64)

net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    


In [None]:
# Two triplets in one batch. Rows are ordered anchors, positives, negatives
# because the net slices the input into three equal parts along axis 0.
# Triplet 1 gives loss 0 and triplet 2 gives loss 1, so the expected mean is 0.5.
IMG_ANC = [1.0, 1.0, 1.0]
IMG_ANC2 = [1.0, 1.0, 1.0]
IMG_POS = [1.0, 1.0, 1.0]
IMG_POS2 = [0.5, 0.5, 0.5]
IMG_NEG = [0., 0., 0.]
IMG_NEG2 = [0.5, 0.5, 0.5]
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_ANC2, IMG_POS, IMG_POS2, IMG_NEG, IMG_NEG2], dtype=np.float64)

net_proto = example_network(2)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    

In [None]:
# Inputs with very different magnitudes; the layer normalises them first.
# Expected loss is about 0.82 (Dpos about 0.75, Dneg about 0.93, margin 1).
IMG_ANC = [10.0, 5.0, 100.0]
IMG_POS = [30.0, 10.0, 20.0]
IMG_NEG = [100., 2., 50.]
# np.float64 replaces the np.float alias, which newer NumPy no longer provides.
ip_data = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float64)
print('ip_data shape = {}'.format(ip_data.shape))

net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = ip_data

net.forward()

for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))

# Hello Training with MNIST

In [None]:
%%writefile tripletloss/mnist_tripletloss_train_test.prototxt
name: "mnist_tripletloss_train_test"
# TRAIN-phase input: images and labels are read from the list file, 96 list
# entries per batch, with pixel values scaled by 0.00390625 (= 1/256).
# The slice_triplet layer cuts every batch along axis 0 into anchor, positive
# and negative blobs. It sets no slice_point, which presumably means three
# equal parts of 32 images each (confirm against Caffe's Slice layer). The
# list file therefore has to be laid out batch by batch as 32 anchors,
# 32 positives, 32 negatives, and must be read in order.
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist.txt"
    batch_size: 96
  }
}
# TEST-phase input with the same settings.
# NOTE(review): this reads the same trainlist.txt as the TRAIN phase, so test
# passes run on training data; point it at a separate list if one is created.
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist.txt"
    batch_size: 96
  }
}
# Cut each input batch along the batch axis into the three triplet roles.
# `axis` is the current name of the deprecated `slice_dim` field.
layer {
  name: "slice_triplet"
  type: "Slice"
  bottom: "triplet_data"
  top: "anchor"
  top: "positive"
  top: "negative"
  slice_param {
    axis: 0
  }
}

################# ANCHOR #############
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "anchor"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "feat"
  type: "InnerProduct"
  bottom: "ip2"
  top: "feat"
  param {
    name: "feat_w"
    lr_mult: 1
  }
  param {
    name: "feat_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

###################### POSITIVE ###################

layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "positive"
  top: "conv1_p"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "pool1_p"
  top: "conv2_p"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1_p"
  type: "InnerProduct"
  bottom: "pool2_p"
  top: "ip1_p"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "ip1_p"
  top: "ip1_p"
}
layer {
  name: "ip2_p"
  type: "InnerProduct"
  bottom: "ip1_p"
  top: "ip2_p"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "feat_p"
  type: "InnerProduct"
  bottom: "ip2_p"
  top: "feat_p"
  param {
    name: "feat_w"
    lr_mult: 1
  }
  param {
    name: "feat_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

######################### NEGATIVE #########################

layer {
  name: "conv1_n"
  type: "Convolution"
  bottom: "negative"
  top: "conv1_n"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1_n"
  type: "Pooling"
  bottom: "conv1_n"
  top: "pool1_n"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_n"
  type: "Convolution"
  bottom: "pool1_n"
  top: "conv2_n"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2_n"
  type: "Pooling"
  bottom: "conv2_n"
  top: "pool2_n"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1_n"
  type: "InnerProduct"
  bottom: "pool2_n"
  top: "ip1_n"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1_n"
  type: "ReLU"
  bottom: "ip1_n"
  top: "ip1_n"
}
layer {
  name: "ip2_n"
  type: "InnerProduct"
  bottom: "ip1_n"
  top: "ip2_n"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "feat_n"
  type: "InnerProduct"
  bottom: "ip2_n"
  top: "feat_n"
  param {
    name: "feat_w"
    lr_mult: 1
  }
  param {
    name: "feat_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

############# Triplet Loss ###############
layer {
  name: "tripletloss"
  type: "Python"
  bottom: "feat"
  bottom: "feat_p"
  bottom: "feat_n"
  top: "loss"
  python_param {
    module: "tripletloss_layer"
    layer: "TripletLossLayer"
    param_str: '{\"margin\": 1.0}'
  }
  # Set the loss weight so Caffe knows this is a loss layer. A Python layer
  # is not recognised as a loss automatically; without this line the top does
  # not contribute to the objective and no gradient is back-propagated.
  loss_weight: 1
  include{
        phase: TRAIN
  }
}


In [None]:
from mnist import MNIST
import os
# Load the MNIST training split from {caffe_root}/data/mnist and show a short
# summary plus the first sample.
mnist_data_dir = os.path.join(caffe_root, 'data/mnist')
mndata = MNIST(mnist_data_dir)
training_split = mndata.load_training()
images, labels = training_split
for message in ('loaded {} images, {} labels'.format(len(images), len(labels)),
                'sample image at 0 = {}'.format(images[0])):
    print(message)

In [None]:
import cv2
from StringIO import StringIO

img_dir = '/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images'
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# Number of triplets per mini-batch. The training net reads batch_size: 96
# list entries at a time and slices them into anchor / positive / negative
# thirds, so this must equal that batch_size divided by 3.
TRIPLETS_PER_BATCH = 32

# create a training list
# Images are consumed in order: the first unused image becomes the anchor, the
# next one with the same label the positive, the next one with a different
# label the negative. Images that fit no open slot are skipped.
triplet_dict = {'anchor': None, 'positive': None, 'negative': None}
anchor_lines = []
pos_lines = []
neg_lines = []
triplet_no = 0
for i, l in zip(images, labels):
    # uint8 is the pixel type image writers expect for 8-bit images
    # (assumes the loader yields values in 0..255 -- TODO confirm)
    img = np.array(i, dtype=np.uint8).reshape((28, 28))

    if triplet_dict['anchor'] is None:
        # this becomes an anchor
        triplet_dict['anchor'] = [img, l]
    elif triplet_dict['positive'] is None:
        # check if this is the same label
        if triplet_dict['anchor'][1] == l:
            # this becomes a positive one
            triplet_dict['positive'] = [img, l]
    elif triplet_dict['anchor'][1] != l:
        # this becomes a negative one
        triplet_dict['negative'] = [img, l]

    if triplet_dict['negative'] is None:
        continue

    # write
    anchor_path = os.path.join(img_dir, '{}_anchor.jpg'.format(triplet_no))
    pos_path = os.path.join(img_dir, '{}_positive.jpg'.format(triplet_no))
    neg_path = os.path.join(img_dir, '{}_negative.jpg'.format(triplet_no))

    # image
    cv2.imwrite(anchor_path, triplet_dict['anchor'][0])
    cv2.imwrite(pos_path, triplet_dict['positive'][0])
    cv2.imwrite(neg_path, triplet_dict['negative'][0])

    # sample
    anchor_lines.append('{} {}\n'.format(anchor_path, triplet_dict['anchor'][1]))
    pos_lines.append('{} {}\n'.format(pos_path, triplet_dict['positive'][1]))
    neg_lines.append('{} {}\n'.format(neg_path, triplet_dict['negative'][1]))

    # reset
    triplet_dict = {'anchor': None, 'positive': None, 'negative': None}

    triplet_no += 1

# finally, write sample list
# The data layer reads consecutive lines, so each batch-sized chunk of the file
# must hold TRIPLETS_PER_BATCH anchors, then their positives, then their
# negatives. Writing all anchors first would fill whole batches with anchors
# only. A trailing incomplete chunk is dropped so the layout stays aligned
# when the reader wraps around to the start of the list.
usable = len(anchor_lines) - len(anchor_lines) % TRIPLETS_PER_BATCH
with open(os.path.join(img_dir, '../', 'trainlist.txt'), 'w') as f:
    for start in range(0, usable, TRIPLETS_PER_BATCH):
        stop = start + TRIPLETS_PER_BATCH
        f.writelines(anchor_lines[start:stop])
        f.writelines(pos_lines[start:stop])
        f.writelines(neg_lines[start:stop])

In [None]:
%%writefile tripletloss/mnist_tripletloss_solver.prototxt
# The train/test net protocol buffer definition.
# Both nets are built from the same file; its layers select their phase
# through `include { phase: ... }` rules.
train_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_train_test.prototxt"
test_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# The TEST-phase data layer of the net above reads 96 images per pass,
# so 100 passes cover 9,600 list entries.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/"

In [None]:
# Select GPU 0 and GPU mode, then build the SGD solver from its prototxt.
caffe.set_device(0)
caffe.set_mode_gpu()

solver_prototxt = '/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_solver.prototxt'
solver = caffe.SGDSolver(solver_prototxt)

In [None]:
# Shape of every blob in the training net, as (blob name, data shape) pairs;
# each output is (batch size, feature dim, spatial dim)
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]

In [None]:
# Shape of the first parameter blob (the weights) of every layer that has
# parameters; the biases at index 1 are left out.
[(layer_name, layer_params[0].data.shape) for layer_name, layer_params in solver.net.params.items()]

In [None]:
# Run one forward pass of the solver's training net.
solver.net.forward()  # train net

In [None]:
# Run a single solver iteration.
solver.step(1)

In [None]:
%%time
niter = 10
# losses will also be stored in the log
train_loss = np.zeros(niter)

# the main solver loop
for it in range(niter):
    print('~~~~~~~~~~ iteration {} ~~~~~~~~~~~~'.format(it))
    solver.step(1)  # SGD by Caffe

    # store the train loss
    # The loss blob holds a single element; read it out explicitly instead of
    # relying on implicit conversion of a one-element array to a scalar.
    train_loss[it] = float(solver.net.blobs['loss'].data.flat[0])

In [None]:
# Display the loss recorded after each of the niter solver steps.
train_loss