In [None]:
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)

import sys
sys.path.insert(0, caffe_root + 'python')
sys.path.insert(0, caffe_root + 'examples/tripletloss')
import caffe

# Hello Test Forward

In [None]:
import tempfile
import numpy as np
from caffe import layers as L, params as P

def load_net(net_proto):
    """Instantiate a TEST-phase ``caffe.Net`` from an in-memory net definition.

    ``caffe.Net`` is given a file name here, so the definition is first
    serialised with ``str(net_proto)`` into a temporary file. The temporary
    file is removed again once the net has been constructed, so repeated calls
    do not leave files behind.

    Args:
        net_proto: net definition whose ``str()`` is prototxt text, e.g. the
            value returned by ``example_network``.

    Returns:
        The ``caffe.Net`` built from that definition with ``caffe.TEST``.
    """
    import os  # local import so this cell does not depend on a later cell's import

    # delete=False: the file has to be closed before it is re-opened by name
    f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    try:
        f.write(str(net_proto))
        f.close()
        return caffe.Net(f.name, caffe.TEST)
    finally:
        f.close()  # no-op when already closed; covers a failing write()
        os.remove(f.name)

def l2normed(embeddings, dim):
    """Append layers that L2-normalise every row of ``embeddings``.

    For each row x the layers compute x * (sum(x ** 2) ** -0.5), i.e.
    x / ||x||_2. ``embeddings`` is assumed to have shape N x dim.

    NOTE(review): a row whose sum of squares is 0 is raised to the power -0.5
    like any other row; confirm how an all-zero embedding should be handled.
    """
    # sum of squares over axis 1
    sum_sq = L.Reduction(embeddings, axis=1, operation=P.Reduction.SUMSQ)
    inv_norm = L.Power(sum_sq, power=(-0.5))
    # add an axis of size 1 at the end, then repeat it `dim` times along axis 1
    inv_norm_col = L.Reshape(inv_norm, num_axes=0, axis=-1, shape=dict(dim=[1]))
    inv_norm_tiled = L.Tile(inv_norm_col, axis=1, tiles=dim)
    return L.Eltwise(embeddings, inv_norm_tiled, operation=P.Eltwise.PROD)

def example_network(batch_size):
    """Build a toy net that feeds placeholder embeddings into the Python triplet loss.

    The net has an 'ip' blob of shape (3 * batch_size, 3) and a 'label' blob of
    shape (batch_size,). 'ip' is sliced along axis 0 into three tops (anchor,
    positive, negative), each slice is L2-normalised with ``l2normed`` and the
    three results are passed to ``TripletLossLayer`` from the
    ``tripletloss_layer`` module (margin 1.0, debug on).

    Returns:
        The net definition produced by ``NetSpec.to_proto()``.
    """
    embed_dim = 3
    spec = caffe.NetSpec()

    # DummyData only pins down the input shapes of the layers under test
    blob_shapes = [dict(dim=[3*batch_size, embed_dim]), dict(dim=[batch_size])]
    spec.ip, spec.label = L.DummyData(shape=blob_shapes,
                                      transform_param=dict(scale=1.0/255.0),
                                      ntop=2)

    # three tops on axis 0: anchor / positive / negative
    spec.slice_anc, spec.slice_pos, spec.slice_neg = L.Slice(
        spec.ip, slice_param=dict(axis=0), ntop=3)
    spec.slice_anc_norm = l2normed(spec.slice_anc, embed_dim)
    spec.slice_pos_norm = l2normed(spec.slice_pos, embed_dim)
    spec.slice_neg_norm = l2normed(spec.slice_neg, embed_dim)

    loss_layer_cfg = dict(module='tripletloss_layer', layer='TripletLossLayer', param_str='{\"margin\": 1.0, \"debug\": 1}')
    spec.triplet = L.Python(spec.slice_anc_norm, spec.slice_pos_norm, spec.slice_neg_norm,
                            loss_weight=1, python_param=loss_layer_cfg)
    return spec.to_proto()


In [None]:
# Toy triplet: anchor and positive are identical, the negative is the zero vector.
IMG_ANC = [1.0, 1.0, 1.0]
# ||f(IMG_ANC)||_2 = sqrt(1**2 + 1**2 + 1**2) = 1.73...
IMG_POS = [1.0, 1.0, 1.0]
# ||f(IMG_POS)||_2 = sqrt(1**2 + 1**2 + 1**2) = 1.73...
IMG_NEG = [0., 0., 0.]
# ||f(IMG_NEG)||_2 = sqrt(0**2 + 0**2 + 0**2) = 0

# embeddings is a 2-D array with one row per image, in the order anchor, positive, negative
# each row holds 3 float32 features, 3*32bit = 96bit
# shape = (3 * batch_size, feature_size) = (3, 3)
embeddings = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float32)
print('embeddings shape = {}'.format(embeddings.shape))

# build the example net for a single triplet and keep a copy of its prototxt on disk
net_proto = example_network(1)
with open('tripletloss/mnist_tripletloss_train_test_10_auto.prototxt', 'w') as f:
    f.write(str(net_proto))
net = load_net(net_proto)
# overwrite the DummyData output with the hand-made embeddings
net.blobs['ip'].data[...] = embeddings

net.forward()

# dump every blob of the net after the forward pass
for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    
print('running backward...')
net.backward()

# gradients that reached the three (un-normalised) slices
print('diff anc = {}'.format(net.blobs['slice_anc'].diff))
print('diff pos = {}'.format(net.blobs['slice_pos'].diff))
print('diff neg = {}'.format(net.blobs['slice_neg'].diff))

In [None]:
# Second toy triplet: three different, non-zero vectors (rows: anchor, positive, negative).
IMG_ANC = [10.0, 5.0, 100.0]
IMG_POS = [30.0, 10.0, 20.0]
IMG_NEG = [100., 2., 50.]
embeddings = np.array([IMG_ANC, IMG_POS, IMG_NEG], dtype=np.float32)
print('embeddings shape = {}'.format(embeddings.shape))

# fresh net for a single triplet; the DummyData output is replaced by the embeddings above
net_proto = example_network(1)
net = load_net(net_proto)
net.blobs['ip'].data[...] = embeddings

net.forward()

# dump every blob of the net after the forward pass
for name in net.blobs:
    print('{}'.format(name))
    print('value = {}'.format(net.blobs[name].data))
    
print('running backward...')
net.backward()

# gradients that reached the three (un-normalised) slices
print('diff anc = {}'.format(net.blobs['slice_anc'].diff))
print('diff pos = {}'.format(net.blobs['slice_pos'].diff))
print('diff neg = {}'.format(net.blobs['slice_neg'].diff))

# omoindrot's Training with MNIST

embeddings size = 10

In [None]:
%%writefile tripletloss/mnist_omoindrot_tripletloss_train_test_10.prototxt
name: "mnist_tripletloss_train_test_10"
# TRAIN-phase input: images and labels listed in trainlist_64.txt, pixel values scaled by 0.00390625 (= 1/256).
# batch_size 192 = 3 * 64; the list-building cell of this notebook writes 64 anchor lines,
# then 64 positive lines, then 64 negative lines per group.
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist_64.txt"
    batch_size: 192
  }
}
# TEST-phase input: same settings and the same list file as the TRAIN-phase layer above.
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist_64.txt"
    batch_size: 192
  }
}
# Split the batch along dimension 0 into three tops: anchor, positive, negative.
layer {
  name: "slice_triplet"
  type: "Slice"
  bottom: "triplet_data"
  top: "anchor"
  top: "positive"
  top: "negative"
  slice_param {
    slice_dim: 0
  }
}

################# ANCHOR #############
# conv1 -> pool1 -> conv2 -> pool2 -> ip1 -> relu1 -> ip2, ending in the 10-dimensional blob "feat".
# Only the "anchor" top of the slice layer is consumed in this file; no layer here reads
# "positive" or "negative".
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "anchor"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# in-place ReLU on ip1
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# embedding layer: num_output 10 is the embedding size
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "feat"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

############# L2 Normalization ############
# Same layer chain as l2normed() in this notebook, applied to "feat":
# slice_anc_norm = feat * (sum(feat ** 2 over axis 1) ** -0.5)

# per-row sum of squares
layer {
  name: "Reduction1"
  type: "Reduction"
  bottom: "feat"
  top: "Reduction1"
  reduction_param {
    operation: SUMSQ
    axis: 1
  }
}
# sum of squares -> 1 / norm
layer {
  name: "Power1"
  type: "Power"
  bottom: "Reduction1"
  top: "Power1"
  power_param {
    power: -0.5
  }
}
# add an axis of size 1 at the end so the value can be tiled along axis 1
layer {
  name: "Reshape1"
  type: "Reshape"
  bottom: "Power1"
  top: "Reshape1"
  reshape_param {
    shape {
      dim: 1
    }
    axis: -1
    num_axes: 0
  }
}
# tiles: 10 matches num_output of ip2
layer {
  name: "Tile1"
  type: "Tile"
  bottom: "Reshape1"
  top: "Tile1"
  tile_param {
    axis: 1
    tiles: 10
  }
}
# element-wise product of the embedding with the tiled 1 / norm
layer {
  name: "slice_anc_norm"
  type: "Eltwise"
  bottom: "feat"
  bottom: "Tile1"
  top: "slice_anc_norm"
  eltwise_param {
    operation: PROD
  }
}

############# Triplet Loss ###############
# TRAIN only: Python layer TripletLossLayer (module tripletloss_layer), margin 1.0.
# NOTE(review): this layer has a single bottom, while example_network() in this notebook passes
# three bottoms (anchor, positive, negative) to the same layer class - confirm which is intended.
layer {
  name: "tripletloss"
  type: "Python"
  bottom: "slice_anc_norm"
  top: "loss"
  loss_weight: 1
  python_param {
    module: "tripletloss_layer"
    layer: "TripletLossLayer"
    param_str: '{\"margin\": 1.0}'
  }
  include {
    phase: TRAIN
  }
}
# TEST only: distance between anchor and positive embeddings.
# NOTE(review): no layer in this file has a top named "slice_pos_norm" - confirm where it should come from.
layer {
  name: "pos_dist"
  type: "Python"
  bottom: "slice_anc_norm"
  bottom: "slice_pos_norm"
  top: "pos_dist"
  python_param {
    module: "tripletloss_layer"
    layer: "PairwiseDistanceLayer"
    param_str: '{\"debug\": 0}'
  }
  include {
    phase: TEST
  }
}
# TEST only: distance between anchor and negative embeddings.
# NOTE(review): no layer in this file has a top named "slice_neg_norm" - confirm where it should come from.
layer {
  name: "neg_dist"
  type: "Python"
  bottom: "slice_anc_norm"
  bottom: "slice_neg_norm"
  top: "neg_dist"
  python_param {
    module: "tripletloss_layer"
    layer: "PairwiseDistanceLayer"
    param_str: '{\"debug\": 0}'
  }
  include {
    phase: TEST
  }
}
# consume "label", which no other layer in this file reads
layer{
  name: "silence"
  type: "Silence"
  bottom: "label"
}

In [None]:
from mnist import MNIST
import os
# the MNIST files are looked up under {caffe_root}/data/mnist
mnist_data_dir = os.path.join(caffe_root, 'data/mnist')
mndata = MNIST(mnist_data_dir)
# images and labels are parallel sequences; the triplet-building cell zips them
# and reshapes every image to 28 x 28
images, labels = mndata.load_training()
print('loaded {} images, {} labels'.format(len(images), len(labels)))
# prints the whole first image
print('sample image at 0 = {}'.format(images[0]))

In [None]:
import cv2
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3: the StringIO module no longer exists

# Walks the MNIST training set once, greedily groups images into
# (anchor, positive, negative) triplets, writes each image as a JPEG and
# produces an image list whose lines come in groups of 64 anchors, 64 positives
# and 64 negatives.

img_dir = '/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images'
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# create a training list
# triplet being assembled; a slot holds [image, label] once it is filled
triplet_dict = {'anchor': None, 'positive': None, 'negative': None}
# list lines of the group in progress, kept per role so they can be emitted grouped
anchor_list = StringIO()
pos_list = StringIO()
neg_list = StringIO()
# all completed groups, in final file order
batch_str = StringIO()
triplet_no = 0
batch_size = 0
for i, l in zip(images, labels):
    array = np.array(i)
    img = array.reshape((28, 28))

    if triplet_dict['anchor'] is None:
        # this becomes an anchor
        triplet_dict['anchor'] = [img, l]
    elif triplet_dict['positive'] is None:
        # check if this is the same label
        if triplet_dict['anchor'][1] == l:
            # this becomes a positive one
            triplet_dict['positive'] = [img, l]
    elif triplet_dict['anchor'][1] != l:
        # this becomes a negative one
        triplet_dict['negative'] = [img, l]

    # the negative is filled last, so a missing negative means the triplet is incomplete
    if triplet_dict['negative'] is None:
        continue

    # write
    anchor_path = os.path.join(img_dir, '{}_anchor.jpg'.format(triplet_no))
    pos_path = os.path.join(img_dir, '{}_positive.jpg'.format(triplet_no))
    neg_path = os.path.join(img_dir, '{}_negative.jpg'.format(triplet_no))

    # image
    cv2.imwrite(anchor_path, triplet_dict['anchor'][0])
    cv2.imwrite(pos_path, triplet_dict['positive'][0])
    cv2.imwrite(neg_path, triplet_dict['negative'][0])

    # sample: one "<path> <label>" line per image
    anchor_list.write('{} {}\n'.format(anchor_path, triplet_dict['anchor'][1]))
    pos_list.write('{} {}\n'.format(pos_path, triplet_dict['positive'][1]))
    neg_list.write('{} {}\n'.format(neg_path, triplet_dict['negative'][1]))

    # reset
    triplet_dict['anchor'] = None
    triplet_dict['positive'] = None
    triplet_dict['negative'] = None

    triplet_no += 1
    batch_size += 1

    if batch_size == 64:
        # write anchors first
        batch_str.write(anchor_list.getvalue())
        anchor_list.close()
        anchor_list = StringIO()
        # positive
        batch_str.write(pos_list.getvalue())
        pos_list.close()
        pos_list = StringIO()
        # negative
        batch_str.write(neg_list.getvalue())
        neg_list.close()
        neg_list = StringIO()
        # reset
        batch_size = 0

# finally, write sample list
# triplets of a last, incomplete group (fewer than 64) are not part of batch_str,
# so they do not appear in the list file even though their images were written
with open(os.path.join(img_dir, '../' ,'trainlist_64.txt'), 'w') as f:
    f.write(batch_str.getvalue())
    batch_str.close()
    anchor_list.close()
    pos_list.close()
    neg_list.close()

In [None]:
%%writefile tripletloss/mnist_tripletloss_solver_10.prototxt
# The train/test net protocol buffer definition
# NOTE(review): the %%writefile cells of this notebook write
# "mnist_omoindrot_tripletloss_train_test_10.prototxt" and
# "mnist_tripletloss_train_test_10_auto.prototxt"; confirm that the file named below exists.
train_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_train_test_10.prototxt"
test_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_train_test_10.prototxt"
# samples = 192 * 77 = 14784
test_iter: 77
# test at every epoch
test_interval: 77
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every epoch
display: 77
# The maximum number of iterations = 10 epochs
# NOTE(review): the training cell of this notebook calls solver.step(1) 77 * 20 times;
# confirm which epoch count is intended.
max_iter: 770
# snapshot intermediate results at every epoch
snapshot: 77
snapshot_prefix: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/mnist_tripletloss"

In [None]:
# run Caffe on GPU device 0
caffe.set_device(0)
caffe.set_mode_gpu()

# reset solver to avoid a continuous training over multiple runs
solver = None
# presumably the file written by the solver %%writefile cell, addressed by absolute path - confirm
solver = caffe.SGDSolver('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_solver_10.prototxt')

In [None]:
# (blob name, shape) for every blob of the training net;
# each output is (batch size, feature dim, spatial dim)
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]

In [None]:
# (layer name, weight shape) for every parameterised layer; only entry 0 of each
# layer's params is read, the biases are omitted
[(layer_name, layer_params[0].data.shape) for layer_name, layer_params in solver.net.params.items()]

In [None]:
%%time
# Run the solver one step at a time for 20 epochs of 77 iterations each and
# record the training loss after every step.
itr_per_epoch = 77
niter = itr_per_epoch * 20

# train_loss[it] is the value of the 'loss' blob after step `it`
train_loss = np.zeros(niter)

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    loss = solver.net.blobs['loss'].data

    # output every epoch
    if it % itr_per_epoch == 0:
        # floor division keeps the epoch number an integer on Python 3 as well
        print('loss at epoch {} = {}'.format(it // itr_per_epoch, loss))

    train_loss[it] = loss

# Deploy Test

In [None]:
%%writefile tripletloss/mnist_tripletloss_deploy_10.prototxt
name: "mnist_tripletloss_deploy_10"
# Input: a pair of 3-channel 28 x 28 images stacked along dimension 0.
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 2 dim: 3 dim: 28 dim: 28 } }
}
# Split the pair along dimension 0 into two tops: "foo" and "bar".
layer {
  name: "slice_pair"
  type: "Slice"
  bottom: "data"
  top: "foo"
  top: "bar"
  slice_param {
    slice_dim: 0
  }
}

# foo => anchor
# bar => the image compared against the anchor; the notebook cells place either the
#        positive or the negative image in this slot

################# ANCHOR #############
# conv1 -> pool1 -> conv2 -> pool2 -> ip1 -> relu1 -> ip2 on "foo", ending in the blob "feat".
# The param names (conv1_w, conv1_b, ...) are the same ones used by the second tower below.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "foo"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# in-place ReLU on ip1
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# embedding layer: num_output 10 is the embedding size
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "feat"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

###################### POSITIVE ###################
# Second tower, applied to "bar" and ending in the blob "feat_p". Layer and blob names carry
# a "_p" suffix, while the param names (conv1_w, conv1_b, ...) are identical to the anchor
# tower's, which is how Caffe shares parameters between layers.

layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "bar"
  top: "conv1_p"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "pool1_p"
  top: "conv2_p"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1_p"
  type: "InnerProduct"
  bottom: "pool2_p"
  top: "ip1_p"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# in-place ReLU on ip1_p
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "ip1_p"
  top: "ip1_p"
}
# embedding layer of the second tower
layer {
  name: "ip2_p"
  type: "InnerProduct"
  bottom: "ip1_p"
  top: "feat_p"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

############# L2 Normalization ############
# Two copies of the same chain (Reduction -> Power -> Reshape -> Tile -> Eltwise):
#   slice_anc_norm = feat   * (sum(feat   ** 2 over axis 1) ** -0.5)
#   slice_pos_norm = feat_p * (sum(feat_p ** 2 over axis 1) ** -0.5)
# tiles: 10 matches num_output of ip2 / ip2_p.

# --- chain 1: "feat" ---
layer {
  name: "Reduction1"
  type: "Reduction"
  bottom: "feat"
  top: "Reduction1"
  reduction_param {
    operation: SUMSQ
    axis: 1
  }
}
layer {
  name: "Power1"
  type: "Power"
  bottom: "Reduction1"
  top: "Power1"
  power_param {
    power: -0.5
  }
}
layer {
  name: "Reshape1"
  type: "Reshape"
  bottom: "Power1"
  top: "Reshape1"
  reshape_param {
    shape {
      dim: 1
    }
    axis: -1
    num_axes: 0
  }
}
layer {
  name: "Tile1"
  type: "Tile"
  bottom: "Reshape1"
  top: "Tile1"
  tile_param {
    axis: 1
    tiles: 10
  }
}
layer {
  name: "slice_anc_norm"
  type: "Eltwise"
  bottom: "feat"
  bottom: "Tile1"
  top: "slice_anc_norm"
  eltwise_param {
    operation: PROD
  }
}
# --- chain 2: "feat_p" ---
layer {
  name: "Reduction2"
  type: "Reduction"
  bottom: "feat_p"
  top: "Reduction2"
  reduction_param {
    operation: SUMSQ
    axis: 1
  }
}
layer {
  name: "Power2"
  type: "Power"
  bottom: "Reduction2"
  top: "Power2"
  power_param {
    power: -0.5
  }
}
layer {
  name: "Reshape2"
  type: "Reshape"
  bottom: "Power2"
  top: "Reshape2"
  reshape_param {
    shape {
      dim: 1
    }
    axis: -1
    num_axes: 0
  }
}
layer {
  name: "Tile2"
  type: "Tile"
  bottom: "Reshape2"
  top: "Tile2"
  tile_param {
    axis: 1
    tiles: 10
  }
}
layer {
  name: "slice_pos_norm"
  type: "Eltwise"
  bottom: "feat_p"
  bottom: "Tile2"
  top: "slice_pos_norm"
  eltwise_param {
    operation: PROD
  }
}

############# Pairwise distance (this file contains no loss layer) ###############
# Python layer PairwiseDistanceLayer (module tripletloss_layer) on the two normalised
# embeddings; its top "pos_dist" is the blob the notebook reads after net.forward().
# Included in TEST phase only; the notebook loads this net with caffe.TEST.
layer {
  name: "pos_dist"
  type: "Python"
  bottom: "slice_anc_norm"
  bottom: "slice_pos_norm"
  top: "pos_dist"
  python_param {
    module: "tripletloss_layer"
    layer: "PairwiseDistanceLayer"
    param_str: '{\"debug\": 0}'
  }
  include {
    phase: TEST
  }
}

In [None]:
import matplotlib.pyplot as plt
# display plots in this notebook
%matplotlib inline

# set display defaults
plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

In [None]:
# deploy definition written by the %%writefile cell above
model_def = caffe_root + 'examples/tripletloss/mnist_tripletloss_deploy_10.prototxt'
# snapshot at iteration 1463 = 19 * 77 (the solver snapshots every 77 iterations)
model_weights = caffe_root + 'examples/tripletloss/mnist/mnist_tripletloss_iter_1463.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

### test_data

mnistテストセット一覧

In [None]:
%%bash
ls /home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/


In [None]:
import cv2

# anchor image of triplet 0; cv2.imread returns None when the file cannot be read,
# in which case the .shape access below fails
img_anc = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/0_anchor.jpg')
print('img_anc shape = {}'.format(img_anc.shape))
plt.imshow(img_anc)

In [None]:
# positive image of triplet 0
img_pos = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/0_positive.jpg')
print('img_pos shape = {}'.format(img_pos.shape))
plt.imshow(img_pos)

In [None]:
# negative image of triplet 0
img_neg = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/0_negative.jpg')
print('img_neg shape = {}'.format(img_neg.shape))
plt.imshow(img_neg)

In [None]:
# move the last axis to the front: (height, width, channel) as returned by cv2.imread
# becomes (channel, height, width), the per-image layout of the net's 'data' input
img_anc_transposed = img_anc.transpose(2, 0, 1)
print('img_anc_transposed shape = {}'.format(img_anc_transposed.shape))
img_pos_transposed = img_pos.transpose(2, 0, 1)
print('img_pos_transposed shape = {}'.format(img_pos_transposed.shape))
img_neg_transposed = img_neg.transpose(2, 0, 1)
print('img_neg_transposed shape = {}'.format(img_neg_transposed.shape))

In [None]:
# set a pair data
# slot 0 is the anchor, slot 1 the image it is compared with
# NOTE(review): the pixel values are written unscaled, while the ImageData layers of the
# training prototxt in this notebook use transform_param scale 0.00390625 - confirm
# whether the same scaling should be applied here.
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])

# calculate distance
net.forward()

dist = net.blobs['pos_dist'].data
print('dist between anc and pos = {}'.format(dist))

In [None]:
# set a pair data
# the negative image goes into slot 1 of the pair
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_neg_transposed])

# calculate distance
net.forward()

# the net's only distance output is named 'pos_dist'; here it holds the anchor-negative distance
dist = net.blobs['pos_dist'].data
print('dist between anc and neg = {}'.format(dist))

### test no.1の時

In [None]:
# anchor image of triplet 1
img_anc = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/1_anchor.jpg')
print('img_anc shape = {}'.format(img_anc.shape))
plt.imshow(img_anc)

In [None]:
# positive image of triplet 1
img_pos = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/1_positive.jpg')
# report the image that was just loaded (img_pos, not img_anc)
print('img_pos shape = {}'.format(img_pos.shape))
plt.imshow(img_pos)

In [None]:
# negative image of triplet 1
img_neg = cv2.imread('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/1_negative.jpg')
# report the image that was just loaded (img_neg, not img_anc)
print('img_neg shape = {}'.format(img_neg.shape))
plt.imshow(img_neg)

In [None]:
# move the last axis of each image to the front, as expected per image by the 'data' blob
img_anc_transposed = img_anc.transpose(2, 0, 1)
img_pos_transposed = img_pos.transpose(2, 0, 1)
img_neg_transposed = img_neg.transpose(2, 0, 1)

# set a pair data
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])

# calculate distance
net.forward()

dist = net.blobs['pos_dist'].data
print('dist between anc and pos = {}'.format(dist))

# set a pair data
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_neg_transposed])

# calculate distance
net.forward()

# the output blob is still named 'pos_dist'; it now holds the anchor-negative distance
dist = net.blobs['pos_dist'].data
print('dist between anc and neg = {}'.format(dist))

### Test No. n で実行してみる


In [None]:
# change 'test_num' to display the images and distances of the corresponding triplet
test_num = 50

MNIST_ROOT = "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/"
ANC_PATH = MNIST_ROOT + str(test_num) + "_anchor.jpg"
POS_PATH = MNIST_ROOT + str(test_num) + "_positive.jpg"
NEG_PATH = MNIST_ROOT + str(test_num) + "_negative.jpg"

# load the three images and report the shape of each one
img_anc = cv2.imread(ANC_PATH)
print('img_anc shape = {}'.format(img_anc.shape))

img_pos = cv2.imread(POS_PATH)
print('img_pos shape = {}'.format(img_pos.shape))

img_neg = cv2.imread(NEG_PATH)
print('img_neg shape = {}'.format(img_neg.shape))

# stack anchor, positive and negative vertically for display
img_verticle = np.concatenate((img_anc, img_pos, img_neg), axis = 0)
plt.imshow(img_verticle)

# move the last axis of each image to the front, as expected per image by the 'data' blob
img_anc_transposed = img_anc.transpose(2, 0, 1)
img_pos_transposed = img_pos.transpose(2, 0, 1)
img_neg_transposed = img_neg.transpose(2, 0, 1)

# set a pair data
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])

# calculate distance
net.forward()

dist = net.blobs['pos_dist'].data
print('dist between anc and pos = {}'.format(dist))

# set a pair data
net.blobs['data'].data[...] = np.array([img_anc_transposed, img_neg_transposed])

# calculate distance
net.forward()

# the output blob is still named 'pos_dist'; it now holds the anchor-negative distance
dist = net.blobs['pos_dist'].data
print('dist between anc and neg = {}'.format(dist))

### nループして、それぞれの距離を算出する。

In [None]:
import copy

# Classify every generated triplet by comparing the anchor-positive and
# anchor-negative distances produced by the deploy net.

# number of iteration
test_iter = 4988

# indices of the triplets that fall into each category
hard_triplets_indices = []
semi_hard_triplets_indices = []
easy_triplets_indices = []

MNIST_ROOT = "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/"

for test_num in range(test_iter):
    
    print('test number: {}'.format(test_num))
    
    ANC_PATH = MNIST_ROOT + str(test_num) + "_anchor.jpg"
    POS_PATH = MNIST_ROOT + str(test_num) + "_positive.jpg"
    NEG_PATH = MNIST_ROOT + str(test_num) + "_negative.jpg"

    img_anc = cv2.imread(ANC_PATH)
    img_pos = cv2.imread(POS_PATH)
    img_neg = cv2.imread(NEG_PATH)

    # move the last axis of each image to the front, as expected per image by the 'data' blob
    img_anc_transposed = img_anc.transpose(2, 0, 1)
    img_pos_transposed = img_pos.transpose(2, 0, 1)
    img_neg_transposed = img_neg.transpose(2, 0, 1)

    # calculate distance between anc and pos
    net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])
    net.forward()
    dist = net.blobs['pos_dist'].data
    # copied before the next forward(); presumably `dist` refers to the blob's own buffer - confirm
    dist_p = copy.deepcopy(dist)
    print('  dist between anc and pos = {}'.format(dist_p))

    # calculate distance between anc and neg
    net.blobs['data'].data[...] = np.array([img_anc_transposed, img_neg_transposed])
    net.forward()
    dist = net.blobs['pos_dist'].data
    dist_n = copy.deepcopy(dist)
    print('  dist between anc and neg = {}'.format(dist_n))
    
    # check result and append the test index to the hard triplets list if negative is closer than positive.
    if dist_n < dist_p:
        print('  hard triplet set')
        hard_triplets_indices.append(test_num)
        
    # negative is not closer than the positive, but closer than positive + margin
    elif dist_n < dist_p + 1.0: #1.0 = margin
        print('  semi-hard triplet set')
        semi_hard_triplets_indices.append(test_num)
        
    # negative is at least one margin farther away than the positive
    else:
        print('  easy triplet set')
        easy_triplets_indices.append(test_num)

In [None]:
# indices of the triplets classified as hard, as an array, and how many there are
hard_triplets_list = np.array(hard_triplets_indices)
print("the number of list = {}".format(hard_triplets_list.shape[0]))
hard_triplets_list

In [None]:
# indices of the triplets classified as semi-hard, as an array, and how many there are
semi_hard_triplets_list = np.array(semi_hard_triplets_indices)
print("the number of list = {}".format(semi_hard_triplets_list.shape[0]))
semi_hard_triplets_list

In [None]:
# indices of the triplets classified as easy, as an array, and how many there are
easy_triplets_list = np.array(easy_triplets_indices)
print("the number of list = {}".format(easy_triplets_list.shape[0]))
easy_triplets_list

In [None]:

# Print the two distances again, this time only for the triplets that the
# classification loop collected in hard_triplets_indices.
MNIST_ROOT = "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/"

# the original loop iterated over `hard`, a name that is not defined anywhere in this notebook
for test_num in hard_triplets_indices:

    print('test number: {}'.format(test_num))

    ANC_PATH = MNIST_ROOT + str(test_num) + "_anchor.jpg"
    POS_PATH = MNIST_ROOT + str(test_num) + "_positive.jpg"
    NEG_PATH = MNIST_ROOT + str(test_num) + "_negative.jpg"

    img_anc = cv2.imread(ANC_PATH)
    img_pos = cv2.imread(POS_PATH)
    img_neg = cv2.imread(NEG_PATH)

    # move the last axis of each image to the front, as expected per image by the 'data' blob
    img_anc_transposed = img_anc.transpose(2, 0, 1)
    img_pos_transposed = img_pos.transpose(2, 0, 1)
    img_neg_transposed = img_neg.transpose(2, 0, 1)

    # calculate distance between anc and pos
    net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])
    net.forward()
    dist = net.blobs['pos_dist'].data
    # copied before the next forward() call reuses the 'pos_dist' blob
    dist_p = copy.deepcopy(dist)
    print('  dist between anc and pos = {}'.format(dist_p))

    # calculate distance between anc and neg
    net.blobs['data'].data[...] = np.array([img_anc_transposed, img_neg_transposed])
    net.forward()
    dist = net.blobs['pos_dist'].data
    dist_n = copy.deepcopy(dist)
    print('  dist between anc and neg = {}'.format(dist_n))

# Compute the distance matrix

### `_pairwise_distances` の Numpy版実装

In [None]:
def _pairwise_distances(embeddings, squared=False):
    """Compute the 2D matrix of distances between all the embeddings.

    Args:
        embeddings: tensor of shape (batch_size, embed_dim)
        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                 If false, output is the pairwise euclidean distance matrix.

    Returns:
        pairwise_distances: tensor of shape (batch_size, batch_size)
    """
    
    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    dot_product = np.matmul(embeddings, embeddings.transpose())
    
    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
    # shape (batch_size,)
    square_norm = np.diag(dot_product)
    
    # Compute the pairwise distance matrix as we have:
    # ||a - b||^2 = ||a||^2  - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)
    distances = np.expand_dims(square_norm, 0) - 2.0 * dot_product + np.expand_dims(square_norm, 1)
    
    # Because of computation errors, some distances might be negative so we put everything >= 0.0
    distances = np.maximum(distances, 0.0)
    
    if not squared:
        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
        # we need to add a small epsilon where distances == 0.0
        mask = np.equal(distances, 0.0).astype(np.float)
        distances = distances + mask * 1e-16

        distances = np.sqrt(distances)

        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
        distances = distances * (1.0 - mask)
    
    return distances

In [None]:
# shape is (batch_size, embed_dim), in the sense of tf.shape
# this example uses (batch_size, embed_dim) = (3, 2)
embeddings_example = np.array([[-1.0, 2.0],
                               [0.5, 0.2],
                               [5.5, 1.0]])
# print() call form: valid on Python 2 and Python 3 alike
print(embeddings_example)

In [None]:
# squared pairwise distances (second argument: squared=True) of the three example embeddings
_pairwise_distances(embeddings_example, True)

In [None]:
# compare with the brute-force computation: squared distance of each pair, computed directly
for pair in [(0, 1), (0, 2), (1, 2)]:
    dist = np.sum((embeddings_example[pair[0]] - embeddings_example[pair[1]])**2)
    print('dist between {} = {}'.format(pair, dist))

### `_get_triplet_mask` の Numpy版実装

In [None]:
def _get_triplet_mask(labels):
    """Return a 3D mask where mask[a, p, n] is True if the triplet (a, p, n) is valid.
    A triplet (i, j, k) is valid if:
        - i, j, k are distinct
        - labels[i] == labels[j] and labels[i] != labels[k]
    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    """
    # Check that i, j and k are distinct
    indices_equal = np.eye(np.shape(labels)[0]).astype(np.bool)
    indices_not_equal = np.logical_not(indices_equal)
    i_not_equal_j = np.expand_dims(indices_not_equal, 2)
    i_not_equal_k = np.expand_dims(indices_not_equal, 1)
    j_not_equal_k = np.expand_dims(indices_not_equal, 0)

    distinct_indices = np.logical_and(np.logical_and(i_not_equal_j, i_not_equal_k), j_not_equal_k)


    # Check if labels[i] == labels[j] and labels[i] != labels[k]
    label_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))
    i_equal_j = np.expand_dims(label_equal, 2)
    i_equal_k = np.expand_dims(label_equal, 1)

    valid_labels = np.logical_and(i_equal_j, np.logical_not(i_equal_k))

    # Combine the two masks
    mask = np.logical_and(distinct_indices, valid_labels)

    return mask

In [None]:
# all labels differ: no pair (i, j) shares a label, so every mask entry is False
label_test1 = np.array([1, 2, 3, 4, 5])

_get_triplet_mask(label_test1)

In [None]:
# indices 0 and 2 share a label: the valid triplets are (0, 2, k) and (2, 0, k) for k in {1, 3, 4}
label_test2 = np.array([1, 2, 1, 4, 5])

_get_triplet_mask(label_test2)

In [None]:
# two label pairs: indices (0, 2) share label 1 and indices (1, 3) share label 2
label_test3 = np.array([1, 2, 1, 2, 3])

_get_triplet_mask(label_test3)

In [None]:
# all labels equal: no index can serve as a negative, so every mask entry is False
label_test4 = np.array([1, 1, 1, 1, 1])

_get_triplet_mask(label_test4)

### `batch_all_triplet_loss` の Numpy版実装

In [None]:
def batch_all_triplet_loss(labels, embeddings, margin, squared=False):
    """Build the triplet loss over a batch of embeddings (NumPy version).

    We generate all the valid triplets and average the loss over the positive ones.

    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: array of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, the pairwise squared euclidean distance matrix is used.
                 If false, the pairwise euclidean distance matrix is used.

    Returns:
        triplet_loss: scalar, the loss averaged over the triplets whose loss is positive
        fraction_positive_triplets: scalar, share of the valid triplets whose loss is positive
    """
    # Get the pairwise distance matrix
    pairwise_dist = _pairwise_distances(embeddings, squared=squared)

    anchor_positive_dist = np.expand_dims(pairwise_dist, 2)
    anchor_negative_dist = np.expand_dims(pairwise_dist, 1)

    # Compute a 3D tensor of size (batch_size, batch_size, batch_size)
    # triplet_loss[i, j, k] will contain the triplet loss of anchor=i, positive=j, negative=k
    # Uses broadcasting where the 1st argument has shape (batch_size, batch_size, 1)
    # and the 2nd (batch_size, 1, batch_size)
    triplet_loss = anchor_positive_dist - anchor_negative_dist + margin

    # Put to zero the invalid triplets
    # (where label(a) != label(p) or label(n) == label(a) or a == p)
    mask = _get_triplet_mask(labels)
    # The `np.float` alias was removed from NumPy; the builtin `float` is the same dtype.
    mask = mask.astype(float)
    triplet_loss = np.multiply(mask, triplet_loss)

    # Remove negative losses (i.e. the easy triplets)
    triplet_loss = np.maximum(triplet_loss, 0.0)

    # Count number of positive triplets (where triplet_loss > 0)
    valid_triplets = np.greater(triplet_loss, 1e-16).astype(float)
    num_positive_triplets = np.sum(valid_triplets)
    num_valid_triplets = np.sum(mask)
    # The small epsilon keeps the divisions defined when there is no valid / positive triplet.
    fraction_positive_triplets = num_positive_triplets / (num_valid_triplets + 1e-16)

    # Get final mean triplet loss over the positive valid triplets
    triplet_loss = np.sum(triplet_loss) / (num_positive_triplets + 1e-16)

    return triplet_loss, fraction_positive_triplets

#### TEST

In [None]:
"""Test the triplet loss with batch all triplet mining in a simple case.
   There is just one class in this super simple edge case, and we want to make sure that
   the loss is 0.
"""
num_data = 10
feat_dim = 6
margin = 0.2
num_classes = 1

embeddings = np.random.rand(num_data, feat_dim).astype(np.float32)
labels = np.random.randint(0, num_classes, size=(num_data)).astype(np.float32)

for squared in [True, False]:
    print('#{}'.format(squared))

    # Compute the loss with the NumPy port (the `loss_tf` name is kept from the TensorFlow original).
    loss_tf, fraction = batch_all_triplet_loss(labels, embeddings, margin, squared=squared)

    print('  loss_tf  = {}'.format(loss_tf))
    print('  fraction = {}'.format(fraction))

    # With a single class there is no negative, hence no valid triplet:
    # actually enforce the zero loss this cell is meant to verify.
    assert np.isclose(loss_tf, 0.0), 'expected zero loss, got {}'.format(loss_tf)
    assert np.isclose(fraction, 0.0), 'expected zero fraction, got {}'.format(fraction)
    

In [None]:
def pairwise_distance_np(feature, squared=False):
    """Reference implementation of the pairwise distance matrix in numpy.

    Only the strict upper triangle is computed explicitly; the result is then
    mirrored to obtain the full symmetric matrix with a zero diagonal.

    Args:
        feature: 2-D numpy array of size [number of data, feature dimension]
        squared: Boolean. If true, return squared euclidean distances;
                 else, return euclidean distances.
    Returns:
        pairwise_distances: 2-D numpy array of size
                            [number of data, number of data].
    """
    num_data = feature.shape[0]
    rows, cols = np.triu_indices(num_data, 1)

    # One distance per (row < col) pair.
    dists = np.linalg.norm(feature[cols] - feature[rows], axis=1)
    if squared:
        dists = dists ** 2.

    upper = np.zeros((num_data, num_data))
    upper[rows, cols] = dists

    # Make symmetrical (the diagonal term is subtracted so it is not counted twice).
    return upper + upper.T - np.diag(upper.diagonal())


"""Test the triplet loss with batch all triplet mining"""
num_data = 10
feat_dim = 6
margin = 0.2
num_classes = 5

embeddings = np.random.rand(num_data, feat_dim).astype(np.float32)
labels = np.random.randint(0, num_classes, size=(num_data)).astype(np.float32)

for squared in [True, False]:
    print('#{}'.format(squared))
    pdist_matrix = pairwise_distance_np(embeddings, squared=squared)
    print(pdist_matrix.shape)

    # Brute-force reference: visit every (anchor, positive, negative) index triple.
    loss_np = 0.0
    num_positives = 0.0
    num_valid = 0.0
    for i in range(num_data):
        for j in range(num_data):
            for k in range(num_data):
                distinct = (i != j and i != k and j != k)
                valid = (labels[i] == labels[j]) and (labels[i] != labels[k])
                if distinct and valid:
                    num_valid += 1.0

                    pos_distance = pdist_matrix[i][j]
                    neg_distance = pdist_matrix[i][k]

                    loss = np.maximum(0.0, pos_distance - neg_distance + margin)
                    loss_np += loss

                    num_positives += (loss > 0)

    # Average over the positive triplets only; keep 0.0 when there is none
    # instead of dividing by zero.
    if num_positives > 0:
        loss_np /= num_positives
    fraction_np = num_positives / num_valid if num_valid > 0 else 0.0

    # Compute the loss with the NumPy port (the `loss_tf` name is kept from the TensorFlow original).
    loss_tf, fraction = batch_all_triplet_loss(labels, embeddings, margin, squared=squared)
    print('  loss_np  = {}'.format(loss_np))
    print('  loss_tf  = {}'.format(loss_tf))
    print('  fraction = {}'.format(fraction))

    # The reference value was previously computed but never compared.
    np.testing.assert_allclose(loss_tf, loss_np, rtol=1e-3, atol=1e-5)
    np.testing.assert_allclose(fraction, fraction_np, rtol=1e-3, atol=1e-5)

### `batch_hard_triplet_loss` の Numpy版実装

In [None]:
def _get_anchor_positive_triplet_mask(labels):
    """Return a 2D mask where mask[a, p] is True iff a and p are distinct and have same label.
    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check that i and j are distinct
    indices_equal = np.eye(np.shape(labels)[0]).astype(np.bool)
    indices_not_equal = np.logical_not(indices_equal)

    # Check if labels[i] == labels[j]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))

    # Combine the two masks
    mask = np.logical_and(indices_not_equal, labels_equal)

    return mask

def _get_anchor_negative_triplet_mask(labels):
    """Return a 2D mask where mask[a, n] is True iff a and n have distinct labels.
    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    # Check if labels[i] != labels[k]
    # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
    labels_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))

    mask = np.logical_not(labels_equal)

    return mask


def batch_hard_triplet_loss(labels, embeddings, margin, squared=False):
    """Build the triplet loss over a batch of embeddings (NumPy version).

    For each anchor, we get the hardest positive and hardest negative to form a triplet.

    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: array of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, the pairwise squared euclidean distance matrix is used.
                 If false, the pairwise euclidean distance matrix is used.

    Returns:
        triplet_loss: scalar, the mean over all anchors of the hardest-triplet loss
    """
    # Get the pairwise distance matrix
    pairwise_dist = _pairwise_distances(embeddings, squared=squared)

    # For each anchor, get the hardest positive
    # First, we need to get a mask for every valid positive (they should have same label)
    mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
    # The `np.float` alias was removed from NumPy; the builtin `float` is the same dtype.
    mask_anchor_positive = mask_anchor_positive.astype(float)

    # We put to 0 any element where (a, p) is not valid (valid if a != p and label(a) == label(p))
    anchor_positive_dist = np.multiply(mask_anchor_positive, pairwise_dist)

    # shape (batch_size, 1)
    hardest_positive_dist = np.amax(anchor_positive_dist, axis=1, keepdims=True)

    # For each anchor, get the hardest negative
    # First, we need to get a mask for every valid negative (they should have different labels)
    mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
    mask_anchor_negative = mask_anchor_negative.astype(float)

    # We add the maximum value in each row to the invalid negatives (label(a) == label(n))
    # so that they can never win the row-wise minimum below.
    max_anchor_negative_dist = np.amax(pairwise_dist, axis=1, keepdims=True)
    anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (1.0 - mask_anchor_negative)

    # shape (batch_size, 1) because of keepdims=True
    hardest_negative_dist = np.amin(anchor_negative_dist, axis=1, keepdims=True)

    # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
    triplet_loss = np.maximum(hardest_positive_dist - hardest_negative_dist + margin, 0.0)

    # Get final mean triplet loss
    triplet_loss = np.mean(triplet_loss)

    return triplet_loss

#### TEST

In [None]:
"""Test the triplet loss with batch hard triplet mining"""
num_data = 4
feat_dim = 3
margin = 0.2
num_classes = 5

embeddings = np.random.rand(num_data, feat_dim).astype(np.float32)
labels = np.random.randint(0, num_classes, size=(num_data)).astype(np.float32)

for squared in [True, False]:
    print('#{}'.format(squared))
    pdist_matrix = pairwise_distance_np(embeddings, squared=squared)

    loss_np = 0.0
    for i in range(num_data):
        same_label = labels == labels[i]

        # Select the hardest positive (the anchor itself, at distance 0, is always included)
        max_pos_dist = np.max(pdist_matrix[i][same_label])

        # Select the hardest negative. With so few samples all labels may coincide;
        # np.min would raise on the empty selection, so fall back to the row maximum,
        # which is what batch_hard_triplet_loss yields when an anchor has no negative.
        neg_dists = pdist_matrix[i][~same_label]
        if neg_dists.size > 0:
            min_neg_dist = np.min(neg_dists)
        else:
            min_neg_dist = np.max(pdist_matrix[i])

        loss = np.maximum(0.0, max_pos_dist - min_neg_dist + margin)
        loss_np += loss

    loss_np /= num_data

    # Compute the loss with the NumPy port (the `loss_tf` name is kept from the TensorFlow original).
    loss_tf = batch_hard_triplet_loss(labels, embeddings, margin, squared=squared)
    print('  loss_np  = {}'.format(loss_np))
    print('  loss_tf  = {}'.format(loss_tf))

    # The reference value was previously computed but never compared.
    np.testing.assert_allclose(loss_tf, loss_np, rtol=1e-3, atol=1e-5)

In [None]:
# Call form of print: valid on Python 2 and 3, consistent with the rest of the notebook.
print(embeddings)

In [None]:
# Call form of print: valid on Python 2 and 3, consistent with the rest of the notebook.
print(labels)

In [None]:
# Read only column 1 of the training list (presumably the class label of each image — confirm
# against the list format).
# NOTE(review): absolute, machine-specific path; consider deriving it from `caffe_root`.
txt_data = np.loadtxt("/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist_64.txt",usecols=(1,))

In [None]:
# Keep the first 64 entries, matching the 64 embedding rows collected below.
mnist_labels = txt_data[:64]

In [None]:
# Run each of the 64 MNIST test sets through the net and store one embedding row per set.
# NOTE(review): `cv2`, `plt` and `net` are not defined in this cell; presumably earlier cells provide them.
# `test_num` is the index of the test set; this initial value is overwritten by the loop below.
test_num = 0
mnist_embeddings = np.zeros((64,10))

for test_num in range(64):
    MNIST_ROOT = "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images/"
    ANC_PATH = MNIST_ROOT + str(test_num) + "_anchor.jpg"
    POS_PATH = MNIST_ROOT + str(test_num) + "_positive.jpg"
    NEG_PATH = MNIST_ROOT + str(test_num) + "_negative.jpg"

    # Load the anchor / positive / negative images of this test set.
    img_anc = cv2.imread(ANC_PATH)
    img_pos = cv2.imread(POS_PATH)
    img_neg = cv2.imread(NEG_PATH)

    plt.imshow(img_anc)

    # Move the last axis to the front (presumably height x width x channel -> channel x height x width).
    img_anc_transposed = img_anc.transpose(2, 0, 1)
    img_pos_transposed = img_pos.transpose(2, 0, 1)
    img_neg_transposed = img_neg.transpose(2, 0, 1)

    # set a pair data
    # NOTE(review): only the anchor and the positive are fed to the net; the negative is unused here.
    net.blobs['data'].data[...] = np.array([img_anc_transposed, img_pos_transposed])

    # calculate distance
    net.forward()

    # NOTE(review): a 10-value row receives the whole 'feat' blob; this only works if the blob
    # broadcasts to shape (10,) — confirm the 'feat' shape for a two-image batch.
    mnist_embeddings[test_num] = net.blobs['feat'].data

In [None]:
# Dump the labels and the collected embeddings for a visual check.
print(mnist_labels)
print(mnist_embeddings)

In [None]:
labels = mnist_labels
embeddings = mnist_embeddings

# Take the sample count from the data itself: the global `num_data` left over from the
# earlier random-data test does not match the MNIST batch, so the reference loss would
# only cover (and be averaged over) the first few rows.
num_samples = embeddings.shape[0]

# NOTE: `margin` is reused from the test cell above.
for squared in [True, False]:
    print('squared=#{}'.format(squared))
    pdist_matrix = pairwise_distance_np(embeddings, squared=squared)

    loss_np = 0.0
    for i in range(num_samples):
        # Select the hardest positive
        max_pos_dist = np.max(pdist_matrix[i][labels == labels[i]])

        # Select the hardest negative
        min_neg_dist = np.min(pdist_matrix[i][labels != labels[i]])

        loss = np.maximum(0.0, max_pos_dist - min_neg_dist + margin)
        loss_np += loss

    loss_np /= num_samples

    # Compute the loss with the NumPy port (the `loss_tf` name is kept from the TensorFlow original).
    loss_tf = batch_hard_triplet_loss(labels, embeddings, margin, squared=squared)
    print('  loss_np  = {}'.format(loss_np))
    print('  loss_tf  = {}'.format(loss_tf))

In [None]:
!nvidia-smi

In [None]:
!lscpu

# Online Triplet Mining

## omoindrot's blog  
### https://omoindrot.github.io/triplet-loss#offline-and-online-triplet-mining


-----


omoindrotのOnline triplet miningは
http://bamos.github.io/2016/01/19/openface-0.2.0/
の思想に基づいている。

Bartoszの洞察は、ネットワークを共有パラメータで複製する必要がなく、埋め込みをトリプレットにマッピングすることによって、重複のない（ユニークな）画像に対して単一のネットワークを使用できること。


![説明図](https://omoindrot.github.io/assets/triplet_loss/online_triplet_loss.png "説明図")

![説明図](http://bamos.github.io/data/2016-01-19/optimization-after.png "説明図")

データセット内の15人から1人あたり20枚の画像をサンプリングし、ネットワークを介して300枚の画像すべてをGPU上で1回の順方向パスで送信して、300個の埋め込みを取得している。 次に、CPU上で、これらの埋め込みが2850トリプレットにマッピングされ、triplet関数に渡される。その後、微分は逆伝播でネットワークパスの元の画像へ戻ってマッピングされる。こうすれば1回でトリプレットの計算が済む。 


👉モデルは、siameseの3並列にする必要はなくなり、1つの単一のネットワークで事足りてしまうようになる。  

[課題]  
1.  どのようにネットワークを組むべきか。 
2.  また、逆伝播はどのように実装しているか・・・？Caffeでは手計算しかなさそう。  
https://arxiv.org/pdf/1703.07737.pdf　式(5)を微分して実装することになるか。
もしくは、http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1FilterLayer.html　をうまくつかえばできる？？

👉いけそうな未来が見えた。

以下がトリプレットロスのコード。

-----

```python
def batch_hard_triplet_loss(labels, embeddings, margin, squared=False):
    """Build the triplet loss over a batch of embeddings.

    For each anchor, we get the hardest positive and hardest negative to form a triplet.

    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: tensor of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                 If false, output is the pairwise euclidean distance matrix.

    Returns:
        triplet_loss: scalar tensor containing the triplet loss
    """
    
#1.
    # Get the pairwise distance matrix
    pairwise_dist = _pairwise_distances(embeddings, squared=squared)

#2.(a-pマスク)
    # For each anchor, get the hardest positive
    # First, we need to get a mask for every valid positive (they should have same label)
    mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
    mask_anchor_positive = tf.to_float(mask_anchor_positive)
    
#3.
    # We put to 0 any element where (a, p) is not valid (valid if a != p and label(a) == label(p))
    anchor_positive_dist = tf.multiply(mask_anchor_positive, pairwise_dist)

#5.
    # shape (batch_size, 1)
    hardest_positive_dist = tf.reduce_max(anchor_positive_dist, axis=1, keepdims=True)

#2.(a-nマスク)
    # For each anchor, get the hardest negative
    # First, we need to get a mask for every valid negative (they should have different labels)
    mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
    mask_anchor_negative = tf.to_float(mask_anchor_negative)

#4.
    # We add the maximum value in each row to the invalid negatives (label(a) == label(n))
    max_anchor_negative_dist = tf.reduce_max(pairwise_dist, axis=1, keepdims=True)
    anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (1.0 - mask_anchor_negative)

#6.
    # shape (batch_size, 1)
    hardest_negative_dist = tf.reduce_min(anchor_negative_dist, axis=1, keepdims=True)

#7.
    # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
    triplet_loss = tf.maximum(hardest_positive_dist - hardest_negative_dist + margin, 0.0)

    # Get final mean triplet loss
    triplet_loss = tf.reduce_mean(triplet_loss)

    return triplet_loss 
```

-----
やることをまとめると、以下。

1.  embedding出力層のoutから、全embeddingの組合せの Distance を導出したマトリクスを作る
2.  labelから、a-pマスク・a-nマスクを作る。マスクはbackpropagationいらない。
3.  マトリクスとa-pマスクで、positive-distanceのみ残す。(filter layer)
4.  マトリクスとa-nマスクで、negative-distanceのみ残す。(filter layer)

5.  3.の出力をargmax layerで最大値のみ残す。次元削減
6.  4.の出力を x(-1) -> argmax -> x(-1) する。　(argminがない)

7.  triplet loss を各anchorに対して計算したのち　mean を出す。

ネットワークを設計すると　以下のようになる？

![network](online_mining_network.png "online mining network")



caffe の filter layerについて、mask側にはbackpropagationできない。
mask生成するpython layerが必要になりそうだが、ここでbackwardを実装する必要はないはず。
よって、mask生成プログラムのnp版がそのまま使えるはず。

pairwise distanceは、うまく今のレイヤを流用して作成する必要がありそう。
（既存のレイヤについて、出力が配列要素になるように改造すれば良いだけか？）

Triplet Lossも同様に、バッチサイズ分の次元の入力に対して出力し、meanを導出するというプロセスが必要。

negative側にはargmin layer なるものがなかったので、powerで-1倍するレイヤを挟むことによって実現する。


## 以下、まずはpython layerを作成するところから。

### pairwise distances layer (作成中)

In [None]:
%%writefile tripletloss/pairwise_distances_layer.py
import caffe
import numpy as np

class PairwiseDistancesLayer(caffe.Layer):
    """
    Compute the matrix of pairwise squared Euclidean distances between the
    L2-normalized samples of the bottom blob.

    bottom[0]: embeddings, first axis is the batch
    top[0]:    distances with shape [batch_size, batch_size]

    The layer is forward only for now: `backward` does not propagate gradients.
    """

    def setup(self, bottom, top):
        pass

    def reshape(self, bottom, top):
        print('# reshape start.')

        # self.batch_size: number of samples in the batch
        self.batch_size = bottom[0].data.shape[0]
        print('batch_size = {}'.format(self.batch_size))

        # pairwise distances output with shape [batch_size, batch_size]
        top[0].reshape(self.batch_size, self.batch_size)
        print('top[0] data shape = {}'.format(top[0].data.shape))

        print('# reshape end.')

    def normalize(self, array):
        # ||f(x)||_2=1
        l2 = np.linalg.norm(array, ord=2, axis=1, keepdims=True)
        # avoid dividing by zero
        l2[l2==0] = 1
        return array / l2

    def forward(self, bottom, top):
        """ computes the pairwise distance matrix
        top[0][i, j] = SUM((norm_i - norm_j) ** 2), where norm_i is the
        L2-normalized i-th sample of bottom[0]. The matrix is symmetric and
        its diagonal is zero.
        """
        print('# forward start.')

        # Normalize here (not in reshape) so the result always reflects the data
        # this forward pass actually sees.
        self.norm = self.normalize(bottom[0].data)
        print('norm = \n{}'.format(self.norm))

        # One flat feature vector per sample, then all pairs at once through broadcasting:
        # [batch, 1, feat] - [1, batch, feat] -> [batch, batch, feat]
        flat = self.norm.reshape(self.batch_size, -1)
        self.diff = flat[:, np.newaxis, :] - flat[np.newaxis, :, :]
        self.dist = np.sum(self.diff ** 2, axis=2)

        # Writes every entry, including the (zero) diagonal.
        top[0].data[...] = self.dist

        print('top[0] data = \n{}'.format(top[0].data))
        print('# forward end.')

    def backward(self, top, propagate_down, bottom): # still under construction.
        """ not implemented yet: no gradient is written to bottom[0].diff. """
        pass

#        # gradient w.r.t. Dorg
#        diff_org = self.norm_neg - self.norm_pos
#        for i in range(self.batch_size):
#            if self.loss[i] == 0:
#                diff_org[i] = 0
#        bottom[0].diff[...] = 2 * diff_org
#        print('org diff = {}'.format(bottom[0].diff))
#        
#        # gradient w.r.t. Dpos
#        for i in range(self.batch_size):
#            self.diff_pos[i] = 0
#        bottom[1].diff[...] = -2 * self.diff_pos
#        print('pos diff = {}'.format(bottom[1].diff))
#        
#        # gradient w.r.t. Dneg
#        for i in range(self.batch_size):
#            self.diff_neg[i] = 0
#        bottom[2].diff[...] = 2 * self.diff_neg
#        print('neg diff = {}'.format(bottom[2].diff))

In [None]:
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)

import sys
sys.path.insert(0, caffe_root + 'python')
sys.path.insert(0, caffe_root + 'examples/tripletloss')
import caffe

import tempfile
import numpy as np
from caffe import layers as L

def load_net(net_proto):
    """Build a TEST-phase caffe.Net from a net proto.

    The proto is written to a temporary prototxt file because caffe.Net is
    constructed from a file path; the file is removed once the net exists, so
    repeated calls no longer leave temp files behind.
    """
    import os  # local import: only needed for the temp-file cleanup

    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(str(net_proto))
    try:
        return caffe.Net(f.name, caffe.TEST)
    finally:
        os.remove(f.name)

def example_network(batch_size):
    """Build a test net: a [batch_size, 3] DummyData input 'ip' feeding PairwiseDistancesLayer.

    The layer output is named 'pairwise_dist' (it was labelled 'a_p_mask', a
    name copied from the mask-layer test that did not describe its content).
    """
    n = caffe.NetSpec()

    # we use the dummy data layer to control the
    # shape of the inputs to the layer we are testing
    ip_dims = [batch_size, 3]
    n.ip = L.DummyData(shape=[dict(dim=ip_dims)],
                       transform_param=dict(scale=1.0/255.0),
                       ntop=1)

    n.pairwise_dist = L.Python(n.ip, ntop=1, python_param=dict(module='pairwise_distances_layer', layer='PairwiseDistancesLayer'))
    return n.to_proto()

In [None]:
IMG_ANC = [[1.0, 2.0, 3.0],
           [2.0, 3.0, 4.0],
           [3.0, 4.0, 5.0],
           [4.0, 5.0, 6.0],
           [5.0, 6.0, 7.0],
           [6.0, 7.0, 8.0]]

# embeddings holds one feature vector per row: shape (batch_size, feature_size) = (6, 3).
# (The list is no longer wrapped in an extra list, which produced shape (1, 6, 3).)
embeddings = np.array(IMG_ANC, dtype=np.float32)
print('embeddings shape = {}'.format(embeddings.shape))

# Size the network from the data so the two cannot drift apart.
net_proto = example_network(embeddings.shape[0])
net = load_net(net_proto)
net.blobs['ip'].data[...] = embeddings

net.forward()

for name in net.blobs:
    print('# {}'.format(name))
    print('value = \n{}'.format(net.blobs[name].data))

In [None]:

    
# Run the backward pass through the test net built above.
net.backward()

### mean triplet loss layer

In [None]:
%%writefile tripletloss/mean_triplet_loss_layer.py
import caffe
import numpy as np

class MeanTripletLossLayer(caffe.Layer):
    """
    Mean hinge triplet loss over already-mined distances.

    bottom[0]: hardest positive distance of each anchor
    bottom[1]: hardest negative distance of each anchor (same count as bottom[0])
    top[0]:    scalar loss = mean(max(pos - neg + margin, 0))

    param_str: dict literal such as '{"margin": 2.0}'; margin defaults to 1.0.
    """

    def setup(self, bottom, top):
        import ast  # local import: only used to parse param_str

        print('# setup start.')

        if len(bottom) != 2:
            raise Exception("Need two inputs: hardest positive and hardest negative distances.")

        # literal_eval only accepts Python literals, unlike eval which would
        # execute arbitrary code found in the prototxt. An empty param_str
        # simply selects the default margin.
        params = ast.literal_eval(self.param_str) if self.param_str else {}
        try:
            self.margin = float(params['margin'])
        except (KeyError, TypeError, ValueError):
            # margin missing or not convertible to float: fall back to the default
            self.margin = 1.0

        print('margin = {}'.format(self.margin))
        print('# setup end.')

    def reshape(self, bottom, top):
        print('# reshape start.')

        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same number of elements.")

        # self.hardest_pos: [batch size]
        self.hardest_pos = np.zeros_like(bottom[0].data, dtype=np.float32)
        print('self.hardest_pos = {}'.format(self.hardest_pos))

        # self.hardest_neg: [batch size]
        self.hardest_neg = np.zeros_like(bottom[0].data, dtype=np.float32)
        print('self.hardest_neg = {}'.format(self.hardest_neg))

        # self.losses: [batch size]
        self.losses = np.zeros_like(bottom[0].data, dtype=np.float32)
        print('self.losses = {}'.format(self.losses))

        # the loss output is a single value
        top[0].reshape(1)
        print('top[0] data shape = {}'.format(top[0].data.shape))

        print('# reshape end.')

    def forward(self, bottom, top):
        """ computes the loss
        Loss = MEAN[i](max(Di_pos - Di_neg + margin, 0))
        Di_pos = bottom[0][i], the hardest positive distance of anchor i
        Di_neg = bottom[1][i], the hardest negative distance of anchor i
        """
        print('# forward start.')

        self.hardest_pos = bottom[0].data
        print('self.hardest_pos = {}'.format(self.hardest_pos))

        self.hardest_neg = bottom[1].data
        print('self.hardest_neg = {}'.format(self.hardest_neg))

        # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
        self.losses = np.maximum(self.hardest_pos - self.hardest_neg + self.margin, 0.0)
        print('self.losses = {}'.format(self.losses))

        # Get final mean triplet loss
        top[0].data[...] = np.mean(self.losses)
        print('loss = {}'.format(top[0].data))

        print('# forward end.')

    def backward(self, top, propagate_down, bottom):
        """ computes the gradient w.r.t. both inputs
        dL/dDi_pos =  top_diff / N if Lossi > 0 else 0
        dL/dDi_neg = -top_diff / N if Lossi > 0 else 0
        where N is the number of anchors and Lossi is the per-anchor hinge
        loss stored by forward().
        """
        count = self.losses.size
        if count == 0:
            return

        # The hinge passes gradient only where it is active; the mean contributes 1/N.
        active = (self.losses > 0).astype(np.float32)
        grad = top[0].diff[0] * active / count

        if propagate_down[0]:
            bottom[0].diff[...] = grad.reshape(bottom[0].diff.shape)
        if propagate_down[1]:
            bottom[1].diff[...] = (-grad).reshape(bottom[1].diff.shape)

In [None]:
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)

import sys
sys.path.insert(0, caffe_root + 'python')
sys.path.insert(0, caffe_root + 'examples/tripletloss')
import caffe

import tempfile
import numpy as np
from caffe import layers as L

def load_net(net_proto):
    """Build a TEST-phase caffe.Net from a net proto.

    The proto is written to a temporary prototxt file because caffe.Net is
    constructed from a file path; the file is removed once the net exists, so
    repeated calls no longer leave temp files behind.
    """
    import os  # local import: only needed for the temp-file cleanup

    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(str(net_proto))
    try:
        return caffe.Net(f.name, caffe.TEST)
    finally:
        os.remove(f.name)

def example_network(batch_size):
    """Build a test net: two [batch_size] DummyData inputs ('pos', 'neg') feeding MeanTripletLossLayer."""
    def dummy_input():
        # the dummy data layer lets us control the shape of the
        # inputs handed to the layer under test
        return L.DummyData(shape=[dict(dim=[batch_size])],
                           transform_param=dict(scale=1.0/255.0),
                           ntop=1)

    loss_layer = dict(module='mean_triplet_loss_layer',
                      layer='MeanTripletLossLayer',
                      param_str='{\"margin\": 2.0}')

    spec = caffe.NetSpec()
    spec.pos = dummy_input()
    spec.neg = dummy_input()
    spec.loss = L.Python(spec.pos, spec.neg, ntop=1, python_param=loss_layer)
    return spec.to_proto()

In [None]:
IMG_POS = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0]
IMG_NEG = [3.0, 2.0, 1.0, 5.0, 3.0, 0.0]

# One distance per anchor: both arrays have shape (batch_size,) = (6,),
# matching the 1-D input blobs of the test network.
positives = np.array(IMG_POS, dtype=np.float32)
negatives = np.array(IMG_NEG, dtype=np.float32)

# Size the network from the data so the two cannot drift apart.
net_proto = example_network(positives.shape[0])
net = load_net(net_proto)
net.blobs['pos'].data[...] = positives
net.blobs['neg'].data[...] = negatives

net.forward()

for name in net.blobs:
    print('# {}'.format(name))
    print('value = \n{}'.format(net.blobs[name].data))

In [None]:

    
# Run the backward pass through the test net built above.
net.backward()

### anchor-positive & anchor-negative mask layer (完成！)

In [None]:
%%writefile tripletloss/triplet_mask_layer.py
import caffe
import numpy as np


class TripletMaskLayer(caffe.Layer):
    """
    Compute matrix shaped a-p mask and a-n mask.
    this layer is forward only.

    bottom[0]: labels, one per sample
    top[0]:    anchor-positive mask with shape [batch_size, batch_size]
    top[1]:    anchor-negative mask with shape [batch_size, batch_size]
    """

    def setup(self, bottom, top):
        print('# setup start.')
        print('# setup end.')

    def reshape(self, bottom, top):
        print('# reshape start.')

        batch_size = bottom[0].data.shape[0]

        # Check that i and j are distinct
        # (the `np.bool` alias was removed from NumPy; the builtin `bool` is used as dtype)
        self.indices_equal = np.eye(batch_size, dtype=bool)
        self.indices_not_equal = np.logical_not(self.indices_equal)
        self.labels_equal = np.zeros((batch_size, batch_size), dtype=bool)

        print('bottom data shape = {}'.format(bottom[0].data.shape))
        print('indices_equal = \n{}'.format(self.indices_equal))
        print('indices_not_equal = \n{}'.format(self.indices_not_equal))
        print('labels_equal = \n{}'.format(self.labels_equal))

        # anchor-positive mask output with shape [batch_size, batch_size]
        top[0].reshape(batch_size, batch_size)
        print('top[0] data shape = {}'.format(top[0].data.shape))

        # anchor-negative mask output with shape [batch_size, batch_size]
        top[1].reshape(batch_size, batch_size)
        print('top[1] data shape = {}'.format(top[1].data.shape))

        print('# reshape end.')

    def forward(self, bottom, top):
        """1. make a 2D mask where mask[a, p] is True if a and p are distinct and have same label.
           2. make a 2D mask where mask[a, n] is True if a and n have distinct labels.
        Args:
            bottom[0]: labels, one per sample
        Returns:
            top[0]: anchor-positive mask.
            top[1]: anchor-negative mask.
            Both are written as 0/1 values into the top blobs.
        """
        print('# forward start.')

        # Flatten so that label blobs with trailing singleton axes are handled like 1-D labels.
        labels = np.ravel(bottom[0].data)

        # Check if labels[i] == labels[j]
        # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
        self.labels_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))

        print('labels_equal = \n{}'.format(self.labels_equal))

        # Combine the two masks
        top[0].data[...] = np.logical_and(self.indices_not_equal, self.labels_equal)
        top[1].data[...] = np.logical_not(self.labels_equal)

        print('a-p mask = \n{}'.format(top[0].data))
        # The a-n mask lives in top[1] (the previous version printed top[0] twice).
        print('a-n mask = \n{}'.format(top[1].data))

        print('# forward end.')

    def backward(self, top, propagate_down, bottom):
        # this layer is forward only.
        pass

-----
マスクレイヤが正しく動いているかをチェック。Hello test forward

In [None]:
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)

import sys
sys.path.insert(0, caffe_root + 'python')
sys.path.insert(0, caffe_root + 'examples/tripletloss')
import caffe

In [None]:
import tempfile
import numpy as np
from caffe import layers as L

def load_net(net_proto):
    """Build a TEST-phase caffe.Net from a net proto.

    The proto is written to a temporary prototxt file because caffe.Net is
    constructed from a file path; the file is removed once the net exists, so
    repeated calls no longer leave temp files behind.
    """
    import os  # local import: only needed for the temp-file cleanup

    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(str(net_proto))
    try:
        return caffe.Net(f.name, caffe.TEST)
    finally:
        os.remove(f.name)

def example_network(batch_size):
    """Build a minimal net that feeds a dummy label blob into TripletMaskLayer.

    Args:
        batch_size: number of entries in the DummyData 'label' blob.

    Returns:
        The result of NetSpec.to_proto() for a net with the tops 'label',
        'a_p_mask' and 'a_n_mask'.
    """
    n = caffe.NetSpec()

    # The dummy data layer only fixes the shape of the label blob; the actual
    # label values are written into the blob by the caller before forward().
    label_dims = [batch_size]
    n.label = L.DummyData(shape=[dict(dim=label_dims)],
                          transform_param=dict(scale=1.0/255.0),
                          ntop=1)

    n.a_p_mask, n.a_n_mask = L.Python(n.label, ntop=2, python_param=dict(module='triplet_mask_layer', layer='TripletMaskLayer'))
    return n.to_proto()

In [None]:
# Six sample labels: label 2 occurs three times, every other label once.
IMG_LABEL = [1, 2, 3, 4, 2, 2]

label_data = np.array(IMG_LABEL)
print('label_data shape = {}'.format(label_data.shape))

# Build the mask-only net sized to the batch, inject the labels and run it once.
net_proto = example_network(len(IMG_LABEL))
net = load_net(net_proto)
net.blobs['label'].data[...] = label_data
net.forward()

# Dump every blob of the net for visual inspection.
for name, blob in net.blobs.items():
    print('# {}'.format(name))
    print('value = \n{}'.format(blob.data))

# Networkを設計する

In [None]:
%%writefile tripletloss/mnist_omoindrot_tripletloss_train_test_10.prototxt
name: "mnist_tripletloss_train_test_10"
# TRAIN-phase input. ImageData reads the list file named in `source` and
# produces the image blob "triplet_data" and the "label" blob, 64 entries
# per batch. Pixel values are multiplied by 0.00390625 (= 1/256).
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist_64.txt"
    batch_size: 64
  }
}
# TEST-phase input with the same tops and settings.
# NOTE(review): this reads the same trainlist_64.txt as the TRAIN layer, so the
# test phase evaluates on training data - confirm this is intended.
layer {
  name: "triplet_data"
  type: "ImageData"
  top: "triplet_data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  image_data_param {
    source: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/trainlist_64.txt"
    batch_size: 64
  }
}
################# CNN #############
# Feature extractor: conv1 -> pool1 -> conv2 -> pool2 -> ip1 -> relu1 -> ip2.
# Every learnable layer names its weight and bias blobs (<layer>_w / <layer>_b)
# and gives the bias lr_mult 2 against lr_mult 1 for the weights.

# conv1: 20 filters of size 5, stride 1; xavier weights, constant-filled bias.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "triplet_data"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# pool1: max pooling, kernel 2, stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv2: 50 filters of size 5, stride 1; same fillers as conv1.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# pool2: max pooling, kernel 2, stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ip1: fully connected layer with 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# relu1: in-place ReLU on ip1 (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# ip2: fully connected layer producing the 10-dimensional blob "feat".
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "feat"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

############# L2 Normalization ############

# L2-normalises each row of "feat" into "embedding", i.e. x / sqrt(sum(x^2)),
# using the same layer sequence as the l2normed() helper at the top of the notebook.

# Step 1: sum of squares over axis 1 (one value per sample).
layer {
  name: "Reduction1"
  type: "Reduction"
  bottom: "feat"
  top: "Reduction1"
  reduction_param {
    operation: SUMSQ
    axis: 1
  }
}
# Step 2: raise to the power -0.5, giving 1 / ||x||_2.
layer {
  name: "Power1"
  type: "Power"
  bottom: "Reduction1"
  top: "Power1"
  power_param {
    power: -0.5
  }
}
# Step 3: insert a new axis of size 1 at the end (axis -1, num_axes 0) so the
# per-sample factor can be tiled along axis 1.
layer {
  name: "Reshape1"
  type: "Reshape"
  bottom: "Power1"
  top: "Reshape1"
  reshape_param {
    shape {
      dim: 1
    }
    axis: -1
    num_axes: 0
  }
}
# Step 4: repeat the factor 10 times along axis 1; 10 equals ip2's num_output,
# so the tiled blob lines up with "feat".
layer {
  name: "Tile1"
  type: "Tile"
  bottom: "Reshape1"
  top: "Tile1"
  tile_param {
    axis: 1
    tiles: 10
  }
}
# Step 5: element-wise product feat * (1 / ||feat||) -> "embedding".
layer {
  name: "norm"
  type: "Eltwise"
  bottom: "feat"
  bottom: "Tile1"
  top: "embedding"
  eltwise_param {
    operation: PROD
  }
}

############# pairwise distance ###############
# Python layer that turns "embedding" into "dist_matrix". The implementation is
# pairwise_distances_layer.PairwiseDistancesLayer and is not part of this cell.
# NOTE(review): a "margin" is passed in param_str; confirm the layer actually
# reads it, since the loss layer further down receives no margin of its own.
layer {
  name: "pairwise_dist"
  type: "Python"
  bottom: "embedding"
  top: "dist_matrix"
  python_param {
    module: "pairwise_distances_layer"
    layer: "PairwiseDistancesLayer"
    param_str: '{\"margin\": 1.0}'
  }
}

############# anc-pos/anc-neg mask ###############

# Python layer (triplet_mask_layer.TripletMaskLayer) that derives two masks from
# "label": a_p_mask marks pairs of distinct samples with the same label,
# a_n_mask marks pairs with different labels.
layer {
  name: "mask"
  type: "Python"
  bottom: "label"
  top: "a_p_mask"
  top: "a_n_mask"
  python_param {
    module: "triplet_mask_layer"
    layer: "TripletMaskLayer"
  }
}

############# anc-pos dist ###############

# Element-wise product of the distance matrix with the anchor-positive mask:
# entries where a_p_mask is 0 become 0 in "pos_survive".
layer {
  name: "pos_filter"
  type: "Eltwise"
  bottom: "dist_matrix"
  bottom: "a_p_mask"
  top: "pos_survive"
  eltwise_param { operation: PROD }
}

# Hardest positive per anchor: the row-wise maximum of the masked distances.
# With `axis` set and out_max_val left at its default (false), ArgMax emits the
# index of the maximum along that axis. out_max_val: true makes it emit the
# maximum value instead, which is what the top name "pos_dist" implies.
# The output shape is the same in both cases.
layer {
  name: "pos_argmax"
  type: "ArgMax"
  bottom: "pos_survive"
  top: "pos_dist"
  argmax_param {
    axis: 1
    out_max_val: true
  }
}

############# anc-neg dist ###############

# Element-wise product of the distance matrix with the anchor-negative mask:
# entries where a_n_mask is 0 become 0 in "neg_survive".
# NOTE(review): if dist_matrix is non-negative, these zeros become the largest
# entries of each row once the sign is flipped by the following Power layer.
# A row-wise minimum taken that way would pick a masked-out entry rather than
# a real negative - confirm whether masked entries need an offset first.
layer {
  name: "neg_filter"
  type: "Eltwise"
  bottom: "dist_matrix"
  bottom: "a_n_mask"
  top: "neg_survive"
  eltwise_param { operation: PROD }
}


# Flips the sign of "neg_survive" (Power layer with scale -1; power and shift
# are not set here, so the layer's defaults apply).
layer {
  name: "scale1"
  bottom: "neg_survive"
  top: "neg_minus"
  type: "Power"
  power_param {
    scale: -1
  }
}

# Row-wise maximum of the negated distances. The Power layer that follows flips
# the sign back, so together they are meant to give a row-wise minimum.
# With `axis` set and out_max_val left at its default (false), ArgMax emits the
# index of the maximum, and negating an index is meaningless. out_max_val: true
# makes it emit the maximum value instead. The output shape is unchanged.
layer {
  name: "neg_argmax"
  type: "ArgMax"
  bottom: "neg_minus"
  top: "neg_argmax"
  argmax_param {
    axis: 1
    out_max_val: true
  }
}

# Flips the sign of the ArgMax output to produce "neg_dist".
# NOTE(review): negate -> ArgMax -> negate reads like a row-wise minimum. That
# only holds when the "neg_argmax" blob carries values rather than indices.
layer {
  name: "scale2"
  bottom: "neg_argmax"
  top: "neg_dist"
  type: "Power"
  power_param {
    scale: -1
  }
}

############# loss ###############

# Python layer that combines "pos_dist" and "neg_dist" into the "loss" blob.
# The implementation is mean_triplet_loss_layer.MeanTripletLossLayer and is not
# part of this cell; a "debug" flag is passed through param_str.
# NOTE(review): no loss_weight is set here. Caffe assigns a default loss weight
# of 1 only to its built-in loss layers, so a Python layer presumably needs an
# explicit `loss_weight: 1` for "loss" to drive backpropagation - confirm.
# NOTE(review): pos_dist and neg_dist come from ArgMax layers, which as far as
# I know do not implement a backward pass - confirm that gradients can reach
# the embedding at all.
layer {
  name: "mean_triplet_loss"
  type: "Python"
  bottom: "pos_dist"
  bottom: "neg_dist"
  top: "loss"
  python_param {
    module: "mean_triplet_loss_layer"
    layer: "MeanTripletLossLayer"
    param_str: '{\"debug\": 1.0}'
  }
}



In [None]:
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)

import sys
sys.path.insert(0, caffe_root + 'python')
sys.path.insert(0, caffe_root + 'examples/tripletloss')
import caffe

In [None]:
# Load the MNIST training split through the `mnist` package's MNIST loader.
from mnist import MNIST
import os
# caffe_root comes from the path-setup cell; the data is read from <caffe_root>/data/mnist.
mnist_data_dir = os.path.join(caffe_root, 'data/mnist')
mndata = MNIST(mnist_data_dir)
images, labels = mndata.load_training()
print('loaded {} images, {} labels'.format(len(images), len(labels)))
# Prints the first image in full as a quick sanity check of the loaded data.
print('sample image at 0 = {}'.format(images[0]))

In [None]:
import numpy as np
import cv2
from StringIO import StringIO

# Builds (anchor, positive, negative) triplets from the MNIST training images,
# writes each member as a JPEG under img_dir, and writes the "<path> <label>"
# list file trainlist_64.txt one directory above img_dir.
# NOTE(review): img_dir is an absolute path tied to one machine; the prototxt
# cells reference the same directory tree.
img_dir = '/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/images'
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# create a training list
# triplet_dict holds the [image, label] pair for each role of the triplet being assembled.
triplet_dict = {'anchor': None, 'positive': None, 'negative': None}
# Per-role line buffers for the current group of triplets; batch_str collects the final list.
anchor_list = StringIO()
pos_list = StringIO()
neg_list = StringIO()
batch_str = StringIO()
# triplet_no numbers the image files; batch_size counts triplets in the current group.
triplet_no = 0
batch_size = 0
for i, l in zip(images, labels):
    array = np.array(i)
    img = array.reshape((28, 28))
    
    # Roles are filled in order: anchor, then a positive with the anchor's label,
    # then a negative with a different label. A sample that does not fit the
    # role currently being filled is skipped.
    if triplet_dict['anchor'] is None:
        # this becomes an anchor
        triplet_dict['anchor'] = [img, l]
    elif triplet_dict['positive'] is None:
        # check if this is the same label
        if triplet_dict['anchor'][1] == l:
            # this becomes a positive one
            triplet_dict['positive'] = [img, l]
    elif triplet_dict['anchor'][1] != l:
        # this becomes a negative one
        triplet_dict['negative'] = [img, l]
        
    # The triplet is complete only once the negative has been found.
    if triplet_dict['negative'] is None:
        continue
    
    # write
    anchor_path = os.path.join(img_dir, '{}_anchor.jpg'.format(triplet_no))
    pos_path = os.path.join(img_dir, '{}_positive.jpg'.format(triplet_no))
    neg_path = os.path.join(img_dir, '{}_negative.jpg'.format(triplet_no))
    
    # image
    cv2.imwrite(anchor_path, triplet_dict['anchor'][0])
    cv2.imwrite(pos_path, triplet_dict['positive'][0])
    cv2.imwrite(neg_path, triplet_dict['negative'][0])
    
    # sample
    anchor_list.write('{} {}\n'.format(anchor_path, triplet_dict['anchor'][1]))
    pos_list.write('{} {}\n'.format(pos_path, triplet_dict['positive'][1]))
    neg_list.write('{} {}\n'.format(neg_path, triplet_dict['negative'][1]))
    
    # reset
    triplet_dict['anchor'] = None
    triplet_dict['positive'] = None
    triplet_dict['negative'] = None
    
    triplet_no += 1
    batch_size += 1
    
    # Every 64 triplets, flush one group of 192 lines to batch_str:
    # 64 anchors, then 64 positives, then 64 negatives.
    if batch_size == 64:
        # write anchors first
        batch_str.write(anchor_list.getvalue())
        anchor_list.close()
        anchor_list = StringIO()
        # positive
        batch_str.write(pos_list.getvalue())
        pos_list.close()
        pos_list = StringIO()
        # negative
        batch_str.write(neg_list.getvalue())
        neg_list.close()
        neg_list = StringIO()
        # reset
        batch_size = 0
    
# finally, write sample list
# Triplets still sitting in the per-role buffers (an incomplete group of fewer
# than 64) are not written; only fully flushed groups reach the file.
with open(os.path.join(img_dir, '../' ,'trainlist_64.txt'), 'w') as f:
    f.write(batch_str.getvalue())
    batch_str.close()
    anchor_list.close()
    pos_list.close()
    neg_list.close()

In [None]:
%%writefile tripletloss/mnist_tripletloss_solver_10.prototxt
# The train/test net protocol buffer definition
# Both entries point at the same file; its layers select TRAIN or TEST through
# their `include { phase: ... }` rules.
train_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_omoindrot_tripletloss_train_test_10.prototxt"
test_net: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_omoindrot_tripletloss_train_test_10.prototxt"
# samples = 192 * 77 = 14784
# NOTE(review): the net's ImageData layers use batch_size 64, so 77 iterations
# read 77 * 64 = 4928 list lines rather than 14784 - confirm which batch size
# the "epoch" figures below assume.
test_iter: 77
# test at every epoch
test_interval: 77
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every epoch
display: 77
# The maximum number of iterations = 10 epochs
max_iter: 770
# snapshot intermediate results at every epoch
snapshot: 77
snapshot_prefix: "/home/researcher/caffe-tripletloss/examples/tripletloss/mnist/mnist_tripletloss"

In [None]:
# Select GPU device 0 and switch Caffe to GPU mode.
caffe.set_device(0)
caffe.set_mode_gpu()

# reset solver to avoid a continuous training over multiple runs
# (re-running this cell drops the previous solver object before a new one is built)
solver = None
# Build an SGD solver from the solver prototxt written by the cell above.
solver = caffe.SGDSolver('/home/researcher/caffe-tripletloss/examples/tripletloss/mnist_tripletloss_solver_10.prototxt')

In [None]:
# list every blob of the training net together with the shape of its data
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]

In [None]:
# list the shape of the first parameter blob (the weights) of every layer; biases are left out
[(layer_name, layer_params[0].data.shape) for layer_name, layer_params in solver.net.params.items()]

In [None]:
%%time
itr_per_epoch = 77
niter = itr_per_epoch * 20

train_loss = np.zeros(niter)

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe
    
    # store the train loss
    loss = solver.net.blobs['loss'].data
    
    # output every epoch
    if it % itr_per_epoch == 0:
        print('loss at epoch {} = {}'.format(it/itr_per_epoch, loss))
    
    train_loss[it] = loss