In [1]:
import tensorflow as tf
import os
import numpy as np
from pyramda import compose, curry
from PIL import Image
from tqdm import tqdm_notebook, tqdm

### Utils

In [2]:
def filter_list(refs, condition, l):
    """
    Keep or drop the elements of a list depending on their membership in refs.

    Parameters:
    -----------
    - refs: container
      Reference values. Membership is tested with the `in` operator.
    - condition: bool
      If truthy, keep only the elements that are in refs.
      If falsy, keep only the elements that are not in refs.
    - l: iterable
      Elements to filter.

    Returns:
    --------
    - filtered: list
      Elements of l that passed the test, in their original order.
    """
    if condition:
        return [x for x in l if x in refs]
    return [x for x in l if x not in refs]
    
# Rebind the name to a curried version so that it can be partially applied,
# e.g. filter_list(refs, condition) is used as one stage of a compose() pipeline.
filter_list = curry(filter_list)

In [3]:
def cosine_distance(a, b):
    """
    One minus the dot product of a and b (NumPy version).

    No normalisation is performed here, so the result is a true cosine
    distance only when the inputs already have unit length.

    Parameters:
    -----------
    - a: ndarray
    - b: ndarray
      Transposed before the dot product is taken.

    Returns:
    --------
    - distance: scalar or ndarray, depending on the input ranks
    """
    similarity = np.dot(a, b.T)
    return 1 - similarity

In [4]:
# Curried TensorFlow ops used as stages of the compose() pipelines in this notebook.
# Supplying the first argument yields a function that waits for the second one,
# e.g. subtract_from(x)(y) builds tf.subtract(x, y), i.e. x - y.
add = curry(tf.add)
subtract_from = curry(tf.subtract)
matmul = curry(tf.matmul)
maximum = curry(tf.maximum)

### Session

In [5]:
# Module-level session; forward_pass uses it to evaluate the feature tensor.
sess = tf.Session()

### Load model

In [6]:
def cosine_distance_tf(a, b):
    """
    TensorFlow counterpart of cosine_distance: 1 - a . b^T.

    Builds the ops transpose(b) -> matmul(a, .) -> subtract(1., .).
    No normalisation is applied, so the result is a cosine distance only
    for unit-length rows.

    Parameters:
    -----------
    - a: tensor
      Left operand of the matrix product.
    - b: tensor
      Transposed before the matrix product.

    Returns:
    --------
    - distance: tensor
      1 - matmul(a, transpose(b)).
    """
    similarity = tf.matmul(a, tf.transpose(b))
    return tf.subtract(1., similarity)

In [7]:
def compute_cost(outputs, alpha):
    """
    Triplet hinge loss on cosine distances.

    For every triplet i the loss term is
        max(0, d(anchor_i, positive_i) - d(anchor_i, negative_i) + alpha)
    and the terms are summed over the batch.

    The hinge is applied per triplet *before* the sum; clamping the sum
    instead would let easy triplets cancel out hard ones. Distances are
    computed row by row, so anchor i is only ever compared with positive i
    and negative i. For a single triplet the value equals the previous
    max(0, sum(...)) formulation.

    Parameters:
    -----------
    - outputs: sequence of three tensors (anchor, positive, negative)
      Assumed to be (N, M) batches of embeddings where row i of each tensor
      belongs to the same triplet, and rows are L2-normalised -- TODO confirm
      for the loaded network.
    - alpha: float
      Margin added to the distance difference.

    Returns:
    --------
    - cost: scalar tensor
      Sum of the per-triplet hinge terms.
    """
    anchor, positive, negative = outputs[0], outputs[1], outputs[2]

    def rowwise_cosine_distance(u, v):
        # 1 - <u_i, v_i> for each row i (no cross-row terms).
        return tf.subtract(1., tf.reduce_sum(u * v, axis=1))

    positive_distance = rowwise_cosine_distance(anchor, positive)
    negative_distance = rowwise_cosine_distance(anchor, negative)

    # Pipeline runs bottom-up: (d_ap - d_an) -> + alpha -> hinge -> sum.
    return compose(
        tf.reduce_sum,
        maximum(0.),
        add(alpha),
        subtract_from(positive_distance),
    )(
        negative_distance,
    )

In [8]:
def load_model(checkpoint_filename, input_name="images", output_name="features", alpha=0.):
    """
    Import a serialized GraphDef and expose its tensors for triplet training.

    The graph is imported once into the default graph under the "net" scope.

    Parameters:
    -----------
    - checkpoint_filename: string
      Path to the serialized GraphDef file.
    - input_name: string
      Name of the input op inside the imported graph.
    - output_name: string
      Name of the output op inside the imported graph.
    - alpha: float
      Margin forwarded to compute_cost.

    Returns:
    --------
    - input_vars: list of 3 tensors
    - output_vars: list of 3 tensors
    - cost: tensor returned by compute_cost(output_vars, alpha)

    NOTE(review): the graph is imported only once and the same tensor name is
    looked up three times, so the three entries of input_vars (and of
    output_vars) are the very same tensor rather than separate
    anchor/positive/negative branches -- confirm whether that is intended.
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
        graph_def.ParseFromString(file_handle.read())

    tf.import_graph_def(graph_def, name="net")

    graph = tf.get_default_graph()
    input_tensor_name = "net/%s:0" % input_name
    output_tensor_name = "net/%s:0" % output_name

    input_vars = [graph.get_tensor_by_name(input_tensor_name) for _ in range(3)]
    output_vars = [graph.get_tensor_by_name(output_tensor_name) for _ in range(3)]

    return input_vars, output_vars, compute_cost(output_vars, alpha)

In [9]:
# Import the network from disk; input/output names and alpha are left at load_model's defaults.
inputs, outputs, cost = load_model('./models/deep_sort_cnn/mars-small128.pb')

In [10]:
# First input/output pair; forward_pass uses these for plain feature extraction.
single_input = inputs[0]
single_output = outputs[0]

### Forward pass

In [11]:
def forward_pass(path, dirs, num_per_class):
    """
    Get features from the base convolutional net for random images

    Parameters:
    -----------
    - path: string
      Path to the image source on disk.
      Source directory should be divided in directories, one directory per class.
    - dirs: [string]
      List of directories contained in the path. Correspond to classes.
    - num_per_class: int
      Number of images randomly chosen from each class.
      Images are drawn with replacement, so a class may contribute the same
      file more than once.

    Returns:
    --------
    - samples: ndarray (N, H, W, C)
      Numpy array of randomly chosen images resized according to model's input shape.
      N - number of samples (num_per_class * len(dirs)), grouped by class
      H - height
      W - width
      C - number of channels
    - features: ndarray (N, M)
      Numpy array of features inferred by the base convolutional network from samples
      N - number of samples
      M - size of the convnet output
    """

    # Convert tensor dimensions to plain ints once, so NumPy and PIL get native values.
    input_shape = tuple(int(dim) for dim in single_input.shape[1:])
    feature_size = int(single_output.shape[1])
    height, width = input_shape[0], input_shape[1]

    num_samples = num_per_class * len(dirs)
    features = np.zeros((num_samples, feature_size))
    samples = np.zeros((num_samples,) + input_shape)

    for i, dir_name in enumerate(tqdm(dirs)):
        dir_path = os.path.join(path, dir_name)
        # Draw exactly num_per_class files so that every row of the batch is filled
        # (a fixed count would leave all-zero images or overflow the batch).
        filenames = np.random.choice(os.listdir(dir_path), num_per_class)

        batch = np.zeros((num_per_class,) + input_shape)

        for j, filename in enumerate(filenames):
            # The context manager releases the file handle as soon as the pixels are copied.
            with Image.open(os.path.join(dir_path, filename)) as img:
                # PIL expects (width, height); the model input is laid out as (H, W, C).
                batch[j, :, :, :] = np.array(img.resize((width, height)))

        batch_features = sess.run(single_output, {
            single_input: batch,
        })

        start = i * num_per_class
        stop = start + num_per_class
        samples[start:stop, :, :, :] = batch
        features[start:stop, :] = batch_features

    return samples, features

### Load batch

In [12]:
def load_batch(path, dirs, threshold, num_per_class, max_num_tiplets, max_attempts=1000):
    """
    Get batch of triplets

    One anchor is drawn at random from every class. Negatives are random
    samples of other classes whose cosine distance to the anchor exceeds the
    threshold; positives are the other samples of the anchor's class, reused
    cyclically when there are fewer of them than negatives.

    Parameters:
    -----------
    - path: string
      Path to the image source on disk.
      Source directory should be divided in directories, one directory per class.
    - dirs: [string]
      List of directories contained in the path. Correspond to classes.
    - threshold: float
      Threshold that should divide positive and negative samples.
      Used to choose negative samples for training: a candidate is accepted
      when its cosine distance to the anchor is greater than the threshold.
      NOTE(review): if hard (close) negatives are wanted, the comparison may
      need to be inverted -- confirm.
    - num_per_class: int
      Number of images randomly chosen from each class. Must be at least 2.
    - max_num_tiplets: int
      Maximum number of triplets per class. Actual number of triplets is
      min(max_num_tiplets, number of accepted negatives)
    - max_attempts: int
      Number of random candidates examined per class while looking for negatives.

    Returns:
    --------
    triplets: ndarray (N, 3, H, W, C)
      Numpy array of randomly chosen triplets.
      N - number of samples (0 if no negatives were found)
      3 - three samples: anchor, positive, negative
      H - height
      W - width
      C - number of channels

    Raises:
    -------
    ValueError
      If num_per_class is smaller than 2 (no positive can be paired with the anchor).
    """

    if num_per_class < 2:
        raise ValueError("num_per_class must be at least 2 to form anchor/positive pairs")

    samples, features = forward_pass(path, dirs, num_per_class=num_per_class)

    triplets = []
    for class_index in range(len(dirs)):
        # Samples are grouped by class: class k owns rows [k * n, (k + 1) * n).
        class_start_index = class_index * num_per_class
        index = class_start_index + np.random.randint(num_per_class)

        #get negative samples
        negatives = []
        for _ in range(max_attempts):
            if len(negatives) >= max_num_tiplets:
                break
            neg_index = np.random.randint(samples.shape[0])
            # Compare class ids directly; comparing the index gap with num_per_class
            # would reject valid candidates from neighbouring classes.
            if neg_index // num_per_class == class_index:
                continue
            if cosine_distance(features[index, :], features[neg_index, :]) > threshold:
                negatives.append(samples[neg_index, :, :, :])

        # Nothing suitable found for this class: skip it rather than emitting
        # a mis-shaped empty array that cannot be concatenated.
        if not negatives:
            continue

        #get positive samples
        positive_indices = [
            j for j in range(class_start_index, class_start_index + num_per_class)
            if j != index
        ]
        positives = [
            samples[positive_indices[k % len(positive_indices)], :, :, :]
            for k in range(len(negatives))
        ]

        #get copies of anchor
        anchors = np.stack([samples[index, :, :, :]] * len(negatives))

        triplets.append(np.stack([anchors, np.array(positives), np.array(negatives)], axis=1))

    if not triplets:
        # Keep the documented rank even when no triplet could be built.
        return np.zeros((0, 3) + samples.shape[1:], dtype=samples.dtype)

    return np.concatenate(triplets, axis=0)

### Training

In [13]:
# Root of the training images; forward_pass expects one sub-directory per class.
path = '../input/mars/bbox_train/'
# List the entries of the root directory and drop the '.DS_Store' entry
# (condition False keeps only the names that are NOT in the reference list).
dirs = compose(
    filter_list(['.DS_Store'], False),
    os.listdir,
)(path)

In [14]:
def learn_on_batch(path, dirs):
    """
    Build one batch of triplets from the first five classes.

    Parameters:
    -----------
    - path: string
      Path to the image source on disk.
    - dirs: [string]
      List of class directories; only dirs[0:5] is used.

    Returns:
    --------
    None. The triplets are built but not consumed yet.
    """
    first_classes = dirs[0:5]
    triplets = load_batch(
        path,
        first_classes,
        threshold=0.2,
        num_per_class=20,
        max_num_tiplets=10,
    )

    # TODO: training step is not implemented yet; the original placeholder was
    # sess.run([inputs, outputs, cost], )

In [15]:
# Smoke run: builds a single batch of triplets; no training step is executed yet.
learn_on_batch(path, dirs)

100%|██████████| 5/5 [00:01<00:00,  2.89it/s]
