In [None]:
%matplotlib inline

import os 
import sys
import numpy as np
import time
import imp
import cv2
import tensorflow as tf

sys.path.append('.')
from datasets.motdb import MotDB
from track_rcnn.layer import roi_pool
sys.path.append('./caffe-tensorflow')
from kaffe.tensorflow import Network

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from JSAnimation.IPython_display import display_animation

# TensorFlow session shared by the rest of the notebook: later cells pass it
# to net.load(...) to restore weights and call sess.run(...) to evaluate the graph.
sess = tf.InteractiveSession()

In [None]:
# Hyperparameters

# Image batch: n_ims images, each placed on a fixed max_h x max_w canvas.
n_ims = 2
max_h, max_w = 368, 496

# RoI pooling: scale factor handed to roi_pool together with the size of the
# pooled output grid.
# NOTE(review): 1/16 presumably matches the cumulative stride of the conv5
# feature map relative to the input image -- confirm against the network used.
spatial_scale = 1.0 / 16
pooled_h = pooled_w = 6

# Number of RoI rows fed to the graph per run.
n_rois = 6

In [None]:
# Define the network
class AlexNetConv5(Network):
    """AlexNet-style layer stack from the 'data' input up to 'conv5'.

    In the first two stages the normalisation layer (lrn) is applied
    before max pooling: conv -> norm -> pool.
    """

    def setup(self):
        """Register the layers, in order, starting from the 'data' input."""
        net = self.feed('data')

        # Stage 1: conv1 -> norm1 -> pool1
        net = net.conv(11, 11, 96, 4, 4, padding='VALID', name='conv1')
        net = net.lrn(2, 2e-05, 0.75, name='norm1')
        net = net.max_pool(3, 3, 2, 2, padding='VALID', name='pool1')

        # Stage 2: conv2 -> norm2 -> pool2
        net = net.conv(5, 5, 256, 1, 1, group=2, name='conv2')
        net = net.lrn(2, 2e-05, 0.75, name='norm2')
        net = net.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')

        # Stages 3-5: three 3x3 convolutions, no pooling in between
        net = net.conv(3, 3, 384, 1, 1, name='conv3')
        net = net.conv(3, 3, 384, 1, 1, group=2, name='conv4')
        net.conv(3, 3, 256, 1, 1, group=2, name='conv5')
        
class CaffeNetConv5(Network):
    """CaffeNet-style layer stack from the 'data' input up to 'conv5'.

    In the first two stages max pooling is applied before the
    normalisation layer (lrn): conv -> pool -> norm.
    """

    def setup(self):
        """Register the layers, in order, starting from the 'data' input."""
        net = self.feed('data')

        # Stage 1: conv1 -> pool1 -> norm1
        net = net.conv(11, 11, 96, 4, 4, padding='VALID', name='conv1')
        net = net.max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
        net = net.lrn(2, 2e-05, 0.75, name='norm1')

        # Stage 2: conv2 -> pool2 -> norm2
        net = net.conv(5, 5, 256, 1, 1, group=2, name='conv2')
        net = net.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
        net = net.lrn(2, 2e-05, 0.75, name='norm2')

        # Stages 3-5: three 3x3 convolutions, no pooling in between
        net = net.conv(3, 3, 384, 1, 1, name='conv3')
        net = net.conv(3, 3, 384, 1, 1, group=2, name='conv4')
        net.conv(3, 3, 256, 1, 1, group=2, name='conv5')
        
class VGG16Conv5(Network):
    """VGG16-style layer stack from the 'data' input up to 'conv5_3'.

    Five groups of 3x3 convolutions; each of the first four groups is
    followed by a 2x2 max pooling layer, the fifth is not.
    """

    def setup(self):
        """Register the layers, in order, starting from the 'data' input."""
        net = self.feed('data')

        # Group 1: two 64-output convolutions, then pool1
        net = net.conv(3, 3, 64, 1, 1, name='conv1_1')
        net = net.conv(3, 3, 64, 1, 1, name='conv1_2')
        net = net.max_pool(2, 2, 2, 2, name='pool1')

        # Group 2: two 128-output convolutions, then pool2
        net = net.conv(3, 3, 128, 1, 1, name='conv2_1')
        net = net.conv(3, 3, 128, 1, 1, name='conv2_2')
        net = net.max_pool(2, 2, 2, 2, name='pool2')

        # Group 3: three 256-output convolutions, then pool3
        net = net.conv(3, 3, 256, 1, 1, name='conv3_1')
        net = net.conv(3, 3, 256, 1, 1, name='conv3_2')
        net = net.conv(3, 3, 256, 1, 1, name='conv3_3')
        net = net.max_pool(2, 2, 2, 2, name='pool3')

        # Group 4: three 512-output convolutions, then pool4
        net = net.conv(3, 3, 512, 1, 1, name='conv4_1')
        net = net.conv(3, 3, 512, 1, 1, name='conv4_2')
        net = net.conv(3, 3, 512, 1, 1, name='conv4_3')
        net = net.max_pool(2, 2, 2, 2, name='pool4')

        # Group 5: three 512-output convolutions, no trailing pool
        net = net.conv(3, 3, 512, 1, 1, name='conv5_1')
        net = net.conv(3, 3, 512, 1, 1, name='conv5_2')
        net.conv(3, 3, 512, 1, 1, name='conv5_3')

# Compute the feature

In [None]:
# Define Tensorflow Graph

# Inputs: a fixed-size batch of 3-channel images and one 5-value row per RoI.
im_input = tf.placeholder(dtype=tf.float32, shape=[n_ims, max_h, max_w, 3])
rois_input = tf.placeholder(dtype=tf.float32, shape=[n_rois, 5])

# Convolutional network fed from the image placeholder; the value returned by
# get_output() is what the rest of the notebook treats as the conv5 feature map.
net = AlexNetConv5({'data': im_input})
conv5 = net.get_output()

# RoI pooling over conv5. Called with debug=True, roi_pool yields a
# (pool5, bins) pair here.
pool5, bins = roi_pool(
    conv5, rois_input, spatial_scale, pooled_h, pooled_w, debug=True)

In [None]:
# Read images
# Every image is copied into the top-left corner of a zero-filled
# max_h x max_w canvas so the batch has one fixed shape.
im_names = ['cat.jpg', 'fish-bike.jpg']
ims = np.zeros([n_ims, max_h, max_w, 3])
for i in range(n_ims):
    im_path = 'data/' + im_names[i]
    im = cv2.imread(im_path)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising, so fail here with the offending path.
    if im is None:
        raise IOError('Could not read image: %s' % im_path)
    im_h, im_w = im.shape[:2]
    # An oversized image cannot be copied into the canvas; say so explicitly
    # rather than failing with a broadcast error.
    if im_h > max_h or im_w > max_w:
        raise ValueError('Image %s is %dx%d (h x w), larger than the %dx%d canvas'
                         % (im_path, im_h, im_w, max_h, max_w))
    ims[i, :im_h, :im_w, :] = im

# Define RoIs
# One row per RoI: (image index, x1, y1, x2, y2).
rois = np.zeros([n_rois, 5])

# Rows 0-3 start out as the same box on image 0; row 1 is then overwritten.
rois[:4, :] = np.array([[0, 100, 50, 204, 152]])
# NOTE(review): x2 = 5104 lies far outside the max_w-wide canvas -- confirm
# whether this is a deliberate out-of-bounds test case or a typo.
rois[1, :] = np.array([[0, 20, 20, 5104, 83]])
rois[4, :] = np.array([1, 100, 200, 200, 300])
rois[5, :] = np.array([1, 200, 50, 400, 150])

# Run
# Load the weights into the session, then evaluate the feature map, the
# pooled features and the bins in a single run.
net.load('data/tensorflow_models/alexnet.npy', sess, ignore_missing=True)
conv5_data, pool5_data, bins_data = sess.run([conv5, pool5, bins],
                                             feed_dict={im_input: ims, rois_input: rois})
print(conv5_data.shape)

In [None]:
# Visualization
# For every RoI draw three panels:
#   1. the source image with the RoI (red) and its pooling bins (blue),
#   2. the channel-averaged conv5 feature map with the same bins,
#   3. the channel-averaged pooled output for that RoI.

n_channels = 10  # NOTE(review): not referenced by any visible cell
channel = 20     # NOTE(review): not referenced by any visible cell
n_mean_channels = 200  # number of leading feature channels averaged for display


def _draw_box(ax, box, color):
    """Outline the (x1, y1, x2, y2) box on `ax` as an unfilled rectangle."""
    x1, y1, x2, y2 = box
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, linewidth=1, edgecolor=color))


def _draw_bins(ax, roi_bins, divisor=1.0):
    """Outline every pooling bin of one RoI in blue.

    Bin coordinates are divided by `divisor` before drawing; pass
    spatial_scale to map them onto the image panel, or leave the default
    to draw them directly on the feature-map panel.
    """
    for ph in range(pooled_h):
        for pw in range(pooled_w):
            _draw_box(ax, roi_bins[ph, pw, :] / divisor, 'b')


for roi_idx in range(n_rois):
    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(1, 3, 1)
    ax2 = fig.add_subplot(1, 3, 2)
    ax3 = fig.add_subplot(1, 3, 3)

    # Plot image
    # rois is a float array, and NumPy rejects float indices, so cast the
    # image index to int before using it to index ims / conv5_data.
    im_idx = int(rois[roi_idx, 0])
    ax1.imshow(cv2.cvtColor(ims[im_idx].astype('uint8'), cv2.COLOR_BGR2RGB))
    ax1.set_title('image %d: RoI (red), bins (blue)' % im_idx)
    _draw_box(ax1, rois[roi_idx, 1:], 'r')
    _draw_bins(ax1, bins_data[roi_idx], spatial_scale)

    # Plot conv5 feature (mean of multiple channels)
    ax2.imshow(np.mean(conv5_data[im_idx, :, :, :n_mean_channels], axis=2), cmap='Greys_r')
    ax2.set_title('conv5 (channel mean)')
    _draw_bins(ax2, bins_data[roi_idx])

    # Plot the pool5
    ax3.imshow(np.mean(pool5_data[roi_idx, :, :, :n_mean_channels], axis=2), cmap='Greys_r')
    ax3.set_title('pool5 (channel mean)')