# COMP5329 - Deep Learning 

## Tutorial 8 - Deep Learning Applications

**Semester 1, 2018**

**Objectives:**

* To learn about the basic idea of deep learning based object detection algorithms. 
* To learn about how to use a RCNN to implement object detection tasks. 

**Instructions:**

* Install the tflearn library by running `pip install tflearn` in your TensorFlow environment.
* Install the selectivesearch library by running `pip install selectivesearch` in your TensorFlow environment.
* Install scikit-learn (imported as `sklearn`) by running `pip install scikit-learn` in your TensorFlow environment.

* The exercises are to be completed in an IPython notebook, for example the web-based TMPNB IPython notebook (https://tmpnb.org).
* Go to File->Open. Drag and drop "Deep_Learning_Applications.ipynb"(with '') file to the home interface and click upload.
* To run the cell you can press Ctrl-Enter or hit the Play button at the top. 

Lecturers: Chang Xu

Tutors: 


## Deep Learning Application - Object Detection

## Training AlexNet on flower17

Train an AlexNet on the flower17 dataset (https://github.com/ck196/tensorflow-alexnet). The pre-trained model can be downloaded from https://drive.google.com/drive/folders/1dc8Q5pAIqtfw3oxZi275Ctj-wKBac-aV?usp=sharing (model_save.model). Training might take about 1 hour on a single GPU.

In [None]:
import pickle
import numpy as np 
import selectivesearch
from PIL import Image
import os.path
import skimage

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

def pil_to_nparray(pil_image):
    """Convert a PIL image into a float32 NumPy array.

    The image's ``load()`` method is called first, then the image is handed
    to ``np.asarray`` with a ``float32`` dtype.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype=np.float32)
    return pixels

def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=Image.LANCZOS):
    """Resize a PIL image to ``new_width`` x ``new_height``.

    Args:
        in_image: PIL image to resize.
        new_width: target width in pixels.
        new_height: target height in pixels.
        out_image: optional path; when truthy the resized image is also
            saved there.
        resize_mode: PIL resampling filter. Defaults to ``Image.LANCZOS``,
            which is the same filter as the former ``Image.ANTIALIAS``
            alias (that alias was removed in Pillow 10).

    Returns:
        The resized PIL image.
    """
    img = in_image.resize((new_width, new_height), resize_mode)
    if out_image:
        img.save(out_image)
    return img

# IOU Part 1
def if_intersection(xmin_a, xmax_a, ymin_a, ymax_a, xmin_b, xmax_b, ymin_b, ymax_b):
    """Return the overlap area of two axis-aligned boxes, or ``False``.

    Box A spans ``[xmin_a, xmax_a] x [ymin_a, ymax_a]`` and box B spans
    ``[xmin_b, xmax_b] x [ymin_b, ymax_b]``.

    The overlap is computed per axis as ``min(max edges) - max(min edges)``.
    This also covers configurations where no corner of either box lies
    inside the other (for example a wide box crossing a tall one), which a
    corner-containment test reports as non-intersecting.

    Returns:
        The intersection area when the boxes overlap with positive area,
        otherwise ``False`` (boxes that are disjoint or merely touch).
    """
    overlap_w = min(xmax_a, xmax_b) - max(xmin_a, xmin_b)
    overlap_h = min(ymax_a, ymax_b) - max(ymin_a, ymin_b)
    if overlap_w <= 0 or overlap_h <= 0:
        return False
    return overlap_w * overlap_h

# IOU Part 2
def IOU(ver1, vertice2):
    """Compute intersection-over-union between two rectangles.

    Args:
        ver1: rectangle given as ``[x, y, w, h]``.
        vertice2: rectangle given as ``[x, y, x_1, y_1, w, h]`` (the layout
            produced by ``clip_pic``).

    Returns:
        The IOU as a float when ``if_intersection`` reports a non-zero
        overlap area, otherwise ``False``.
    """
    ref_x, ref_y, ref_w, ref_h = ver1[0], ver1[1], ver1[2], ver1[3]
    overlap = if_intersection(ref_x, ref_x + ref_w, ref_y, ref_y + ref_h,
                              vertice2[0], vertice2[2], vertice2[1], vertice2[3])
    if not overlap:
        return False
    # union = area of both rectangles minus the part counted twice
    union = ref_w * ref_h + vertice2[4] * vertice2[5] - overlap
    return float(overlap) / union

# Clip Image
def clip_pic(img, rect):
    """Crop the region ``rect`` out of the image array ``img``.

    Args:
        img: image array indexed as ``[row, column, channel]``.
        rect: ``(x, y, w, h)`` where ``x`` is the horizontal (column)
            offset and ``y`` the vertical (row) offset of the top-left
            corner. This is the same convention the detection cell uses
            when it draws ``Rectangle((x, y), w, h)`` over the image.

    Returns:
        A tuple ``(crop, [x, y, x_1, y_1, w, h])`` where ``x_1 = x + w`` and
        ``y_1 = y + h``.
    """
    x, y, w, h = rect[0], rect[1], rect[2], rect[3]
    x_1 = x + w
    y_1 = y + h
    # Rows are the vertical axis, so they must be sliced with y and the
    # columns with x; slicing rows with x crops a transposed region.
    return img[y:y_1, x:x_1, :], [x, y, x_1, y_1, w, h]

# Read in data and save data for Alexnet
def load_train_proposals(datafile, num_clss, threshold = 0.5, svm = False, save=False, save_path='dataset.pkl'):
    """Build a training set of region proposals from an annotation list.

    Each non-blank line of ``datafile`` holds space-separated fields:
    ``tmp[0]`` image path, ``tmp[1]`` integer class label and ``tmp[2]`` a
    comma-separated reference rectangle. Selective search proposes regions
    for every image; each kept region is cropped, resized to 224x224 and
    labelled by comparing its IOU with the reference rectangle.

    Args:
        datafile: path of the annotation list file.
        num_clss: number of object classes (background excluded).
        threshold: proposals whose IOU is below this value are labelled as
            background (class 0).
        svm: when false, labels are one-hot vectors of length
            ``num_clss + 1``; otherwise labels are plain integers.
        save: when true, ``(images, labels)`` is pickled to ``save_path``.
        save_path: destination of the pickle file.

    Returns:
        ``(images, labels)`` as two parallel lists.
    """
    labels = []
    images = []
    # Context manager so the list file is closed even if an image fails.
    with open(datafile, 'r') as train_list:
        for line in train_list:
            line = line.strip()
            if not line:
                # Tolerate blank lines in the list file.
                continue
            tmp = line.split(' ')
            img = skimage.io.imread(tmp[0])
            # Label and reference rectangle are per image: parse them once
            # instead of once per proposal.
            index = int(tmp[1])
            ref_rect_int = [int(i) for i in tmp[2].split(',')]
            _, regions = selectivesearch.selective_search(
                img, scale=500, sigma=0.9, min_size=10)
            candidates = set()
            for r in regions:
                # Skip rectangles already used and very small regions.
                if r['rect'] in candidates:
                    continue
                if r['size'] < 220:
                    continue
                proposal_img, proposal_vertice = clip_pic(img, r['rect'])
                if len(proposal_img) == 0:
                    continue
                _, _, w, h = r['rect']
                if w == 0 or h == 0:
                    continue
                # Reject crops with any zero-length dimension.
                if 0 in np.shape(proposal_img):
                    continue
                im = Image.fromarray(proposal_img)
                resized_proposal_img = resize_image(im, 224, 224)
                candidates.add(r['rect'])
                images.append(pil_to_nparray(resized_proposal_img))
                # IOU returns False when there is no overlap, which compares
                # as 0 against the threshold.
                iou_val = IOU(ref_rect_int, proposal_vertice)
                is_background = iou_val < threshold
                # Class 0 is reserved for background.
                if not svm:
                    label = np.zeros(num_clss+1)
                    label[0 if is_background else index] = 1
                    labels.append(label)
                else:
                    labels.append(0 if is_background else index)
    if save:
        with open(save_path, 'wb') as out_file:
            pickle.dump((images, labels), out_file)
    return images, labels

def load_from_pkl(dataset_file):
    """Load an ``(X, Y)`` pair from the pickle file ``dataset_file``.

    Returns:
        The two unpickled objects as ``(X, Y)``.
    """
    # NOTE: unpickling can execute arbitrary code; only load dataset files
    # that you generated yourself.
    with open(dataset_file, 'rb') as pkl_file:
        X, Y = pickle.load(pkl_file)
    return X, Y

def load_image(img_path):
    """Open the image file at ``img_path`` with PIL and return the image."""
    return Image.open(img_path)

def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=Image.LANCZOS):
    """Resize a PIL image to ``new_width`` x ``new_height``.

    Args:
        in_image: PIL image to resize.
        new_width: target width in pixels.
        new_height: target height in pixels.
        out_image: optional path; when truthy the resized image is also
            saved there.
        resize_mode: PIL resampling filter. Defaults to ``Image.LANCZOS``,
            which is the same filter as the former ``Image.ANTIALIAS``
            alias (that alias was removed in Pillow 10).

    Returns:
        The resized PIL image.
    """
    img = in_image.resize((new_width, new_height), resize_mode)
    if out_image:
        img.save(out_image)
    return img

def pil_to_nparray(pil_image):
    """Return the pixels of a PIL image as a float32 NumPy array.

    ``load()`` is invoked on the image before it is passed to
    ``np.asarray``.
    """
    pil_image.load()
    as_float = np.asarray(pil_image, dtype=np.float32)
    return as_float

def load_data(datafile, num_clss, save=False, save_path='dataset.pkl'):
    """Load whole images and one-hot labels listed in ``datafile``.

    Each non-blank line holds space-separated fields; the first is the
    image path and the second the integer class index. Every image is
    resized to 224x224 and converted to a float32 array.

    Args:
        datafile: path of the list file.
        num_clss: number of classes (length of each one-hot label).
        save: when true, ``(images, labels)`` is pickled to ``save_path``.
        save_path: destination of the pickle file.

    Returns:
        ``(images, labels)`` as two parallel lists.
    """
    labels = []
    images = []
    # Context manager so the list file is closed even if an image fails.
    with open(datafile, 'r') as train_list:
        for line in train_list:
            line = line.strip()
            if not line:
                # Tolerate blank lines in the list file.
                continue
            tmp = line.split(' ')
            fpath = tmp[0]
            print(fpath)
            img = resize_image(load_image(fpath), 224, 224)
            images.append(pil_to_nparray(img))

            label = np.zeros(num_clss)
            label[int(tmp[1])] = 1
            labels.append(label)
    if save:
        with open(save_path, 'wb') as out_file:
            pickle.dump((images, labels), out_file)
    return images, labels
 
def create_alexnet(num_classes, restore=True):
    """Build the AlexNet classification graph with tflearn.

    Args:
        num_classes: number of units of the final softmax layer.
        restore: forwarded to the final fully connected layer. The default
            ``True`` keeps tflearn's normal behaviour. Pass ``False`` when
            fine-tuning from a checkpoint trained with a different number
            of classes, so that the final layer's weights are not restored
            by ``model.load`` (see the fine-tuning usage
            ``create_alexnet(3, restore)`` in this file).

    Returns:
        The regression (training) layer that ends the network.
    """
    # Building 'AlexNet'
    network = input_data(shape=[None, 224, 224, 3])
    network = conv_2d(network, 96, 11, strides=4, activation='relu')
    network = max_pool_2d(network, 3, strides=2)
    network = local_response_normalization(network)
    network = conv_2d(network, 256, 5, activation='relu')
    network = max_pool_2d(network, 3, strides=2)
    network = local_response_normalization(network)
    network = conv_2d(network, 384, 3, activation='relu')
    network = conv_2d(network, 384, 3, activation='relu')
    network = conv_2d(network, 256, 3, activation='relu')
    network = max_pool_2d(network, 3, strides=2)
    network = local_response_normalization(network)
    network = fully_connected(network, 4096, activation='tanh')
    network = dropout(network, 0.5)
    network = fully_connected(network, 4096, activation='tanh')
    network = dropout(network, 0.5)
    # Only the class-specific output layer is affected by `restore`.
    network = fully_connected(network, num_classes, activation='softmax',
                              restore=restore)
    network = regression(network, optimizer='momentum',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)
    return network

def train(network, X, Y):
    """Train ``network`` on ``(X, Y)`` and save it as 'model_save.model'.

    If 'model_save.model' already exists as a file it is loaded before
    fitting. Fitting runs for 100 epochs with 10% of the data held out
    for validation.
    """
    dnn = tflearn.DNN(
        network,
        checkpoint_path='model_alexnet',
        max_checkpoints=1,
        tensorboard_verbose=2,
        tensorboard_dir='output',
    )
    # Continue from an earlier save when one is present.
    if os.path.isfile('model_save.model'):
        dnn.load('model_save.model')
    dnn.fit(
        X, Y,
        n_epoch=100,
        validation_set=0.1,
        shuffle=True,
        show_metric=True,
        batch_size=64,
        snapshot_step=200,
        snapshot_epoch=False,
        run_id='alexnet_oxflowers17',
    )
    # Persist the trained weights.
    dnn.save('model_save.model')

def predict(network, input_data, modelfile,images):
    """Load weights from ``modelfile`` into ``network`` and predict ``images``.

    The ``input_data`` argument is accepted but not used by the body.
    """
    dnn = tflearn.DNN(network)
    dnn.load(modelfile)
    predictions = dnn.predict(images)
    return predictions

  from ._conv import register_converters as _register_converters


In [None]:
# Uncomment the code below to train AlexNet yourself.
#X, Y = load_data('train_list.txt', 17)
#net = create_alexnet(17)
#train(net,X,Y)## Training AlexNet on flower17


## Fine-tune AlexNet on target dataset

Fine-tune the pre-trained AlexNet on the given dataset. The fine-tuned model can be downloaded from https://drive.google.com/drive/folders/1dc8Q5pAIqtfw3oxZi275Ctj-wKBac-aV?usp=sharing (fine_tune_model_save.model.*). Fine-tuning might take about 20 minutes on a single GPU.

In [None]:
def fine_tune_Alexnet(network, X, Y):
    """Fine-tune ``network`` on ``(X, Y)`` starting from 'model_save.model'.

    The weights in 'model_save.model' are loaded, the model is fitted for
    10 epochs with 10% of the data held out for validation, and the result
    is saved as 'fine_tune_model_save.model'.
    """
    dnn = tflearn.DNN(
        network,
        checkpoint_path='rcnn_model_alexnet',
        max_checkpoints=1,
        tensorboard_verbose=2,
        tensorboard_dir='output_RCNN',
    )
    print("Loading the alexnet")
    dnn.load('model_save.model')
    dnn.fit(
        X, Y,
        n_epoch=10,
        validation_set=0.1,
        shuffle=True,
        show_metric=True,
        batch_size=64,
        snapshot_step=200,
        snapshot_epoch=False,
        run_id='alexnet_rcnnflowers2',
    )
    # Persist the fine-tuned weights.
    dnn.save('fine_tune_model_save.model')
    
# Uncomment the code below to fine-tune AlexNet on the target dataset.
#print("Loading Data")
#X, Y = load_from_pkl('dataset.pkl')
#restore = False
#net = create_alexnet(3, restore)
#fine_tune_Alexnet(net,X,Y)
## Fine-tune AlexNet on target dataset

## Extracting proposals and training classifiers (SVMs employed here)

In [None]:
from __future__ import division, print_function, absolute_import
import pickle
import numpy as np 
import selectivesearch
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os.path
import skimage
from sklearn import svm
import preprocessing_RCNN as prep
import os

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

# Load testing images
def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=Image.LANCZOS):
    """Resize a PIL image to ``new_width`` x ``new_height``.

    Args:
        in_image: PIL image to resize.
        new_width: target width in pixels.
        new_height: target height in pixels.
        out_image: optional path; when truthy the resized image is also
            saved there.
        resize_mode: PIL resampling filter. Defaults to ``Image.LANCZOS``,
            which is the same filter as the former ``Image.ANTIALIAS``
            alias (that alias was removed in Pillow 10).

    Returns:
        The resized PIL image.
    """
    img = in_image.resize((new_width, new_height), resize_mode)
    if out_image:
        img.save(out_image)
    return img

def pil_to_nparray(pil_image):
    """Turn a PIL image into a NumPy array of dtype float32.

    The image's ``load()`` method runs first; the array is then produced
    with ``np.asarray``.
    """
    pil_image.load()
    float_pixels = np.asarray(pil_image, dtype=np.float32)
    return float_pixels

def image_proposal(img_path):
    """Generate resized region proposals for the image at ``img_path``.

    Selective search proposes regions; duplicates, regions whose reported
    size is below 220 and empty crops are dropped. Each remaining crop is
    resized to 224x224 and converted to a float32 array.

    Returns:
        ``(images, vertices)``: the proposal arrays and, in the same
        order, the ``rect`` of the region each one came from.
    """
    image = skimage.io.imread(img_path)
    _, regions = selectivesearch.selective_search(
        image, scale=500, sigma=0.9, min_size=10)
    seen_rects = set()
    images, vertices = [], []
    for region in regions:
        rect = region['rect']
        # Drop rectangles already kept and very small regions.
        if rect in seen_rects or region['size'] < 220:
            continue
        crop, _ = clip_pic(image, rect)
        # Drop empty crops.
        if len(crop) == 0:
            continue
        _, _, width, height = rect
        if width == 0 or height == 0:
            continue
        # Drop crops with any zero-length dimension.
        if 0 in np.shape(crop):
            continue
        resized = resize_image(Image.fromarray(crop), 224, 224)
        seen_rects.add(rect)
        images.append(pil_to_nparray(resized))
        vertices.append(rect)
    return images, vertices

# Load training images
def generate_single_svm_train(one_class_train_file):
    """Return the SVM training data for one class list file.

    The proposals are cached in a pickle file stored next to the list
    file, with the same base name and a ``.pkl`` extension. If that cache
    exists it is loaded; otherwise the proposals are generated with
    ``load_train_proposals`` (IOU threshold 0.3, integer labels) and the
    cache is written.

    Args:
        one_class_train_file: path of the class list file.

    Returns:
        ``(images, Y)``: proposal images and their integer labels.
    """
    trainfile = one_class_train_file
    # Swap only the extension: a plain str.replace('txt', 'pkl') would also
    # rewrite "txt" inside directory or file names, and would leave a path
    # without "txt" unchanged so the list file itself would be unpickled.
    savepath = os.path.splitext(trainfile)[0] + '.pkl'
    if os.path.isfile(savepath):
        print("restoring svm dataset " + savepath)
        return load_from_pkl(savepath)
    print("loading svm dataset " + savepath)
    return load_train_proposals(trainfile, 2, threshold=0.3, svm=True, save=True, save_path=savepath)
    
# Use a already trained alexnet with the last layer redesigned
def create_alexnet_features(num_classes, restore=False):
    """Build AlexNet up to its second 4096-unit fully connected layer.

    The layer sequence matches the classifier network except that the
    final dropout and softmax layers are left out, so the network output
    is the 4096-dimensional activation. Neither ``num_classes`` nor
    ``restore`` is used by the body.
    """
    layers = input_data(shape=[None, 224, 224, 3])
    # Convolution block 1
    layers = conv_2d(layers, 96, 11, strides=4, activation='relu')
    layers = max_pool_2d(layers, 3, strides=2)
    layers = local_response_normalization(layers)
    # Convolution block 2
    layers = conv_2d(layers, 256, 5, activation='relu')
    layers = max_pool_2d(layers, 3, strides=2)
    layers = local_response_normalization(layers)
    # Convolution blocks 3-5
    layers = conv_2d(layers, 384, 3, activation='relu')
    layers = conv_2d(layers, 384, 3, activation='relu')
    layers = conv_2d(layers, 256, 3, activation='relu')
    layers = max_pool_2d(layers, 3, strides=2)
    layers = local_response_normalization(layers)
    # Fully connected feature layers
    layers = fully_connected(layers, 4096, activation='tanh')
    layers = dropout(layers, 0.5)
    layers = fully_connected(layers, 4096, activation='tanh')
    layers = regression(layers, optimizer='momentum',
                        loss='categorical_crossentropy',
                        learning_rate=0.001)
    return layers

# Construct cascade svms

def train_svms(train_file_folder, model):
    """Train one linear SVM per class list file in ``train_file_folder``.

    Entries whose name contains "pkl" are skipped (they are the cached
    proposal files). For each remaining file the proposals are loaded,
    ``model.predict`` turns each proposal into a feature vector, and a
    ``LinearSVC`` is fitted on those features.

    Args:
        train_file_folder: directory holding the class list files; a
            trailing slash is optional.
        model: object whose ``predict`` returns one feature vector per
            input image.

    Returns:
        The fitted classifiers, ordered by list file name.
    """
    svms = []
    # os.listdir order is arbitrary; sort so the classifier order is
    # reproducible from run to run.
    for train_file in sorted(os.listdir(train_file_folder)):
        if "pkl" in train_file:
            continue
        # os.path.join works whether or not the folder ends with a slash.
        X, Y = generate_single_svm_train(os.path.join(train_file_folder, train_file))
        # Features are extracted one proposal at a time.
        train_features = [model.predict([sample])[0] for sample in X]
        print("feature dimension")
        print(np.shape(train_features))
        clf = svm.LinearSVC()
        print("fit svm")
        clf.fit(train_features, Y)
        svms.append(clf)
    return svms

In [None]:
# Folder holding the per-class SVM training list files.
train_file_folder = 'svm_train/'
# Build the feature-extraction network and load the fine-tuned weights into it.
net = create_alexnet_features(3)
model = tflearn.DNN(net)
model.load('fine_tune_model_save.model')
# Train the SVM classifiers that classify the selected proposals;
# `model` and `svms` are reused by the detection cell.
svms = train_svms(train_file_folder, model)

## Object Detection

Given a test image, we first select multiple proposals using selective search. Then the fine-tuned AlexNet is used to extract features from these proposals. Finally, the trained classifiers (i.e., SVMs) are used to detect objects.

In [None]:
img_path = 'testimg.jpg'
# Propose candidate regions, then extract one feature vector per region.
imgs, verts = image_proposal(img_path)
features = model.predict(imgs)
print("predict image:")
print(np.shape(features))
results = []
results_label = []
# Run every SVM on every proposal; a non-zero prediction marks a detection.
for idx, feat in enumerate(features):
    for clf in svms:
        pred = clf.predict([feat])
        print(pred)
        if pred[0] == 0:
            continue
        results.append(verts[idx])
        results_label.append(pred[0])
# Draw each detected rectangle on top of the test image.
img = skimage.io.imread(img_path)
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
ax.imshow(img)
for left, top, width, height in results:
    ax.add_patch(mpatches.Rectangle(
        (left, top), width, height, fill=False, edgecolor='red', linewidth=1))

plt.show()

## Exercises

* For each object, multiple proposals are selected. How can we remove most of them and retain only the desired one?
* Based on the R-CNN framework, try to understand the ideas behind Fast R-CNN (https://arxiv.org/abs/1504.08083) and Faster R-CNN (https://arxiv.org/abs/1506.01497).