# [IAPR 2019:][iapr2019] Special project

**Group members:**
    1- first name and last name,
    2- first name and last name,
    3- first name and last name

**Due date:** 30.05.2019

[iapr2019]: https://github.com/LTS5/iapr-2019


## Description
Please find the description of this special project via [this link].

[this link]: https://github.com/LTS5/iapr-2019/blob/master/project/special_project_description.pdf

## Part 0: Loading the data

In [None]:
import os
import tarfile
import numpy as np
import scipy as scp
import skimage.io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline

# Folder extraction: the archive is unpacked only when the data folder is missing.
data_base_path = os.path.join(os.pardir, 'datasets')
data_folder = 'project-data'
data_projet_path  = os.path.join(data_base_path,data_folder)
tar_path = os.path.join(data_base_path, data_folder + '.tar.gz')
if not os.path.exists(data_projet_path):
    with tarfile.open(tar_path, mode='r:gz') as tar:
        tar.extractall(path=data_base_path)

# Data project paths
img_projet_path   = os.path.join(data_projet_path,'images')
annot_projet_path = os.path.join(data_projet_path,'annotations')
# Text files listing the image names (one per line, without extension) of each split
val_name_file  ='validation.txt'
test_name_file ='test.txt'
train_name_file='train.txt'

# Validation
val_img_fn_path = os.path.join(data_projet_path,val_name_file)
# The name file is read inside a `with` block so that it is closed right away.
with open(val_img_fn_path) as name_file:
    val_img_names = [line.rstrip('\n') for line in name_file]
val_img_fn   = [line +'.jpg' for line in val_img_names]
val_img_path = [os.path.join(img_projet_path,'validation',line) for line in val_img_fn]
val_ic = skimage.io.imread_collection(val_img_path)
print('Number of validation images: ', len(val_ic))

# Train
tr_img_fn_path = os.path.join(data_projet_path,train_name_file)
with open(tr_img_fn_path) as name_file:
    tr_img_names = [line.rstrip('\n') for line in name_file]
tr_img_fn   = [line +'.jpg' for line in tr_img_names]
tr_img_path = [os.path.join(img_projet_path,'train',line) for line in tr_img_fn]
tr_ic = skimage.io.imread_collection(tr_img_path)
# This count is for the train split (it used to be labelled "validation").
print('Number of train images: ', len(tr_ic))

# Test
tst_img_fn_path = os.path.join(data_projet_path,test_name_file)
with open(tst_img_fn_path) as name_file:
    tst_img_names = [line.rstrip('\n') for line in name_file]
tst_img_fn   = [line +'.jpg' for line in tst_img_names]
tst_img_path = [os.path.join(img_projet_path,'test',line) for line in tst_img_fn]
tst_ic = skimage.io.imread_collection(tst_img_path)
print('Number of test images: ', len(tst_ic))

In [None]:
import xml.etree.ElementTree as ET

def parse_file(filename):
    """ Parse a PASCAL VOC xml file.

    Every ``object`` element of the file yields one dict with two keys:
    ``name`` (text of its ``name`` child) and ``bbox``, a list
    ``[xmin, ymin, xmax - xmin, ymax - ymin]`` whose corner values are read
    from the ``bndbox`` child and truncated to integers.

    Returns the list of those dicts, in document order.
    """
    def corner(box, tag):
        # Coordinates may be written as floats in the file; truncate to int.
        return int(float(box.find(tag).text))

    parsed = []
    for node in ET.parse(filename).findall('object'):
        label_text = node.find('name').text
        box = node.find('bndbox')
        left, top = corner(box, 'xmin'), corner(box, 'ymin')
        right, bottom = corner(box, 'xmax'), corner(box, 'ymax')
        parsed.append({'name': label_text,
                       'bbox': [left, top, right - left, bottom - top]})

    return parsed

# Annotation folders, one per split
val_xml_path = os.path.join(annot_projet_path, 'validation')
tr_xml_path = os.path.join(annot_projet_path,'train')
tst_xml_path = os.path.join(annot_projet_path,'test')

# Parsed annotations, listed in the same order as the image names of each split
val_xmls = list(map(parse_file,
                    [os.path.join(val_xml_path, name + '.xml') for name in val_img_names]))
tr_xmls = list(map(parse_file,
                   [os.path.join(tr_xml_path, name + '.xml') for name in tr_img_names]))
tst_xmls = list(map(parse_file,
                    [os.path.join(tst_xml_path, name + '.xml') for name in tst_img_names]))

In [None]:
# Show the first six test images with their ground-truth boxes drawn in red
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 12))
for ax, im, nm, annotations in zip(axes.ravel(), tst_ic, tst_img_names[:6], tst_xmls):
    # One rectangle per annotation; bbox is [x, y, width, height]
    for anno in annotations:
        bbox = anno['bbox']
        rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                                 linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

    ax.imshow(im)
    ax.axis('off')
    ax.set_title(nm)
plt.show()

## Part 1: Finding varroas by segmentation
Add your implementation of the **`detect_by_segmentation`** function. Please make sure its input and output follow the format specified in its docstring.

In [None]:
# Import stuff
from skimage.filters import threshold_li
from skimage.color   import rgb2gray
from skimage.measure import label

from skimage.morphology import erosion, dilation, opening, closing, white_tophat
from skimage.morphology import black_tophat, skeletonize, convex_hull_image
from skimage.morphology import disk

def pipeline(rgb_input_img):
    """ Input : RGB image of varroa infection
        Output : Binary image of detected varroa
        Function : Process the image in order to keep the relevant information

        Steps: sigmoid contrast enhancement, conversion to grey level,
        grey-level dilation then erosion with the same disk, and Li
        thresholding. The returned boolean image is True where the
        processed grey level is below the Li threshold. """
    # Only `skimage.io` is imported at file level, which does not guarantee
    # that the `skimage.exposure` submodule is loaded; import it explicitly.
    from skimage import exposure

    # Preprocessing

    # Contrast enhancement
    sigmoid_p = 0.5
    rgb_input_img = exposure.adjust_sigmoid(rgb_input_img, cutoff=sigmoid_p, gain=10, inv=False)

    # Color selection: work on the grey-level image
    input_img = rgb2gray(rgb_input_img)

    # Morphology operations (same structuring element for both steps)
    struct_elem = disk(4)
    morph_img   = dilation(input_img, struct_elem) # Remove waste on images
    morph_img   = erosion(morph_img, struct_elem)  # Give proportion again to varroa

    # Thresholding (Li method)
    li_thres = threshold_li(morph_img)

    # Binarization: keep the pixels below the threshold
    bin_min  = morph_img < li_thres

    # Optional plot of the intermediate results (switch on by hand when tuning)
    plot_processing = False
    if plot_processing:
        fig, axes = plt.subplots(ncols=3, figsize=(15, 15))
        ax = axes.ravel()

        ax[0].imshow(input_img)
        ax[0].set_xlabel('Orig.')

        ax[1].imshow(morph_img)
        ax[1].set_xlabel('Morph. processing.')

        ax[2].imshow(bin_min)
        ax[2].set_xlabel('Minimal thres.')
        plt.show()

    return bin_min

def label_varroa_region(input_image):
    """ Input  : Binary image of detected varroa
        Output : Labelled image in which each connected region has its own label

        The number of regions also computed by `label` is discarded. """
    return label(input_image, return_num=True, connectivity=None)[0]

In [None]:
from skimage.measure import regionprops

def xml_annotation_to_bbox(xml_annotation):
    """ Convert an xml annotation (list of dicts with a 'bbox' entry)
        into the list of its bboxes, each one turned into a tuple """
    return [tuple(entry['bbox']) for entry in xml_annotation]


def iou_bbox(gt_reg,pred_reg):
    """
        Compute the IoU between two regions from their bounding boxes

        Inspired from : https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/

        Parameters
        -----
        gt_reg   : (x, y, width, height), i.e.
                   (MIN_COL, MIN_ROW, MAX_COL-MIN_COL, MAX_ROW-MIN_ROW),
                   as produced by `parse_file`. The max corner
                   (min + extent) is treated as inside the box.
        pred_reg : (MIN_ROW, MIN_COL, MAX_ROW, MAX_COL) with pixels in the
                   half-open intervals [min_row; max_row) and
                   [min_col; max_col).

        Return
        -----
        float val in [0,1]
    """
    # Ground truth: the width extends the columns and the height extends the
    # rows. (They used to be swapped, which gave a wrong IoU for every
    # non-square ground-truth box.)
    g_col_min,g_row_min,g_width,g_height = gt_reg
    g_col_max = g_col_min + g_width
    g_row_max = g_row_min + g_height

    # Prediction: max values are outside the box, so bring them inside to
    # use the same inclusive convention as the ground truth.
    p_row_min,p_col_min,p_row_max,p_col_max = pred_reg
    p_row_max = p_row_max - 1
    p_col_max = p_col_max - 1

    # Corners of the intersection rectangle
    i_row_min = max(p_row_min,g_row_min)
    i_col_min = max(p_col_min,g_col_min)
    i_row_max = min(p_row_max,g_row_max)
    i_col_max = min(p_col_max,g_col_max)

    # Intersection area; the max(0, ...) makes it 0 for disjoint rectangles
    inter_area = max(0,i_row_max - i_row_min + 1) * max(0,i_col_max - i_col_min + 1)

    # Area of both the prediction and the ground truth (inclusive corners)
    pred_rect = (p_row_max-p_row_min+1)*(p_col_max-p_col_min+1)
    gt_rect   = (g_row_max-g_row_min+1)*(g_col_max-g_col_min+1)

    debug_print = False # Switch on by hand to trace the computation

    if debug_print:
        print(" GT : {}:{}/{}:{}".format(g_row_min,g_col_min,g_row_max,g_col_max))
        print(" PR : {}:{}/{}:{}".format(p_row_min,p_col_min,p_row_max,p_col_max))
        print("INTER : {}".format(inter_area))
        print("PRED_AREA : {}".format(pred_rect))
        print("GT   AREA : {}".format(gt_rect))

    # Intersection over union
    iou_val = inter_area/(gt_rect+pred_rect-inter_area)
    return iou_val
    

def iou_bbox2(gt_reg,pred_reg):
    """
        Compute the IoU between two regions from their bounding boxes

        Parameters
        -----
        gt_reg   : (x, y, width, height) of the ground-truth box
        pred_reg : (x, y, width, height) of the predicted box

        For both boxes the max corner is taken as min + extent and is
        counted as part of the box (hence the +1 in every area).

        Return
        -----
        float val in [0,1]
    """
    def corners(region):
        # (x, y, width, height) -> (x_min, y_min, x_max, y_max)
        x_min, y_min, width, height = region
        return x_min, y_min, x_min + width, y_min + height

    g_x_min, g_y_min, g_x_max, g_y_max = corners(gt_reg)
    p_x_min, p_y_min, p_x_max, p_y_max = corners(pred_reg)

    # Extent of the overlap along each axis, clamped to 0 when disjoint
    overlap_x = max(0, min(p_x_max, g_x_max) - max(p_x_min, g_x_min) + 1)
    overlap_y = max(0, min(p_y_max, g_y_max) - max(p_y_min, g_y_min) + 1)
    inter_area = overlap_x * overlap_y

    # Individual areas
    pred_rect = (p_x_max - p_x_min + 1) * (p_y_max - p_y_min + 1)
    gt_rect = (g_x_max - g_x_min + 1) * (g_y_max - g_y_min + 1)

    debug_print = False

    if debug_print:
        print(" GT : {}:{}/{}:{}".format(g_x_min,g_y_min,g_x_max,g_y_max))
        print(" PR : {}:{}/{}:{}".format(p_x_min,p_y_min,p_x_max,p_y_max))
        print("INTER : {}".format(inter_area))
        print("PRED_AREA : {}".format(pred_rect))
        print("GT   AREA : {}".format(gt_rect))

    # Intersection over union
    return inter_area / (gt_rect + pred_rect - inter_area)


def _region_status_from_iou(iou_array, threshold):
    """
    Classify predicted and ground truth regions from their IoU matrix

    Parameters
    ------
    iou_array : 2-D array, rows : ground truth regions, cols : predicted regions
    threshold : IoU threshold

    Returns
    ------
    (pred_region_status,gt_region_status), two float arrays using the codes
    3 / 1 for predicted regions and 0 / -3 for ground truth regions
    """
    n_gt, n_pred = iou_array.shape
    pred_region_status = np.ones(n_pred)   # 1 : false positive until matched
    gt_region_status = np.zeros(n_gt)      # 0 : detected until proven missed
    matched_gt = np.empty(0, dtype=np.intp)

    # argmax needs at least one row, and there is nothing to match otherwise
    if n_gt > 0 and n_pred > 0:
        best_gt = iou_array.argmax(axis=0)                # Best ground truth per prediction
        best_iou = iou_array[best_gt, np.arange(n_pred)]
        # A plain boolean mask is used for indexing: wrapping the mask in a
        # list raises an IndexError on recent NumPy versions.
        overlaps = iou_array.sum(axis=0) != 0
        true_pos = overlaps & (best_iou >= threshold)
        pred_region_status[true_pos] = 3.0
        matched_gt = best_gt[true_pos]

    # Ground truth regions that no true positive points to are false negatives
    missed = ~np.isin(np.arange(n_gt), matched_gt)
    gt_region_status[missed] = -3.0

    return (pred_region_status,gt_region_status)


def detection_statistics(pred_image,gt_labels,threshold):
    """

    Returns a tuple composed by pred_region_stats and gt_region_stats

    Parameters
    ------
    pred_image : labeled image
    gt_labels  : list of bbox tuples per ground truth region
    threshold  : IoU threshold

    Returns
    ------
    (pred_region_stats,gt_region_stats)

    pred_region_stats :
    an array with one entry per predicted region, holding its status according to the IoU criteria
    3 : True  positive (region is correct)
    1 : False positive (region doesn't exist in ground truth or doesn't match enough)

    gt_region_stats :
    an array with one entry per ground truth region, holding its status according to the IoU criteria
    0 : Region being detected
    -3: False negative (existing region not detected)
    """
    props_list = regionprops(pred_image) # Properties of the labelled image

    # Rows : Gt regions,Cols : Predicted regions (brute-force, all pairs)
    iou_array = np.zeros((len(gt_labels),len(props_list)),dtype=np.float64)
    for idx_a,a in enumerate(gt_labels):
        for idx_b,b in enumerate(props_list):
            iou_array[idx_a,idx_b] = iou_bbox(a,b.bbox)

    return _region_status_from_iou(iou_array,threshold)

def detection_statistics_2(pred_labels,gt_labels,threshold):
    """

    Returns a tuple composed by pred_region_stats and gt_region_stats

    Parameters
    ------
    pred_labels : list of bbox tuples per predicted region
    gt_labels   : list of bbox tuples per ground truth region
    threshold   : IoU threshold

    Returns
    ------
    (pred_region_stats,gt_region_stats)

    pred_region_stats :
    an array with one entry per predicted region, holding its status according to the IoU criteria
    3 : True  positive (region is correct)
    1 : False positive (region doesn't exist in ground truth or doesn't match enough)

    gt_region_stats :
    an array with one entry per ground truth region, holding its status according to the IoU criteria
    0 : Region being detected
    -3: False negative (existing region not detected)
    """
    # Rows : Gt regions,Cols : Predicted regions (brute-force, all pairs)
    iou_array = np.zeros((len(gt_labels),len(pred_labels)),dtype=np.float64)
    for idx_a,a in enumerate(gt_labels):
        for idx_b,b in enumerate(pred_labels):
            iou_array[idx_a,idx_b] = iou_bbox2(a,b)

    return _region_status_from_iou(iou_array,threshold)

In [None]:
def precision(region_stats,gt_stats):
    """ Compute the precision of the image detection tp/(tp+fp)

        region_stats holds one status per predicted region
        (3 : true positive, 1 : false positive); gt_stats is not used.
        Returns 0.0 when there is no predicted region of either kind. """
    values, occurrences = np.unique(region_stats, return_counts=True)
    tally = dict(zip(values, occurrences))  # status value -> number of regions
    true_pos = tally.get(3.0, 0.0)
    false_pos = tally.get(1.0, 0.0)

    detected = true_pos + false_pos
    # Guard against a division by zero when nothing was detected
    return true_pos / detected if detected != 0.0 else 0.0

def recall(region_stats,gt_stats):
    """ Compute the recall of the image detection tp/(tp+fn)

        True positives are the entries equal to 3 in region_stats and
        false negatives the entries equal to -3 in gt_stats.
        Returns 0.0 when both counts are zero. """
    def occurrences_of(stats, status):
        # Number of entries of `stats` equal to `status` (0.0 when absent)
        values, counts = np.unique(stats, return_counts=True)
        return dict(zip(values, counts)).get(status, 0.0)

    true_pos = occurrences_of(region_stats, 3.0)
    false_neg = occurrences_of(gt_stats, -3.0)

    relevant = true_pos + false_neg
    # Guard against a division by zero
    return true_pos / relevant if relevant != 0.0 else 0.0
    
    

def f1_score(region_stats,gt_stats):
    """ Compute the F1-score 2*prec*rec/(prec+rec) of the image detection

        Prints the recall, precision and F1-score, then returns the F1-score.
        The score is 0.0 as soon as the precision or the recall is zero. """
    rec  = recall(region_stats,gt_stats)
    prec = precision(region_stats,gt_stats)

    # Only divide when both terms are non-zero (avoid numerical error)
    if prec != 0. and rec != 0.:
        f1_val = 2*prec*rec/(rec+prec)
    else:
        f1_val = 0.0
    print("Stats : Rec : {} / Prec : {} / F1-score : {}".format(rec,prec,f1_val))
    return f1_val

In [None]:
def detect_by_segmentation(img):
    '''
    Input: One single image
    Output: A list with one tuple per detected varroa, in the following format:
            [(x_1, y_1, w_1, h_1), (x_2, y_2, w_2, h_2), ..., (x_n, y_n, w_n, h_n)]
            where ''n'' is the number of detected varroas, (x, y) is the
            (min column, min row) corner of the region's bounding box and
            (w, h) are its column and row extents.
    '''
    # Segment, label the connected regions, then measure them
    regions = regionprops(label_varroa_region(pipeline(img)))

    # regionprops gives (min_row, min_col, max_row, max_col); convert to (x, y, w, h)
    return [(min_c, min_r, max_c - min_c, max_r - min_r)
            for min_r, min_c, max_r, max_c in (region.bbox for region in regions)]

Add your implementation. Report the Precision, Recall and F1-score, by using all 50 images of the test-set, and considering 0.3 as the IoU threshold.

In [None]:
#Your code


# Run the segmentation detector on every test image and print its statistics
thres_list = [0.3]  # IoU thresholds to report
for idx,img in enumerate(tst_ic):
    # Labelled image for the "classic" statistics, bbox list for the rewritten ones
    bin_img = pipeline(img)
    label_img = label_varroa_region(bin_img)
    pred_labels = detect_by_segmentation(img)
    gt_labels   = xml_annotation_to_bbox(tst_xmls[idx])

    print("Image N° {}".format(idx))
    for thres in thres_list:
        print("IoU threshold : {}".format(thres))

        print("Classic method")
        reg_stats,gt_stats = detection_statistics(label_img,gt_labels,thres)
        f1_score(reg_stats,gt_stats)

        print("Rewritten method")
        reg_stats,gt_stats = detection_statistics_2(pred_labels,gt_labels,thres)
        f1_score(reg_stats,gt_stats)
        
        

## Part 3: Using MLP and CNNs

Add your implementation for the third part. Feel free to add any functions you need, but please make sure that the functions performing the final detection take inputs and return outputs in the same format as in the previous parts.

In [14]:
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd7 import build_model
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from data_generator.data_augmentation_chain_variable_input_size import DataAugmentationVariableInputSize
from data_generator.data_augmentation_chain_constant_input_size import DataAugmentationConstantInputSize
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

%matplotlib inline

In [24]:
# One data generator per split, created with `load_images_into_memory=False`
# and without an HDF5 dataset.
train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# The directories that contain the images.
varroa_train_images_dir  = '../datasets/project-data/images/train/'
varroa_test_images_dir   = '../datasets/project-data/images/test/'
varroa_val_images_dir    = '../datasets/project-data/images/validation/'


# The directories that contain the annotations.
varroa_train_annotations_dir = '../datasets/project-data/annotations/train/'
varroa_test_annotations_dir  = '../datasets/project-data/annotations/test/'
varroa_val_annotations_dir   = '../datasets/project-data/annotations/validation/'


# The paths to the image sets (text files listing the image names of each split).
varroa_train_image_set_filename = '../datasets/project-data/train.txt'
varroa_test_image_set_filename  = '../datasets/project-data/test.txt'
varroa_val_image_set_filename   = '../datasets/project-data/validation.txt'



# The XML parser needs to know what object class names to look for and in which order to map them to integers.
classes = ['background',
           'Varroa']


# Parse the train annotations; only the train and validation splits are
# parsed in this cell, the test paths above are not used here.
train_dataset.parse_xml(images_dirs=[varroa_train_images_dir],
                        image_set_filenames=[varroa_train_image_set_filename],
                        annotations_dirs=[varroa_train_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False,
                        verbose=True)


# Parse the validation annotations with the same settings.
val_dataset.parse_xml(images_dirs=[varroa_val_images_dir],
                        image_set_filenames=[varroa_val_image_set_filename],
                        annotations_dirs=[varroa_val_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False,
                        verbose=True)

Processing image set 'train.txt': 100%|█████████████████████████████████████████████| 800/800 [00:02<00:00, 299.34it/s]
Processing image set 'validation.txt': 100%|████████████████████████████████████████| 150/150 [00:00<00:00, 283.93it/s]


In [25]:
# Input geometry and anchor-box settings. The same values are passed to both
# `build_model` and `SSDInputEncoder` further down.
img_height = 480 # Height of the input images
img_width = 480  # Width of the input images
img_channels = 3 # Number of color channels of the input images
intensity_mean = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
intensity_range = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
n_classes = 1 # Number of positive classes ('Varroa'; 'background' is not counted)
scales = [0.08, 0.16, 0.32, 0.64, 0.96] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes
two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1
steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled
normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size

In [26]:
# 1: Build the Keras model

K.clear_session() # Clear previous models from memory.

# The model is built in 'training' mode from the hyperparameters defined above.
model = build_model(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_global=aspect_ratios,
                    aspect_ratios_per_layer=None,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=intensity_mean,
                    divide_by_stddev=intensity_range)

# 2: Optional: Load some weights (disabled: no weights are loaded here)

#model.load_weights('./ssd7_weights.h5', by_name=True)

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

In [27]:
# 4: Set the batch size.

batch_size = 10

# 5: Define the image processing chain.

# Augmentation chain for training.
# NOTE(review): this object is created but not passed to any generator below,
# so training currently runs without data augmentation - confirm this is intended.
data_augmentation_chain = DataAugmentationVariableInputSize(resize_height=img_height,
                                                            resize_width=img_width)
# Deterministic preprocessing shared by the training, validation and prediction generators.
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)


# 6: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [model.get_layer(layer_name).output_shape[1:3]
                   for layer_name in ('classes4', 'classes5', 'classes6', 'classes7')]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_global=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.3,
                                    normalize_coords=normalize_coords)


# 7: Create the generator handles that will be passed to Keras' `fit_generator()` function.

# Both generators apply the same `[convert_to_3_channels, resize]` chain that the
# prediction generator uses, so the model is trained and validated on inputs that
# are preprocessed exactly like the ones it later predicts on.
train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)

# Get the number of samples in the training and validations datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size   = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

Number of images in the training dataset:	   800
Number of images in the validation dataset:	   150


In [28]:
# Define model callbacks.

# Append one row of metrics per epoch to a CSV file (`append=True` keeps earlier rows).
csv_logger = CSVLogger(filename='ssd7_training_log.csv', separator=',', append=True)

# TODO: Set the filepath under which you want to save the weights.
# Writes the whole model (`save_weights_only=False`), and only when the monitored
# `val_loss` improves (`save_best_only=True`).
model_checkpoint = ModelCheckpoint(
    filepath='ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    period=1,
    verbose=1,
)

# Scale the learning rate by `factor` when `val_loss` plateaus for `patience` epochs.
reduce_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=8,
    epsilon=0.001,
    cooldown=0,
    min_lr=0.00001,
    verbose=1,
)

# Stop training when `val_loss` has not improved for `patience` epochs.
# NOTE(review): with `patience=10` this presumably never triggers in a 10-epoch run.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.0,
    patience=10,
    verbose=1,
)

callbacks = [model_checkpoint, csv_logger, early_stopping, reduce_learning_rate]



In [None]:
# TODO: Set the epochs to train for.
# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
initial_epoch, final_epoch = 0, 10
steps_per_epoch = 300

# Validation runs over enough batches to cover the validation set once;
# the training side draws `steps_per_epoch` batches per epoch.
history = model.fit_generator(
    generator=train_generator,
    validation_data=val_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=ceil(val_dataset_size/batch_size),
    initial_epoch=initial_epoch,
    epochs=final_epoch,
    callbacks=callbacks,
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 3.23184, saving model to ssd7_epoch-01_loss-4.4334_val_loss-3.2318.h5
Epoch 2/10



Epoch 00002: val_loss improved from 3.23184 to 2.89392, saving model to ssd7_epoch-02_loss-3.0043_val_loss-2.8939.h5
Epoch 3/10



Epoch 00003: val_loss did not improve from 2.89392
Epoch 4/10



Epoch 00004: val_loss improved from 2.89392 to 2.77780, saving model to ssd7_epoch-04_loss-2.8478_val_loss-2.7778.h5
Epoch 5/10



Epoch 00005: val_loss improved from 2.77780 to 2.72076, saving model to ssd7_epoch-05_loss-2.7712_val_loss-2.7208.h5
Epoch 6/10



Epoch 00006: val_loss did not improve from 2.72076
Epoch 7/10



Epoch 00007: val_loss did not improve from 2.72076
Epoch 8/10



Epoch 00008: val_loss did not improve from 2.72076
Epoch 9/10



Epoch 00009: val_loss improved from 2.72076 to 2.62484, saving model to ssd7_epoch-09_loss-2.6463_val_loss-2.6248.h5
Epoch 10/10




In [None]:
# Plot the training and validation loss recorded by `fit_generator`, one point per epoch.
plt.figure(figsize=(20,12))
for curve_name in ('loss', 'val_loss'):
    plt.plot(history.history[curve_name], label=curve_name)
plt.legend(loc='upper right', prop={'size': 24});

In [None]:
# 1: Set the generator for the predictions.

# One shuffled validation image per batch, without label encoding, returning
# everything needed to map predictions back onto the original image.
predict_generator = val_dataset.generate(
    batch_size=1,
    shuffle=True,
    keep_images_without_gt=False,
    label_encoder=None,
    transformations=[convert_to_3_channels, resize],
    returns={'processed_images',
             'filenames',
             'inverse_transform',
             'original_images',
             'original_labels'},
)

In [None]:
# 2: Generate samples.

# Draw the next batch from the prediction generator.
(batch_images,
 batch_filenames,
 batch_inverse_transforms,
 batch_original_images,
 batch_original_labels) = next(predict_generator)

i = 0 # Which batch item to look at

print("Image:", batch_filenames[i])
print()
print("Ground truth boxes:\n")
ground_truth = np.array(batch_original_labels[i])
print(ground_truth)

In [None]:
# 3: Make predictions.

# Run the model on the sampled batch; `y_pred` is the raw output that the next step decodes.
y_pred = model.predict(batch_images)

In [None]:
# 4: Decode the raw prediction `y_pred`

# Thresholds used for decoding: confidence 0.3, IoU 0.1, at most 200 boxes kept.
y_pred_decoded = decode_detections(
    y_pred,
    confidence_thresh=0.3,
    iou_threshold=0.1,
    top_k=200,
    img_height=img_height,
    img_width=img_width,
    normalize_coords=normalize_coords,
)

# Compact, non-scientific number formatting for the printed box table.
np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')
print(y_pred_decoded[i])

In [None]:
# 5: Convert the predictions for the original image.

# Apply the inverse transforms returned by the generator to the decoded boxes,
# so the boxes can be drawn on the original (un-resized) image.
y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)

np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')
print(y_pred_decoded_inv[i])

In [None]:
# 6: Draw the predicted boxes onto the image

# One colour per class id (background included), and the class names by id.
colors = plt.cm.hsv(np.linspace(0, 1, n_classes+1)).tolist()
classes = ['background', 'Varroa']

plt.figure(figsize=(20,12))
plt.imshow(batch_original_images[i])
current_axis = plt.gca()

# Ground truth boxes in green; each row is read as (class, xmin, ymin, xmax, ymax).
for box in batch_original_labels[i]:
    xmin, ymin, xmax, ymax = box[1], box[2], box[3], box[4]
    label = '{}'.format(classes[int(box[0])])
    current_axis.add_patch(plt.Rectangle(xy=(xmin, ymin),
                                         width=xmax-xmin,
                                         height=ymax-ymin,
                                         color='green',
                                         fill=False,
                                         linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white',
                      bbox={'facecolor':'green', 'alpha':1.0})

# Predicted boxes in the class colour; each row is read as (class, conf, xmin, ymin, xmax, ymax).
for box in y_pred_decoded_inv[i]:
    xmin, ymin, xmax, ymax = box[2], box[3], box[4], box[5]
    color = colors[int(box[0])]
    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
    current_axis.add_patch(plt.Rectangle(xy=(xmin, ymin),
                                         width=xmax-xmin,
                                         height=ymax-ymin,
                                         color=color,
                                         fill=False,
                                         linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white',
                      bbox={'facecolor':color, 'alpha':1.0})

We need to inspect the image dimensions and the bounding-box parameters of the annotations.

In [None]:
# Inspect validation sample 5: print its shape and annotations, then draw its first box.
print(val_ic[5].shape)
print(val_xmls[5])

fig, ax = plt.subplots()
plt.imshow(val_ic[5])

# The 'bbox' entry is used as (x, y, width, height).
first_bbox = val_xmls[5][0]['bbox']
rect = patches.Rectangle(xy=(first_bbox[0], first_bbox[1]),
                         width=first_bbox[2],
                         height=first_bbox[3],
                         linewidth=1,
                         edgecolor='r',
                         facecolor='none')
ax.add_patch(rect)

In [None]:
# Resize an image to a square of dim_img x dim_img pixels (1024 by default) and
# rescale its bounding boxes to match. The aspect ratio is NOT conserved: width
# and height are stretched independently.
def reshape_img_bb(img, bbox_list, dim_img=1024):
    """Resize `img` to a square and scale its bounding boxes accordingly.

    Parameters
    - img: image array; `img.shape[0]` is used as the height and
      `img.shape[1]` as the width.
    - bbox_list: iterable of dicts whose 'bbox' entry is (x, y, w, h) in the
      pixel coordinates of `img`. May be empty or None.
    - dim_img: side length in pixels of the square output (default 1024).

    Returns
      A tuple `(result_img, result_boxes)`: the resized image and a list of
      `(x, y, w, h)` integer tuples in the coordinates of the resized image
      (values are truncated by `int`).
    """
    # Imported locally under an alias: a module-level
    # `from skimage.transform import resize` would overwrite the notebook's
    # `resize = Resize(...)` transformation used by the data generators.
    from skimage.transform import resize as sk_resize

    ratio_x = dim_img / img.shape[1]
    ratio_y = dim_img / img.shape[0]
    result_img = sk_resize(img, (dim_img, dim_img), anti_aliasing=True)

    result_boxes = []
    for elem in (bbox_list or []):
        x, y, w, h = elem['bbox'][:4]
        # Horizontal quantities scale with ratio_x, vertical ones with ratio_y.
        result_boxes.append((int(x * ratio_x),
                             int(y * ratio_y),
                             int(w * ratio_x),
                             int(h * ratio_y)))

    return (result_img, result_boxes)

Validate the resizing method: plot the original image and boxes next to the resized ones.

In [None]:
# Left panel: test sample 2 with its annotated boxes, 'bbox' used as (x, y, width, height).
ax = plt.subplot(121)
for elem in tst_xmls[2]:
    bbox = elem['bbox']
    rect = patches.Rectangle(xy=(bbox[0], bbox[1]),
                             width=bbox[2],
                             height=bbox[3],
                             linewidth=1,
                             edgecolor='r',
                             facecolor='none')
    ax.add_patch(rect)
ax.imshow(tst_ic[2])

# Right panel: the same sample after `reshape_img_bb`, with the rescaled boxes.
ax = plt.subplot(122)
print(tst_ic[2].shape)
print(tst_xmls[2])
n_img,n_boxes = reshape_img_bb(tst_ic[2],tst_xmls[2])

for elem in n_boxes:
    rect = patches.Rectangle(xy=(elem[0], elem[1]),
                             width=elem[2],
                             height=elem[3],
                             linewidth=1,
                             edgecolor='r',
                             facecolor='none')
    ax.add_patch(rect)
ax.imshow(n_img)

Add SSD7 support and get it working.

In [None]:
# Small fully connected network: 64 inputs -> 200 ReLU units -> dropout -> 4 outputs.
# NOTE(review): this rebinds `model`, the name the SSD7 cells above use for the detector.
model = Sequential()
model.add(Dense(200, input_dim=64))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(4))
model.compile(optimizer='adadelta', loss='mse')

## Challenge

You can generate a json submission file by using the function ''**generate_pred_json**''. This prediction file can be uploaded online for evaluation (Please refer to section 3 of the project description for more details).

In [None]:
import numpy as np
import json

def generate_pred_json(data, tag='baseline'):
    '''
    Write the predictions to "prediction_[tag].json" in the EvalAI format.

    Input
    - data: Is a dictionary d, such that:
          d = { 
              "ID_1": [], 
              "ID_2": [[x_21, y_21, w_21, h_21], [x_22, y_22, w_22, h_22]], 
              ... 
              "ID_i": [[x_i1, y_i1, w_i1, h_i1], ..., [x_iJ, y_iJ, w_iJ, h_iJ]],
              ... 
              "ID_N": [[x_N1, y_N1, w_N1, h_N1]],
          }
          where ID is the string id of the image (i.e. 5a05e86fa07d56baef59b1cb_32.00px_1) and the value the Kx4 
          array of integers for the K predicted bounding boxes (e.g. [[170, 120, 15, 15]])
    - tag: (optional) string that will be added to the name of the json file.
    Output
      Create a json file, "prediction_[tag].json", containing the predictions in EvalAI format.
      Box values are rounded to the closest integer. An entry is left out of the file when
      its key is not a string, or when its value cannot be converted to a 2d array with
      4 columns (this includes empty lists, which convert to a 1d array).
      The input dictionary is not modified.
    '''
    valid_data = {}
    for key, value in data.items():
        try:
            # Try to convert to numpy array and cast as closest int
            boxes = np.around(np.array(value)).astype(int)
        # Deal with inconsistent array sizes and non-numeric values
        except (ValueError, TypeError):
            continue
        # Id must be a string
        if not isinstance(key, str):
            continue
        # Must be a 2d array with 4 columns (x,y,w,h)
        if boxes.ndim != 2 or boxes.shape[1] != 4:
            continue
        # Each key is accepted or rejected exactly once, so an entry that fails
        # several checks can no longer trigger a double deletion (KeyError).
        valid_data[key] = boxes.tolist()

    with open('prediction_{}.json'.format(tag), 'w') as outfile:
        json.dump(valid_data, outfile)

In [None]:
#Your code