In [1]:
import pandas as pd

train = pd.read_csv('train.csv')

In [2]:
train[:5]

Unnamed: 0,image_filename,lighting,city
0,frame_20f328fa-2459-46d0-97a5-5ae2d6103cb0_000...,Twilight,NYC
1,frame_927bde20-f97f-48c2-af30-f9127b6b32ce_000...,Day,NYC
2,frame_67012509-f3bd-4175-a9d2-565a7b6bb3c7_000...,Day,NYC
3,frame_bd043377-6fb8-407a-95e5-7deb1fbab13a_000...,Day,NYC
4,frame_4da1583b-58d0-4893-8149-54541191031d_000...,Day,NYC


In [3]:
train_boxes = pd.read_csv('train_boxes.csv')

In [4]:
train_boxes[:5]

Unnamed: 0,image_filename,x0,y0,x1,y1,label,confidence
0,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,601.6,270.355731,726.755556,421.185771,van,1.0
1,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,497.777778,308.774704,534.755556,338.656126,car,1.0
2,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,449.422222,310.197628,509.155556,358.577075,car,1.0
3,frame_a9110bf2-5252-4ec6-83c6-33b65d0fc04d_000...,711.111111,304.505929,786.488889,368.537549,car,1.0
4,frame_a9110bf2-5252-4ec6-83c6-33b65d0fc04d_000...,584.533333,307.351779,647.111111,358.577075,car,1.0


In [5]:
from collections import Counter

Counter(train_boxes['confidence'].ravel())

Counter({1.0: 134361})

In [6]:
train_boxes.drop(labels = 'confidence', axis = 1, inplace = True)

In [7]:
from sklearn.model_selection import train_test_split
train_names, test_names = train_test_split(train.image_filename.values, test_size = 0.1, random_state = 0)

In [8]:
full = train_boxes.merge(train, on = 'image_filename')

In [9]:
full_train = full[full.image_filename.isin(train_names)]
full_validation = full[full.image_filename.isin(test_names)]

In [10]:
train_boxes.shape, full_train.shape, full_validation.shape

((134361, 6), (120918, 8), (13443, 8))

# OpenCV baseline (no tuning)

In [11]:
!wget https://raw.githubusercontent.com/Juzer2012/Car-detection/master/cars.xml -O cars.xml

--2017-08-12 10:34:38--  https://raw.githubusercontent.com/Juzer2012/Car-detection/master/cars.xml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.12.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.12.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 118803 (116K) [text/plain]
Saving to: ‘cars.xml’


2017-08-12 10:34:38 (297 KB/s) - ‘cars.xml’ saved [118803/118803]



In [12]:
import cv2
 
car_cascade = cv2.CascadeClassifier('cars.xml')

In [13]:
import random
import os
from tqdm import tqdm 

#Adopted from @bendyna code 
def get_predictions(image_filenames):
    result = []
    for image_file in tqdm(image_filenames):
        path = os.path.join('images', image_file)
        image = cv2.imread(path)
        if image is None:
            continue
        cars = car_cascade.detectMultiScale(image, 1.1, 2)

        prediction_boxes = []
        for x, y, w, h in cars:
            box = (x, y, x + w, y + h)
            prediction_boxes.append((random.random(), box))
            
        true_boxes = []
        for _, row in train_boxes[train_boxes.image_filename == image_file].iterrows():
            true_boxes.append((row.x0, row.y0, row.x1, row.y1))
        result.append((image_file, true_boxes, prediction_boxes))
    
    print len(result)
    return result

def IOU(box1, box2):
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])
    w = max(0, right - left + 1)
    h = max(0, bottom - top + 1)
    inter = w * h
    uni = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) \
            + (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) - inter
    return inter * 1. / uni


def average_precision(arr, iou_threshold=0.75):
    score_detection = []
    all_true = 0
    for _, true_boxes, prediction_boxes in arr:
        all_true += len(true_boxes)
        detected = [0] * len(true_boxes)
        pboxes = sorted(prediction_boxes, key=lambda x: x[0], reverse=True)
        for score, box in pboxes:
            best = None
            best_iou = 0
            for i in range(len(true_boxes)):
                if detected[i]:
                    continue
                iou = IOU(true_boxes[i], box)
                if iou >= iou_threshold and iou > best_iou:
                    best, best_iou = i, iou
            if best is not None:
                detected[i] = 1
                score_detection.append((score, 1))
            else:
                score_detection.append((score, 0))
    score_detection = sorted(score_detection, key=lambda x: x[0], reverse=True)
    result = 0
    width = 1. / all_true
    height = 1
    count = 0
    count1 = 0
    for score, detection in score_detection:
        count += 1
        if detection:
            result += width * height
            count1 += 1
        else:
            height = count1 * 1.0 / count
    return result


In [14]:
average_precision(get_predictions(test_names[::20]), 0.75)

100%|██████████| 250/250 [00:15<00:00, 15.94it/s]

150





0

# SSD (No tuning)

In [15]:
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imread
import tensorflow as tf

Using TensorFlow backend.


In [16]:
!git clone https://github.com/rykov8/ssd_keras.git

fatal: destination path 'ssd_keras' already exists and is not an empty directory.


In [17]:
!touch ssd_keras/__init__.py

In [18]:
from ssd_keras.ssd import SSD300
from ssd_keras.ssd_utils import BBoxUtility

%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True)

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.95
set_session(tf.Session(config=config))


In [19]:
voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
NUM_CLASSES = len(voc_classes) + 1

In [20]:
!ls ssd_keras/

gt_pascal.pkl  pics		       ssd_layers.pyc	   ssd_utils.py
__init__.py    prior_boxes_ssd300.pkl  ssd.py		   ssd_utils.pyc
__init__.pyc   README.md	       ssd.pyc		   testing_utils
LICENSE        SSD.ipynb	       SSD_training.ipynb  weights_SSD300.hdf5
PASCAL_VOC     ssd_layers.py	       ssd_training.py


In [21]:
# Download weights from https://mega.nz/#F!7RowVLCL!q3cEVRK9jyOSB9el3SssIA

In [22]:
input_shape=(300, 300, 3)
model = SSD300(input_shape, num_classes=NUM_CLASSES)
model.load_weights('ssd_keras/weights_SSD300.hdf5', by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)


In [23]:
import random
import os
from tqdm import tqdm 

def get_predictions(image_filenames):
    result = []

    for image_file in tqdm(image_filenames):
        path = os.path.join('images', image_file)
        try:
            img = image.load_img(path)
            original_width, original_height = img.size
            img = image.load_img(path, target_size=(300, 300))
        except IOError:
            continue
        img = image.img_to_array(img)
        
        preds = model.predict(np.array([img]), batch_size=1, verbose=False)
        prediction_boxes = []
        results = bbox_util.detection_out(preds)

        for label, conf, xmin, ymin, xmax, ymax in results[0]:
            if label in [6]:
                box = (xmin * original_width, ymin * original_height, xmax * original_width, ymax * original_height)
                prediction_boxes.append((conf, box))
            
        true_boxes = []
        for _, row in train_boxes[train_boxes.image_filename == image_file].iterrows():
            true_boxes.append((row.x0, row.y0, row.x1, row.y1))
        result.append((image_file, true_boxes, prediction_boxes))
    
    print len(result)
    return result

def IOU(box1, box2):
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])
    w = max(0, right - left + 1)
    h = max(0, bottom - top + 1)
    inter = w * h
    uni = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) \
            + (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) - inter
    return inter * 1. / uni


def average_precision(arr, iou_threshold=0.75):
    score_detection = []
    all_true = 0
    for _, true_boxes, prediction_boxes in arr:
        all_true += len(true_boxes)
        detected = [0] * len(true_boxes)
        pboxes = sorted(prediction_boxes, key=lambda x: x[0], reverse=True)
        for score, box in pboxes:
            best = None
            best_iou = 0
            for i in range(len(true_boxes)):
                if detected[i]:
                    continue
                iou = IOU(true_boxes[i], box)
                if iou >= iou_threshold and iou > best_iou:
                    best, best_iou = i, iou
            if best is not None:
                detected[i] = 1
                score_detection.append((score, 1))
            else:
                score_detection.append((score, 0))
    score_detection = sorted(score_detection, key=lambda x: x[0], reverse=True)
    result = 0
    width = 1. / all_true
    height = 1
    count = 0
    count1 = 0
    for score, detection in score_detection:
        count += 1
        if detection:
            result += width * height
            count1 += 1
        else:
            height = count1 * 1.0 / count
    return result

In [24]:
average_precision(get_predictions(test_names[::10]), 0.75)

100%|██████████| 500/500 [00:20<00:00, 24.94it/s]

300





0.0077499009841511075