In [1]:
# import libraries

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from collections import defaultdict

import json

from sklearn.metrics import confusion_matrix, classification_report, cohen_kappa_score

import itertools

import random

from skimage import measure

In [2]:
# functions

def read_image(path):
    return plt.imread(path)

def read_annotation_file(path):
    with open(path) as annotation_file:
        annotation_list = json.load(annotation_file)
    # Transform list of annotations into dictionary
    annotation_dict = {}
    for annotation in annotation_list:
        sequence_id = annotation['sequence_id']
        if sequence_id not in annotation_dict:
            annotation_dict[sequence_id] = {}
        annotation_dict[sequence_id][annotation['frame']] = annotation['object_coords']
    return annotation_dict

random.seed(0)

def random_different_coordinates(coords, size_x, size_y, pad):
    """ Returns a random set of coordinates that is different from the provided coordinates, 
    within the specified bounds.
    The pad parameter avoids coordinates near the bounds."""
    good = False
    while not good:
        good = True
        c1 = random.randint(pad + 1, size_x - (pad + 1))
        c2 = random.randint(pad + 1, size_y -( pad + 1))
        for c in coords:
            if c1 == c[0] and c2 == c[1]:
                good = False
                break
    return (c1,c2)

def extract_neighborhood(x, y, arr, radius):
    """ Returns a 1-d array of the values within a radius of the x,y coordinates given """
    return arr[(x - radius) : (x + radius + 1), (y - radius) : (y + radius + 1)].ravel()

def check_coordinate_validity(x, y, size_x, size_y, pad):
    """ Check if a coordinate is not too close to the image edge """
    return x >= pad and y >= pad and x + pad < size_x and y + pad < size_y

def generate_labeled_data(image_path, annotation, nb_false, radius):
    """ For one frame and one annotation array, returns a list of labels 
    (1 for true object and 0 for false) and the corresponding features as an array.
    nb_false controls the number of false samples
    radius defines the size of the sliding window (e.g. radius of 1 gives a 3x3 window)"""
    features,labels = [],[]
    im_array = read_image(image_path)
    # True samples
    for obj in annotation:
        obj = [int(x + .5) for x in obj] #Project the floating coordinate values onto integer pixel coordinates.
        # For some reason the order of coordinates is inverted in the annotation files
        if check_coordinate_validity(obj[1],obj[0],im_array.shape[0],im_array.shape[1],radius):
            features.append(extract_neighborhood(obj[1],obj[0],im_array,radius))
            labels.append(1)
    # False samples
    for i in range(nb_false):
        c = random_different_coordinates(annotation,im_array.shape[1],im_array.shape[0],radius)
        features.append(extract_neighborhood(c[1],c[0],im_array,radius))
        labels.append(0)
    return np.array(labels),np.stack(features,axis=1)

def generate_labeled_set(annotation_array, path, sequence_id_list, radius, nb_false):
    # Generate labeled data for a list of sequences in a given path
    labels,features = [],[]
    for seq_id in sequence_id_list:
        for frame_id in range(1,6):
            d = generate_labeled_data(f"{path}{seq_id}/{frame_id}.png",
                                    annotation_array[seq_id][frame_id],
                                    nb_false,
                                    radius)
            labels.append(d[0])
            features.append(d[1])
    return np.concatenate(labels,axis=0), np.transpose(np.concatenate(features,axis=1))

## data prepartation

In [3]:
train_annotation = read_annotation_file('train_anno.json')

In [4]:
%%time

radius = 3
train_labels, train_features = generate_labeled_set(train_annotation, 'train/', range(1,1000), radius, 10)

print(train_labels.shape)
print(train_features.shape)

(58438,)
(58438, 49)
CPU times: user 30.7 s, sys: 384 ms, total: 31.1 s
Wall time: 31.1 s


## training

In [5]:
from sklearn.ensemble import RandomForestClassifier

In [6]:
model = RandomForestClassifier(random_state = 0)

In [7]:
%%time

model.fit(train_features, train_labels)

CPU times: user 17 s, sys: 20 ms, total: 17 s
Wall time: 17 s


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

## validation

In [8]:
validation_labels, validation_features = generate_labeled_set(train_annotation, 'train/', range(1001,1280), radius, 10)

print(validation_labels.shape)
print(validation_features.shape)

(16491,)
(16491, 49)


In [9]:
# random forest

pred_labels = model.predict(validation_features)

print(classification_report(pred_labels, validation_labels))
print('\n')
print(confusion_matrix(pred_labels, validation_labels))
print('\n')
print("Kappa =", cohen_kappa_score(pred_labels, validation_labels))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99     14134
           1       0.89      0.96      0.93      2357

    accuracy                           0.98     16491
   macro avg       0.94      0.97      0.96     16491
weighted avg       0.98      0.98      0.98     16491



[[13862   272]
 [   88  2269]]


Kappa = 0.9137031656127184


## predicition

In [10]:
def classify_image(im, model, radius):
    n_features=(2*radius+1)**2 #Total number of pixels in the neighborhood
    feat_array=np.zeros((im.shape[0],im.shape[1],n_features))
    for x in range(radius+1,im.shape[0]-(radius+1)):
        for y in range(radius+1,im.shape[1]-(radius+1)):
            feat_array[x,y,:]=extract_neighborhood(x,y,im,radius)
    all_pixels=feat_array.reshape(im.shape[0]*im.shape[1],n_features)
    pred_pixels=model.predict(all_pixels).astype(np.bool_)
    pred_image=pred_pixels.reshape(im.shape[0],im.shape[1])
    return pred_image

def extract_centroids(pred, bg):
    conn_comp=measure.label(pred, background=bg)
    object_dict=defaultdict(list) #Keys are the indices of the connected components and values are arrrays of their pixel coordinates 
    for (x,y),label in np.ndenumerate(conn_comp):
            if label != bg:
                object_dict[label].append([x,y])
    # Mean coordinate vector for each object, except the "0" label which is the background
    centroids={label: np.mean(np.stack(coords),axis=0) for label,coords in object_dict.items()}
    object_sizes={label: len(coords) for label,coords in object_dict.items()}
    return centroids, object_sizes

def filter_large_objects(centroids,object_sizes, max_size):
    small_centroids={}
    for label,coords in centroids.items():
            if object_sizes[label] <= max_size:
                small_centroids[label]=coords
    return small_centroids

def predict_objects(sequence_id, frame_id, model, radius, max_size):
    test_image = plt.imread(f"test/{sequence_id}/{frame_id}.png")
    test_pred=classify_image(test_image, model, radius)
    test_centroids, test_sizes = extract_centroids(test_pred, 0)
    test_centroids = filter_large_objects(test_centroids, test_sizes, max_size)
    # Switch x and y coordinates for submission
    if len(test_centroids.values()) > 0:
        sub=np.concatenate([c[np.array([1,0])].reshape((1,2)) for c in test_centroids.values()])
        #np array converted to list for json seralization, truncated to the first 30 elements
        return sub.tolist()[0:30]
    else:
        return []

In [11]:
%%time

sub_list = predict_objects(1, 1, model, radius, 1)

CPU times: user 5.4 s, sys: 72.1 ms, total: 5.47 s
Wall time: 5.47 s


In [12]:
sequence_id = 1
frame_id = 1

In [17]:
submission = []

for s in range(1,5121):
    for fr in range(1,6):
        if s == sequence_id and fr == frame_id:
            submission.append({"sequence_id" : s,
                               "frame" : fr,
                               "num_objects" : len(sub_list),
                               "object_coords" : sub_list})
        else:
            submission.append({"sequence_id" : s,
                               "frame" : fr,
                               "num_objects" : 0,
                               "object_coords" : []})

with open('my_submission/my_submission.json', 'w') as outfile:
    json.dump(submission, outfile)

## parallelization

In [14]:
#from multiprocessing import Pool
#nb_procs = 3

In [15]:
#%%time

#p = Pool(processes=nb_procs)
#sequence_list, frame_list = np.arange(1, 10), np.arange(1, 6)
#id_pair_list = list(itertools.product(sequence_list, frame_list))
#sub_sequence = p.starmap(predict_objects, [(id_pair[0], id_pair[1], model, radius, 1) for id_pair in id_pair_list])
#p.close()

In [16]:
#sub_dict = {id_pair: sub for id_pair,sub in zip(id_pair_list, sub_sequence)}

#submission=[]
#for id_pair,sub_list in sub_dict.items():
#           submission.append({"sequence_id" : int(id_pair[0]), 
#                                    "frame" : int(id_pair[1]), 
#                                    "num_objects" : len(sub_list), 
#                                    "object_coords" : sub_list})
#with open('my_submission.json', 'w') as outfile:
#    json.dump(submission, outfile)