# Road segmentation

In [None]:
%matplotlib inline

import numpy as np
import os,sys

from sklearn import linear_model
from sklearn import neighbors
from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV

from helpers import *
from plots import *
import postprocessing

%load_ext autoreload
%autoreload 2

## Parameters

In [None]:
# Estimator and the hyper-parameter grid explored by the grid search.
# Leave exactly one model/param_grid pair uncommented.

# --- Logistic regression
#model = linear_model.LogisticRegression(class_weight="balanced", solver="lbfgs")
#param_grid = {'C': [1e5, 1e4, 1e3, 1e2, 1e1, 1, 1e-1, 1e-2, 1e-3, 1e-4]}

# --- k-nearest neighbours (active configuration)
model = neighbors.KNeighborsClassifier(algorithm='auto')
param_grid = {
    'n_neighbors': list(range(1, 38, 4)),  # 1, 5, 9, ..., 37
    'weights': ['uniform', 'distance'],
}
#param_grid = {'n_neighbors': [15, 25, 35, 45, 55, 65, 75, 85, 95], 'weights': ['distance']}

# --- Radius neighbours
#model = neighbors.RadiusNeighborsClassifier(algorithm='auto')
#param_grid = {'radius': [0.11, 0.12, 0.13, 0.14], 'weights': ['uniform', 'distance'], 'outlier_label': [0, 1]}

# --- Support vector classifier
#model = svm.SVC(random_state=0)
#param_grid = {'C': [1.0], 'kernel': ['rbf'], 'class_weight': [None]}

# --- Linear support vector classifier
#model = svm.LinearSVC(random_state=0)
#param_grid = {'C': [10000000000000], 'class_weight': [None], 'max_iter': [10000]}

In [None]:
# Number of training images (passed to load_training_images)
n = 100

# Number of cross-validation splits (used as n_splits of StratifiedShuffleSplit).
k = 3

# Fix randomness (integer) or not (None); used as random_state of the cross-validation splitter
seed = 0

# Patch size used for feature extraction and labelling.
# AIcrowd expects patches of 16 by 16 pixels: when patch_size differs from 16,
# predictions are aggregated to 16-pixel patches using aggregate_threshold.
patch_size = 4
aggregate_threshold = 0.3

# Threshold used to assign a foreground label to a patch (default=0.25).
# Original note: "percentage of pixels > 1 required" —
# NOTE(review): presumably the fraction of foreground pixels in the patch; confirm in helpers.
# We can give multiple values as a list, and it will keep the one that yields the best F1 score on cross-validation.
foreground_threshold = [0.25]

# Extraction function (leave exactly one uncommented)
#extraction_func = extract_features_2d
extraction_func = extract_features_6d
#extraction_func = extract_features_12d

# Preprocessing function (None disables preprocessing)
#preproc = None
preproc = preprocessing.StandardScaler()
#preproc = preprocessing.Normalizer()

# Create a submission file? (checked before calling create_submission)
submit = False

## Data extraction

In [None]:
# Load a set of n training images (imgs) together with their ground-truth images (gt_imgs)
imgs, gt_imgs = load_training_images(n)

In [None]:
# Cut the images, then the ground-truth images, into patches using patch_size
img_patches, gt_patches = (
    get_patches(image_set, patch_size) for image_set in (imgs, gt_imgs)
)

In [None]:
# Features extracted from the image patches with the configured extraction function
X = get_features_from_patches(img_patches, extraction_func)

# One set of labels per candidate foreground threshold; the cross-validation
# later stores the retained one in Y.
Ys = [
    get_labels_from_patches(gt_patches, threshold)
    for threshold in foreground_threshold
]
Y = None

print(X.shape)

In [None]:
# Optional preprocessing: fit on the whole feature matrix, then transform it.
# fit() returns the estimator itself (scikit-learn convention), so preproc
# remains the fitted object reused later on other images.
if preproc is not None:
    X = preproc.fit(X).transform(X)

print(X)

## Cross-validation and parameters tuning

In [None]:
# k stratified random train/test splits (class proportions preserved in each split),
# shared by every grid search below.
split = StratifiedShuffleSplit(n_splits=k, random_state=seed)
search = GridSearchCV(model, param_grid, cv=split, scoring="f1")

score = -1
best_threshold = None
# Cross-validate with different foreground_threshold values.
for threshold, y in zip(foreground_threshold, Ys):
    # Grid search on the parameters, to find the best score.
    search.fit(X, y)
    print("Threshold={} : score={}".format(threshold, search.best_score_))
    # A NaN score never compares greater, so it is never selected.
    if search.best_score_ > score:
        Y = y
        score = search.best_score_
        best_threshold = threshold
        # Estimator refitted by GridSearchCV with the best parameters.
        model = search.best_estimator_

# Without this guard, an empty threshold list (or only invalid scores) would
# silently leave Y unset and the model unfitted, failing later with an
# unrelated error.
if best_threshold is None:
    raise RuntimeError(
        "Cross-validation did not select any model: check that "
        "foreground_threshold is a non-empty list and that scoring succeeded."
    )

In [None]:
# Report the best cross-validated F1 score and the parameters of the retained model
print(f"F1 score: {score}")
print(model.get_params())

## Predictions

In [None]:
# Predictions of the retained model on the training features
Z = model.predict(X)

# Compare the predictions Z with the retained labels Y
TPR, TNR, FPR, FNR = (
    rate(Z, Y)
    for rate in (
        true_positive_rate,
        true_negative_rate,
        false_positive_rate,
        false_negative_rate,
    )
)

for label, value in (
    ('True positive rate', TPR),
    ('True negative rate', TNR),
    ('False positive rate', FPR),
    ('False negative rate', FNR),
):
    print(f'{label} = {value}')

In [None]:
# Predict on test set and create submission.
# Only runs when the `submit` flag from the parameters is set; the fitted model
# and preprocessing object are handed over together with the extraction settings.
if submit:
    create_submission(model, extraction_func, patch_size, preproc, aggregate_threshold)

## Comparison of groundtruth and prediction

In [None]:
# Index of the training image to inspect
idx = 1
img = imgs[idx]
gt_img = gt_imgs[idx]

# foreground_threshold is a list of candidate values. Label the ground truth
# with the single threshold whose labels were retained as Y by the
# cross-validation, so the patched ground truth matches what the model was
# trained on. Falls back to the first candidate if no selection was made.
best_threshold = next(
    (t for t, y in zip(foreground_threshold, Ys) if y is Y),
    foreground_threshold[0],
)

# Run predictions
Xi = get_features_from_img(img, extraction_func, patch_size)
Yi = get_labels_from_img(gt_img, best_threshold, patch_size)
# Apply the same (already fitted) preprocessing as for the training features
if preproc is not None:
    Xi = preproc.transform(Xi)
Zi = model.predict(Xi)

In [None]:
# First two dimensions of the image, in the order expected by label_to_img
w, h = img.shape[:2]

# Rebuild images from the patch labels (ground truth) and the patch predictions
label_img, predicted_img = (
    label_to_img(w, h, patch_size, patch_size, patch_values)
    for patch_values in (Yi, Zi)
)

In [None]:
# Display the ground-truth image of the selected training index
print("Groundtruth:")
show_img(gt_imgs[idx])

In [None]:
# Display the image rebuilt from the ground-truth patch labels
print("Patched groundtruth:")
show_img(label_img)

In [None]:
# Display the image rebuilt from the model's patch predictions
print("Predictions:")
show_img(predicted_img)

## Visualization of a single test image

In [None]:
# Load a test image
# Index of the test image; it fills both the folder name and the file name below.
img_idx = 1

img_path = "Datasets/test_set_images/test_{0}/test_{0}.png".format(img_idx)
# NOTE: this rebinds img, which previously held the training image inspected above.
img = load_image(img_path)

In [None]:
# Extract the features of the test image, apply the fitted preprocessing (if any), then predict
raw_features = get_features_from_img(img, extraction_func, patch_size)
Xi = raw_features if preproc is None else preproc.transform(raw_features)
Zi = model.predict(Xi)

In [None]:
# Plot the data points with their predicted labels, only when there are exactly two features
if Xi.shape[1] == 2:
    show_data_points(Xi, Zi)

In [None]:
# Rebuild an image from the patch predictions and show it next to the test image
w, h = img.shape[:2]
predicted_img = label_to_img(w, h, patch_size, patch_size, Zi)
show_img_concatenated(img, predicted_img)

In [None]:
# Overlay the predicted image on the test image
show_img_overlay(img, predicted_img)

In [None]:
# When predictions were not made on 16-pixel patches, aggregate them to that
# size (using aggregate_threshold) and display the aggregated result.
target_patch_size = 16
if patch_size != target_patch_size:
    Zi_agg = postprocessing.aggregate_labels(
        Zi, patch_size, target_patch_size, aggregate_threshold
    )
    predicted_img_agg = label_to_img(
        w, h, target_patch_size, target_patch_size, Zi_agg
    )
    show_img(predicted_img_agg)
    show_img_overlay(img, predicted_img_agg)

In [None]:
# Run the Hough-transform post-processing on the predicted image of the test image.
# As the last expression of the cell, its return value (if any) is displayed by the notebook.
postprocessing.hough_transform(predicted_img)