# Dependencies and setup

In [None]:
import cv2
import os
import glob
import re
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, metrics, model_selection

from tensorflow.python.keras.applications import ResNet50
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Conv2D, MaxPooling2D, Dropout

import plot_support

# Root of the data tree (a relative path).
_data_root = os.path.join('..', 'Data')

# The preprocessed imagery is what gets used.  The alternative locations are
# the 'Training' and 'Vehicules1024' sub-directories of the same root.
training_dir = os.path.join(_data_root, 'Training_pre')
data_dir = os.path.join(_data_root, 'Preprocessed')

# Switches selecting which classifiers are trained and evaluated below.
run_pixel_svm = True
run_feature_svm = False
run_cnn = True
run_resnet = True

# Read all the data into memory

In [None]:
# Get a list of all the data files.  glob returns names in arbitrary order, so
# sort them to keep the sample order (and the seeded splits below) reproducible.
training_files = sorted(glob.glob(os.path.join(training_dir, '*.png')))
if not training_files:
    raise FileNotFoundError("No training images (*.png) found in {}".format(training_dir))

# Read the first image to get the size
test_img = cv2.imread(training_files[0])
img_size = test_img.shape[0:2]

# Initialize data elements for the worst case (every file is usable); the
# unused tail is trimmed off after the loop.
X = np.zeros((len(training_files), img_size[0], img_size[1]))
y = np.zeros(len(training_files))

# Object-class token in the filename -> numeric label
class_labels = {'1': 1,    # "Car"
                'bg': 0}   # Background
filename_pattern = re.compile(r"\d+\.\d+\.(\S+)\.png")

# Step through all the files
current_idx = 0
for fs in training_files:
    # Parse the filename (we only really care about extracting the object class)
    match = filename_pattern.search(fs)
    if match is None or match.group(1) not in class_labels:
        # Not a car or background chip: leave it out of the data set
        continue

    # Read the file and keep a single colour plane (index 1, i.e. green in
    # OpenCV's BGR channel order)
    img = cv2.imread(fs)
    X[current_idx, :, :] = img[:, :, 1]
    y[current_idx] = class_labels[match.group(1)]
    current_idx += 1

# Drop the rows reserved for files that were skipped; otherwise they would
# remain as all-zero images labelled 0 and be counted as background.
X = X[:current_idx]
y = y[:current_idx]

num_cars = np.count_nonzero(y==1)
num_bg = np.count_nonzero(y==0)
print("found {} cars and {} background images".format(num_cars, num_bg))

# Equalize the class probabilities for training (delete a random subset)
# We do this now and not earlier because we don't know fully what we are going to get out of the directory apriori
num_bg_to_delete = 0  # zero means no background images are removed
np.random.seed(2019)
if num_bg_to_delete:
    print("Deleting ", num_bg_to_delete, " background images")
    bg_idx = np.nonzero(y==0)[0]
    np.random.shuffle(bg_idx)
    to_delete = bg_idx[0:num_bg_to_delete]
    y = np.delete(y, to_delete)
    X = np.delete(X, to_delete, axis=0)

num_cars = np.count_nonzero(y==1)
num_bg = np.count_nonzero(y==0)
print("After class equalization, there are {} cars and {} background images".format(num_cars, num_bg))
print(X.shape)

In [None]:
# Preview the first few samples in a grid, each titled with its label.
imgs_to_show = min(25, len(X))  # never index past the end of a small data set
imgs_per_col = 5                # number of grid columns
# Rows needed to hold every image (integer ceiling division); plt.subplot
# requires integer grid dimensions, which true division does not give.
num_rows = -(-imgs_to_show // imgs_per_col)
plt.figure(figsize=(10,10))
for i in range(imgs_to_show):
    plt.subplot(num_rows, imgs_per_col, i+1)
    plt.imshow(X[i,:,:], cmap='gray')
    plt.axis('off')
    plt.title(y[i])
plt.show()

# Classification setup

In [None]:
# Single-precision copy of the image stack, shared by the classifiers below.
X_float = X.astype(np.float32)

# Report the intensity range (max, then min) of the first image.
first_chip = X_float[0]
print(first_chip.max())
print(first_chip.min())

# SVM (on pixels)

In [None]:
if run_pixel_svm:
    # One row per image: every pixel becomes a feature.
    X_flat = X_float.reshape((len(X), -1))
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X_flat, y, test_size=0.25, random_state=23
    )

    # 4-fold cross-validated grid search over the RBF kernel's gamma.
    gamma_grid = [{'kernel': ['rbf'], 'gamma': np.logspace(-8, -5, 10)}]
    svm_pixels = model_selection.GridSearchCV(
        estimator=svm.SVC(), param_grid=gamma_grid, cv=4, n_jobs=-1
    )
    svm_pixels.fit(X_train, y_train)
    print("Best estimator:\n", svm_pixels.best_estimator_)

    # Check performance on the held-out split
    holdout_accuracy = metrics.accuracy_score(y_test, svm_pixels.predict(X_test))
    print("Accuracy score:\n", holdout_accuracy)

In [None]:
if run_pixel_svm:
    # predict() returns hard class labels (0 or 1) rather than probabilities;
    # they are passed in the slot where the other classifiers pass scores.
    y_probs = svm_pixels.predict(X_test)

    # Turn the flattened rows back into single-channel image chips, using the
    # size of the loaded images instead of a hard-coded 64x64.
    X_test_imgs = X_test.reshape((-1, X.shape[1], X.shape[2], 1))
    plot_support.plot_misclassifications(X_test_imgs, y_test, y_probs, file_prefix="svm_pixels")

# SVM (on features)

First, generate the feature vectors for all of the image chips (the split into training and test sets happens in the next cell)

In [None]:
if run_feature_svm:
    from feature_extraction import generate_feature_vector

    # Parameters
    num_cells = 8
    num_orientations = 8
    frequencies = [0.125, 0.25, 0.5, 1, 2, 4, 8] # Guesses
    sigmas = [1,2,3]
    # Positional arguments shared by every call to the extractor
    extractor_args = (num_cells, num_orientations, frequencies, sigmas)

    # Run the extractor once on the first image just to learn the vector length
    num_features = generate_feature_vector(X[0,:,:], *extractor_args).size

    # One feature vector per image, stored row-wise
    X_features = np.zeros((X.shape[0], num_features))
    for row, chip in enumerate(X):
        X_features[row, :] = generate_feature_vector(chip, *extractor_args)

Now, train a classifier using these features

In [None]:
if run_feature_svm:
    # Hold out a quarter of the feature vectors for testing.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X_features, y, test_size=0.25, random_state=23
    )

    # 4-fold cross-validated grid search over the RBF kernel's gamma.
    gamma_grid = [{'kernel': ['rbf'], 'gamma': np.logspace(-8, -5, 10)}]
    cv_clf = model_selection.GridSearchCV(
        estimator=svm.SVC(), param_grid=gamma_grid, cv=4, n_jobs=-1
    )
    cv_clf.fit(X_train, y_train)
    print("Best estimator:\n", cv_clf.best_estimator_)

    # Check performance on the held-out split
    holdout_accuracy = metrics.accuracy_score(y_test, cv_clf.predict(X_test))
    print("Accuracy score:\n", holdout_accuracy)

# CNN

First, format the data. Scaling the pixel values to the range 0 to 1 turned out to be extremely important (the images start as 8-bit, with a maximum value of 255). Note that the CNN expects each chip to be 3D, even if the third dimension (channel) has size 1. We then randomly split the data into training and holdout sets.

In [None]:
# Divide the intensities by 255, then append a trailing channel axis of
# length 1 so that every chip is 3D.
X_for_cnn = X_float/255
cnn_chips = X_for_cnn[..., np.newaxis]

# Keep 15% of the chips aside as a holdout set.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    cnn_chips, y, test_size=0.15, random_state=23
)
input_shape = x_train.shape[1:]  # (rows, cols, 1)

In [None]:
if run_cnn:
    # Two convolution stages followed by a small dense classifier head that
    # ends in a 2-way softmax.
    cnn_model = Sequential([
        Conv2D(filters=64, kernel_size=(4,4), input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=32, kernel_size=(4,4)),
        Flatten(),  # Flattening the 2D arrays for fully connected layers
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(2, activation='softmax'),
    ])
    cnn_model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    cnn_model.fit(x=x_train, y=y_train, epochs=10)

    # Loss and accuracy on the holdout set
    test_loss, test_accuracy = cnn_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', test_loss)
    print('Test accuracy:', test_accuracy)

    # Precision/recall curve built from the softmax output for class 1
    car_scores = cnn_model.predict(x_test)[:,1]
    precision, recall, _ = metrics.precision_recall_curve(y_test, car_scores)
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve')

The precision/recall plotting was taken from the [scikit-learn documentation](https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html)

Examine the misclassifications from the holdout set.  Specifically, we'll look at (up to) 10 false positives and false negatives.

In [None]:
if run_cnn:
    # Tip: importlib.reload(plot_support) picks up edits to the plotting
    # helper without restarting the kernel.

    # Keep only the class-1 column of the predicted class probabilities
    y_probs = np.squeeze(cnn_model.predict(x_test)[:,1])

    plot_support.plot_misclassifications(x_test, y_test, y_probs, file_prefix="cnn")

# Transfer learning from ResNet50
[Kaggle Example](https://www.kaggle.com/dansbecker/transfer-learning)

In [None]:
if run_resnet:
    # The ResNet50 base is built for 3-channel input: copy the single grey
    # plane into three identical channels, then divide the values by 3.
    gray_stack = X.astype('float64')[..., np.newaxis]
    X_color = np.repeat(gray_stack, 3, axis=-1) / 3

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X_color, y, test_size=0.15, random_state=23
    )
    input_shape = x_train.shape[1:]  # (rows, cols, 3)

    # ImageNet-pretrained base with average pooling, topped by a 2-way softmax
    backbone = ResNet50(include_top=False, input_shape=input_shape, pooling='avg', weights='imagenet')
    transfer_model = Sequential([backbone, Dense(2, activation='softmax')])
    # Freeze the pretrained base so that only the new dense layer is trained
    backbone.trainable = False

    transfer_model.compile(
        optimizer='sgd',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    transfer_model.fit(x=x_train, y=y_train, epochs=3)

    # Loss and accuracy on the holdout set
    test_loss, test_accuracy = transfer_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', test_loss)
    print('Test accuracy:', test_accuracy)

In [None]:
if run_resnet:
    # Keep only the class-1 column of the predicted class probabilities
    class_probabilities = transfer_model.predict(x_test)
    y_probs = np.squeeze(class_probabilities[:,1])

    plot_support.plot_misclassifications(x_test, y_test, y_probs, file_prefix="resnet")

# Run classifiers on test images (localization)

In [None]:
from segmentation import run_sliding_window
from non_maximal_suppression import do_non_max_suppression
import matplotlib.patches as patches

test_img_list = ["00000014_ir.png", "00000017_ir.png", "00000021_ir.png", "00000024_ir.png"]
classifier_name = ["SVM (pixels)", "SVM (features)", "CNN", "ResNet transfer"]

# Side length (pixels) of each window; used for the chip reshapes and for the
# size of the rectangles drawn around detections.
window_size = 64
# NOTE(review): passed as the second argument of run_sliding_window,
# presumably the stride in pixels -- confirm against segmentation.py.
window_step = 8
# Minimum score for a window to count as a detection
detection_threshold = 0.97

# Rows of class_flat (indexed like classifier_name) that actually get filled
# in below.  The feature SVM has no scoring path in this cell, so it is never
# listed; plotting a classifier that did not run would only save an all-zero
# heatmap under its name.
enabled_classifiers = [idx for idx, enabled
                       in enumerate((run_pixel_svm, False, run_cnn, run_resnet))
                       if enabled]


def _draw_detections(image, boxes, title):
    """Show `image` in the current axes with a green square at each box origin."""
    plt.imshow(image, cmap='gray')
    for b in boxes:
        rect = patches.Rectangle(b[0:2], window_size, window_size, linewidth=4, edgecolor='g', facecolor='none')
        plt.gca().add_patch(rect)
    plt.axis('off')
    plt.title(title)


# Loop over all the test images
for ti in test_img_list:
    # Read in image data and format it for the various classifiers
    img_path = os.path.join(data_dir, ti)
    test_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if test_img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError("Could not read test image {}".format(img_path))

    partitioned_image, bboxes_vertices = run_sliding_window(test_img, window_step, window_size, window_size)
    bboxes_array = np.asarray(bboxes_vertices).reshape((-1,4))
    partitioned_image_flat = partitioned_image.reshape((-1, window_size, window_size, 1))

    # Pixel SVM: one flattened row per window, raw intensities
    partitioned_image_for_svm = partitioned_image_flat.reshape((-1, window_size*window_size))

    # CNN: same scaling as its training data (divide by 255)
    partitioned_image_for_cnn = partitioned_image_flat.astype('float32')
    partitioned_image_for_cnn /= 255

    # ResNet: same 3-channel tiling and division by 3 as its training data
    partitioned_image_for_transfer = np.tile(partitioned_image_flat.astype('float64'), (1,1,1,3))
    partitioned_image_for_transfer /= 3

    # Run each of the enabled classifiers (one row of scores per classifier)
    class_flat = np.zeros((len(classifier_name), partitioned_image_flat.shape[0]))
    if run_pixel_svm:
        # predict() gives hard 0/1 labels, so the threshold below keeps label 1
        class_flat[0,:] = svm_pixels.predict(partitioned_image_for_svm)
    if run_cnn:
        class_flat[2,:] = cnn_model.predict(partitioned_image_for_cnn)[:,1]
    if run_resnet:
        class_flat[3,:] = transfer_model.predict(partitioned_image_for_transfer)[:,1]

    # Plot results for each classifier that was run
    for i in enabled_classifiers:
        # Detection logic
        class_flat_single = np.squeeze(class_flat[i,:])
        detection_idx = np.nonzero(class_flat_single > detection_threshold)[0]
        detection_boxes = bboxes_array[detection_idx,:]
        detection_probs = class_flat_single[detection_idx]
        picked_boxes = do_non_max_suppression(detection_boxes, detection_probs)

        fig = plt.figure(figsize=(24,8))

        plt.subplot(1,3,1)
        plt.imshow(class_flat_single.reshape((partitioned_image.shape[0], partitioned_image.shape[1])), cmap='magma') #show map of probabilities
        plt.axis('off')
        plt.title('{}: Class estimate heatmap'.format(classifier_name[i]))

        plt.subplot(1,3,2)
        _draw_detections(test_img, detection_boxes,
                         '{}: Before non-maximal supression'.format(classifier_name[i]))

        plt.subplot(1,3,3)
        _draw_detections(test_img, picked_boxes,
                         '{}: After non-maximal supression'.format(classifier_name[i]))

        # Save before show(): showing may leave the figure empty afterwards
        plt.savefig('{} - with {}.png'.format(ti, classifier_name[i]))
        plt.show()
        # Release the figure so that they do not pile up across iterations
        plt.close(fig)
        