In [10]:
%reset

In [None]:
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
import os, sys
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import warnings
# Put the parent directory on the import path so the `tools` module can be found.
sys.path.append('../')
# Star import: helper names used unqualified in later cells and not defined in
# this notebook (e.g. load_image, img_crop, extract_features) presumably come
# from here -- verify against tools.py.
from tools import *

# Render matplotlib figures inside the notebook.
%matplotlib inline
# autoreload mode 2: re-import modules before running each cell, so edits to
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Ignore all warnings (this silences every warning category for the whole
# session, including library deprecation and convergence warnings).
warnings.filterwarnings("ignore")

In [None]:
# Load the data
root_dir = "../data/training_augmented/"
img_dir = root_dir + "images/"
gt_dir = root_dir + "groundtruth/"

# os.listdir() returns entries in arbitrary order: sort them so that the subset
# of images that is loaded (and hence the split below) is the same on every run.
# Hidden entries such as .DS_Store are skipped because they are not images.
files = sorted(name for name in os.listdir(img_dir) if not name.startswith("."))

n = min(50, len(files))  # Load at most 50 images
print("Loading " + str(n) + " images and their labels...")
# Ground-truth masks are looked up under the same file name as their image.
imgs = [load_image(img_dir + name) for name in files[:n]]
gt_imgs = [load_image(gt_dir + name) for name in files[:n]]

# Split into training and validation sets (80% / 20%, fixed seed).
# NOTE(review): the directory name suggests augmented data; if several files
# derive from the same original image, a random split can put near-duplicates
# on both sides and inflate validation scores -- confirm.
X = np.array(imgs)
y = np.array(gt_imgs)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

n_train = X_train.shape[0]
n_val = X_val.shape[0]


In [None]:
# Cut the images into square patches and turn every patch into a sample
patch_size = 16

# For each split, crop every image (and its ground-truth mask) into
# patch_size x patch_size tiles and flatten the per-image lists into a single
# array holding all tiles of that split.
X_train_patches = np.asarray(
    [tile for image in X_train for tile in img_crop(image, patch_size, patch_size)]
)
y_train_patches = np.asarray(
    [tile for mask in y_train for tile in img_crop(mask, patch_size, patch_size)]
)

X_val_patches = np.asarray(
    [tile for image in X_val for tile in img_crop(image, patch_size, patch_size)]
)
y_val_patches = np.asarray(
    [tile for mask in y_val for tile in img_crop(mask, patch_size, patch_size)]
)

# One feature vector per image tile; one class label per mask tile, obtained
# by passing the tile's mean value through value_to_class.
X_train_features = np.asarray([extract_features(tile) for tile in X_train_patches])
y_train_features = np.asarray([value_to_class(np.mean(tile)) for tile in y_train_patches])

X_val_features = np.asarray([extract_features(tile) for tile in X_val_patches])
y_val_features = np.asarray([value_to_class(np.mean(tile)) for tile in y_val_patches])


In [None]:
# Print feature statistics

print("Computed " + str(X_train_features.shape[0]) + " features")
print("Feature dimension = " + str(X_train_features.shape[1]))
# Count the distinct labels. The previous np.max() reported the largest label
# value, which is 1 (not 2) when the labels are 0 and 1.
print("Number of classes = " + str(len(np.unique(y_train_features))))

# Indices of the training samples belonging to each class.
Y0 = np.flatnonzero(y_train_features == 0).tolist()
Y1 = np.flatnonzero(y_train_features == 1).tolist()
print("Class 0: " + str(len(Y0)) + " samples, " + str(100 * len(Y0) / len(y_train_features)) + "%")
print("Class 1: " + str(len(Y1)) + " samples, " + str(100 * len(Y1) / len(y_train_features)) + "%")

In [None]:
# Train an SVM classifier with an RBF kernel (kernel='rbf', i.e. not linear).
# class_weight='balanced' weights each class inversely to its frequency in
# y_train_features; gamma='auto' sets gamma to 1 / n_features.
print("Training SVM classifier...")
svm_model = SVC(kernel='rbf', class_weight='balanced', C=1, gamma='auto')
svm_model.fit(X_train_features, y_train_features)

In [None]:
# Predict on the training set
print("Predicting on the training set...")
y_train_pred = svm_model.predict(X_train_features)

# Indices of the samples predicted / labelled as the positive (non-zero) class
y_train_pred_nz = np.nonzero(y_train_pred)[0]
y_train_features_nz = np.nonzero(y_train_features)[0]

# True positive rate = TP / (TP + FN): correctly predicted positives divided by
# the number of actual positives. The previous version divided by the total
# number of samples, which is neither the TPR nor the accuracy.
n_true_positives = np.intersect1d(y_train_features_nz, y_train_pred_nz).size
n_actual_positives = len(y_train_features_nz)
TPR = n_true_positives / float(n_actual_positives) if n_actual_positives else float("nan")
print("True Positive Rate = " + str(TPR))

# Overall fraction of correctly classified samples.
print("Accuracy = " + str(accuracy_score(y_train_features, y_train_pred)))

In [None]:
# Predict on the validation set
print("Predicting on the validation set...")
y_val_pred = svm_model.predict(X_val_features)

# Indices of the samples predicted / labelled as the positive (non-zero) class
y_val_pred_nz = np.nonzero(y_val_pred)[0]
y_val_features_nz = np.nonzero(y_val_features)[0]

# True positive rate = TP / (TP + FN): correctly predicted positives divided by
# the number of actual positives. The previous version divided by the total
# number of samples, which is neither the TPR nor the accuracy.
n_true_positives = np.intersect1d(y_val_features_nz, y_val_pred_nz).size
n_actual_positives = len(y_val_features_nz)
TPR = n_true_positives / float(n_actual_positives) if n_actual_positives else float("nan")
print("True Positive Rate = " + str(TPR))

# Overall fraction of correctly classified samples, plus per-class
# precision / recall / F1 on the held-out data.
print("Accuracy = " + str(accuracy_score(y_val_features, y_val_pred)))
print(classification_report(y_val_features, y_val_pred))

In [None]:
# Predict a mask for every test image and save it as a PNG
print("Creating submission file...")
test_dir = "../data/test_set_images/"

# Predicted masks are written to ../predictions/prediction_svm/.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
pred_dir = "../predictions/prediction_svm/"
os.makedirs(pred_dir, exist_ok=True)

# Each test image is read from <test_dir>/<name>/<name>.png, so only
# sub-directories are valid entries; stray files (e.g. .DS_Store) are skipped.
# Sorting gives a stable processing order across runs and platforms.
test_names = sorted(
    name for name in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, name))
)

for file in test_names:
    print("Processing " + file + "...")
    img_path = os.path.join(test_dir, file, f'{file}.png')

    # Same patch size and feature extraction as used for training.
    img = load_image(img_path)
    img_patches = np.asarray(img_crop(img, patch_size, patch_size))
    img_patches_features = np.asarray([extract_features(patch) for patch in img_patches])

    # Classify every patch, then rebuild a full-size mask from the patch labels.
    img_patches_pred = svm_model.predict(img_patches_features)
    img_pred = label_to_img(img.shape[0], img.shape[1], patch_size, patch_size, img_patches_pred)
    img_pred = img_float_to_uint8(img_pred)

    img_pred_path = os.path.join(pred_dir, f'{file}.png')
    img_pred_img = Image.fromarray(img_pred)
    img_pred_img.save(img_pred_path)

    # Input image combined with its prediction; not displayed or saved here, so
    # only the value from the last iteration remains available afterwards.
    cimg_pred = concatenate_images(img, img_pred)


In [None]:

# Collect the predicted masks test_1.png ... test_50.png and convert them into
# a single submission CSV.
submission_filename = '../predictions/svm_submission.csv'
image_filenames = [f'../predictions/prediction_svm/test_{i}.png' for i in range(1, 51)]
for image_filename in image_filenames:
    print(image_filename)
masks_to_submission(submission_filename, *image_filenames)