# Classifier

In [1]:
import numpy as np
from sklearn import svm
import cv2
import matplotlib.pyplot as plt
import os
import math
import time

### Helper functions for image plotting, importing, and preprocessing

In [2]:
def plot_images(list_of_images, max_col = 4):
    """
    @description Shows the given images; several images are laid out in a grid, row by row
    @param list_of_images Images to show (anything `imshow` accepts); an empty list draws nothing
    @param max_col Maximum number of images per grid row
    """
    n = len(list_of_images)
    if n == 0:
        # Nothing to draw; do not create an empty figure
        return

    if n == 1:
        # Use the same gray colormap as the grid below so that a single grayscale image
        # is not rendered in false colour (imshow ignores cmap for RGB(A) data).
        plt.imshow(list_of_images[0], cmap=plt.cm.gray)
        plt.axis('off')
        plt.show()
        return

    # get number of columns and rows required
    c = min(n, max_col)
    r = int(math.ceil(n / max_col))

    # The figure height grows with the number of rows
    fig = plt.figure(figsize=(17, max_col * r))
    for i, img in enumerate(list_of_images):
        # Subplot positions are 1-based
        ax = fig.add_subplot(r, c, (i+1))
        ax.set_title("Image " + str(i))
        ax.axis('off')
        ax.imshow(img, cmap=plt.cm.gray)
    # Display right away, like the single-image branch does
    plt.show()


In [3]:
def _read_images(directory, limit, flags):
    """Read up to `limit` decodable images from `directory`, in sorted file-name order."""
    imgs = []
    # os.listdir returns entries in arbitrary order; sorting makes the selection reproducible
    for file_name in sorted(os.listdir(directory)):
        # A negative limit never matches, so every file is read
        if len(imgs) == limit:
            break
        img = cv2.imread(os.path.join(directory, file_name), flags)
        # cv2.imread returns None instead of raising when a file cannot be decoded
        if img is not None:
            imgs.append(img)
    return imgs


def _as_image_array(imgs):
    """Pack images into an ndarray: dense if all shapes match, otherwise a 1-D object array."""
    if len({img.shape for img in imgs}) <= 1:
        return np.array(imgs)
    # np.array() on differently sized images raises on recent NumPy versions,
    # so fill an object array one slot at a time instead
    packed = np.empty(len(imgs), dtype=object)
    for i, img in enumerate(imgs):
        packed[i] = img
    return packed


def get_train(path_dir, category, n_images):
    """
    @description Imports up to a selected number of training images of one category as grayscale.
                 Images are neither resized nor flattened; files that cannot be decoded are skipped.
    @param path_dir Root directory for your image dataset (the one containing 'train')
    @param category Category of images to choose from (sub-folder of 'train')
    @param n_images Number of images you'd like to import; a negative value imports all of them
    @return Array of images: shape (n, height, width) when all images have the same size,
            otherwise a 1-D object array holding one image per element
    """
    directory = os.path.join(path_dir, 'train', category)
    return _as_image_array(_read_images(directory, n_images, cv2.IMREAD_GRAYSCALE))


def get_test(path, n_images=12):
    """
    @description Imports up to a selected number of test images in colour.
                 Files that cannot be decoded are skipped.
    @param path Root directory for your image dataset (the one containing 'test')
    @param n_images Number of images you'd like to import (default 12); a negative value imports all of them
    @return Array of images, packed the same way as in get_train
    """
    directory = os.path.join(path, 'test')
    return _as_image_array(_read_images(directory, n_images, cv2.IMREAD_COLOR))

In [4]:
def pad_images(img_list, result_shape):
    """
    @description Pads every image with black borders so that all of them end up with the same size.
                 Padding is split evenly between opposite sides; when the amount is odd,
                 the extra pixel goes to the top / left.
    @param img_list Sequence of images (arrays whose first two axes are height and width); sizes may differ
    @param result_shape Target (height, width); must be at least as large as every image
    @return 1-D object array holding one padded image per input image, in input order
    @raises ValueError if an image is larger than result_shape
    """
    BLACK = [0,0,0]
    target_height, target_width = int(result_shape[0]), int(result_shape[1])

    # One slot per image. np.empty_like(img_list) would copy the layout of the *input*,
    # which cannot hold the padded images when img_list is a regular (n, h, w) array
    # and cannot be built at all from a list of differently sized images.
    result = np.empty(len(img_list), dtype=object)

    for i, img in enumerate(img_list):

        height_adjust = target_height - img.shape[0]
        width_adjust = target_width - img.shape[1]
        if height_adjust < 0 or width_adjust < 0:
            raise ValueError(
                "image %d with shape %s does not fit into result_shape %s"
                % (i, img.shape[:2], (target_height, target_width)))

        top = math.ceil(height_adjust / 2)
        bot = height_adjust - top
        left = math.ceil(width_adjust / 2)
        right = width_adjust - left

        result[i] = cv2.copyMakeBorder(img, top, bot, left, right, cv2.BORDER_CONSTANT, value=BLACK)
    return result

#### Import a subset of (or all) the images of each category, together with their labels

In [6]:
path_classification = './MIO-TCD-Classification'
path_classification_train = os.path.join(path_classification, 'train')

# Upper bound on the number of images loaded per category.
# Change this number if you want more or less images. Set it to -1 if you want all images
number_images_category = 1000

# Sorted so that the category -> integer label mapping is the same on every run
# (os.listdir returns entries in arbitrary order). Stray files lying next to the
# category folders are ignored.
categories = sorted(
    entry for entry in os.listdir(path_classification_train)
    if os.path.isdir(os.path.join(path_classification_train, entry))
)
image_list = []
label_list = []

start_time = time.time()
for label, category in enumerate(categories):
    path_category = os.path.join(path_classification_train, category)
    n_loaded = 0
    for image_name in sorted(os.listdir(path_category)):

        # With -1 this never matches, so the whole category is loaded
        if n_loaded == number_images_category:
            break

        image = cv2.imread(os.path.join(path_category, image_name), cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for files it cannot decode; skip those
        if image is None:
            continue

        image_list.append(image)
        label_list.append(label)
        n_loaded += 1

# The images may differ in size (they are padded in a later cell), so keep one image
# per slot of a 1-D object array; np.array() on differently sized images raises on
# recent NumPy versions.
classification_images = np.empty(len(image_list), dtype=object)
for i, image in enumerate(image_list):
    classification_images[i] = image
y_tr = np.array(label_list)

assert len(classification_images) == len(y_tr)
# A category may hold fewer images than the limit, so the limit is only an upper bound
if number_images_category >= 0:
    assert len(classification_images) <= len(categories) * number_images_category

print('Elapsed Time:', time.time() - start_time)

Elapsed Time: 5.452611446380615


#### Import a number of test images

In [11]:
path_classification_test = os.path.join(path_classification, 'test')

# For now, take (at most) as many test images as there are training images
number_images_test = len(classification_images)
test_image_list = []

# Sorted because os.listdir returns entries in arbitrary order
for image_name in sorted(os.listdir(path_classification_test)):
    if len(test_image_list) == number_images_test:
        break

    image = cv2.imread(os.path.join(path_classification_test, image_name), cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None for files it cannot decode; skip those
    if image is None:
        continue
    test_image_list.append(image)

# One image per slot of a 1-D object array: np.array() on differently sized images
# raises on recent NumPy versions
images_test = np.empty(len(test_image_list), dtype=object)
for i, image in enumerate(test_image_list):
    images_test[i] = image

# The test folder may hold fewer images than requested, so this is only an upper bound
assert len(images_test) <= number_images_test

#### Pad images to maintain one common aspect ratio

In [7]:
# Image arrays are indexed (row, column): shape[0] is the height and shape[1] the width.
# pad_images expects its target shape as (height, width).
largest_height = np.max([x.shape[0] for x in classification_images])
largest_width = np.max([x.shape[1] for x in classification_images])

padded_images = pad_images(classification_images, (largest_height, largest_width))

#### Flatten images to prepare for SVM

In [8]:
# Training features: each padded image is unrolled into one 1-D pixel vector (one row per image).
x_tr = np.array([padded.flatten() for padded in padded_images])

# NOTE(review): despite its name, y_test holds the flattened *test images* (features), not labels.
# NOTE(review): images_test is flattened without being padded, so its rows may differ in
# length from each other and from the rows of x_tr -- confirm before predicting on it.
y_test = np.array([test_image.flatten() for test_image in images_test])

In [9]:
def compute_svm_predictions(x_train, y_train, x_val, y_val):
    """
    @description Fits an SVC with default settings on the training data and predicts the validation samples
    @param x_train Training feature rows
    @param y_train Training labels, one per row of x_train
    @param x_val Feature rows to predict
    @param y_val Labels for x_val; accepted but not used by this function
    @return Predictions for x_val
    """
    classifier = svm.SVC()
    classifier.fit(x_train, y_train)
    return classifier.predict(x_val)

In [10]:
# Smoke test: fit on the whole training set and predict its first 10 samples.
compute_svm_predictions(
    x_train=x_tr,
    y_train=y_tr,
    x_val=x_tr[:10],
    y_val=y_tr[:10],
)

KeyboardInterrupt: 