In [29]:
import numpy as np
import skimage.io as io

from skimage.filters import sobel_h, sobel_v
from skimage.transform import resize

from sklearn.svm import LinearSVC
from itertools import product


def get_grads(img, rotate=0):
    weights = np.array([0.299, 0.587, 0.114])
    brightness = img @ weights
    grad_y = sobel_h(brightness)
    grad_x = sobel_v(brightness)
    grad_norms = np.sqrt(grad_y ** 2 + grad_x ** 2)
    grad_angles = np.arctan2(grad_y, grad_x) + rotate
    grad_angles -= (grad_angles > np.pi) * (2 * np.pi)
    return grad_norms, grad_angles


def make_hist(grad_norms, grad_angles, bins):
    limits = np.linspace(-np.pi, np.pi, bins + 1).reshape(bins + 1, 1, 1)
    return np.sum(((grad_angles >= limits[:-1]) & (grad_angles < limits[1:])) * grad_norms, axis=(1, 2))


def build_cells(grad_norms, grad_angles, cell_shape=(8, 8), bins=8):
    hists = np.empty([*np.array(grad_norms.shape) // cell_shape, bins])
    cell_slice = lambda i, ax: slice(cell_shape[ax] * i, cell_shape[ax] * (i + 1))
    for i, j in product(range(hists.shape[0]), range(hists.shape[1])):
        hists[i, j] = make_hist(
            grad_norms[cell_slice(i, 0), cell_slice(j, 1)], 
            grad_angles[cell_slice(i, 0), cell_slice(j, 1)],
            bins,
        )
    return hists


def build_blocks(cells, block_shape, step=None):
    if step is None:
        step = block_shape
    result_shape = (np.array(cells.shape[:2]) - block_shape) // step + 1
    result = np.empty([*result_shape, np.prod(block_shape) * cells.shape[-1]])
    for i, j in product(range(result.shape[0]), range(result.shape[1])):
        result[i, j] = cells[
            i * step[0]: i * step[0] + block_shape[0], 
            j * step[1]: j * step[1] + block_shape[0]
        ].reshape(-1)
        result[i, j] /= np.sqrt((result[i, j] ** 2).sum() + 1e-5)
    return result.reshape(-1)


def extract_hog(image):
    image = resize(image, (64, 64))
    grad_norms, grad_angles = get_grads(image, rotate=np.pi / 8)
    cells = build_cells(grad_norms, grad_angles)
    return build_blocks(cells, block_shape=(2, 2), step=(1, 1))


def fit_and_classify(X_train, y_train, X_test):
    model = LinearSVC(C=0.3)
    model.fit(X_train, y_train)
    return model.predict(X_test)


In [13]:
import os
from tqdm import tqdm_notebook as tqdm

In [11]:
os.listdir()

['00_gt', '00_output', '00_input']

In [30]:
from os.path import join, basename
from glob import glob

data_dir = '00_input'
train_dir = join(data_dir, 'train')
test_dir = join(data_dir, 'test')
gt_dir = '00_gt'

def read_gt(gt_dir):
    fgt = open(join(gt_dir, 'gt.csv'))
    next(fgt)
    lines = fgt.readlines()

    filenames = []
    labels = np.zeros(len(lines))
    for i, line in enumerate(lines):
        filename, label = line.rstrip('\n').split(',')
        filenames.append(filename)
        labels[i] = int(label)

    return filenames, labels

def extract_features(path, filenames):
    hog_length = len(extract_hog(io.imread(join(path, filenames[0]))))
    data = np.zeros((len(filenames), hog_length))
    for i in tqdm(range(0, len(filenames))):
        filename = join(path, filenames[i])
        data[i, :] = extract_hog(io.imread(filename))
    return data

train_filenames, train_labels = read_gt(train_dir)
test_filenames, test_labels = read_gt(gt_dir)

train_features = extract_features(train_dir, train_filenames)
test_features = extract_features(test_dir, test_filenames)

HBox(children=(IntProgress(value=0, max=21210), HTML(value='')))

HBox(children=(IntProgress(value=0, max=18000), HTML(value='')))

In [16]:
from sklearn.metrics import accuracy_score

In [33]:
for C in tqdm(np.linspace(0.08, 0.14, 7)):
    model = LinearSVC(C=C)
    model.fit(train_features, train_labels)
    predict_labels = model.predict(test_features)
    print(C, (predict_labels == test_labels).mean())

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

0.08 0.9392222222222222
0.09 0.9396666666666667
0.1 0.9397222222222222


KeyboardInterrupt: 

0.9256666666666666

In [1]:
for C in np.logspace(-2, 2, 7):
    model = SVC(C=C)
    model.fit(train_features, train_labels)
    y = model.predict(test_features)
    with open(join(output_dir, 'output.csv'), 'w') as fout:
        for i, filename in enumerate(test_filenames):
            print('%s,%d' % (filename, y[i]), file=fout)

NameError: name 'np' is not defined