In [2]:
import pandas as pd

# Ground-truth table for the training set, read from the CSV shipped with the images.
data_train = pd.read_csv('train/gt.csv')
# Class labels used as the training target.
y_train = data_train['class_id']
# Preview the first rows as the cell output.
data_train.head()

Unnamed: 0,filename,class_id
0,00000.png,0
1,00001.png,0
2,00002.png,0
3,00003.png,0
4,00004.png,0


In [3]:
from skimage.transform import resize

%time ic_train = [resize(imread('./train/' + name, True), (32,32)) for name in data_train.filename]

CPU times: user 39.3 s, sys: 51.7 s, total: 1min 31s
Wall time: 30.6 s


In [4]:
import numpy as np
from scipy.ndimage.filters import convolve

def extract_hog(img, cell_rows=8, cell_cols=8, block_row_cells=2, block_col_cells=2, eps=1e-8,
                block_row_stride=2, block_col_stride=2, bins=8):
    """Compute a HOG (histogram of oriented gradients) descriptor of a 2-D image.

    Gradients are computed with [-1, 0, 1] difference kernels and zero padding
    at the borders.  A block of ``block_row_cells x block_col_cells`` cells
    slides over the image; inside every cell the unsigned gradient
    orientations (folded into [0, pi)) are histogrammed into ``bins`` bins,
    weighted by gradient magnitude.  The cell histograms of a block are
    concatenated, L2-normalised and appended to the output vector.

    Parameters
    ----------
    img : 2-D array
        Single-channel image.  Non-floating dtypes are converted to float
        before differentiation.
    cell_rows, cell_cols : int
        Cell size in pixels.
    block_row_cells, block_col_cells : int
        Block size in cells.
    eps : float
        Added under the square root of the block norm so that an all-zero
        block does not cause a division by zero.
    block_row_stride, block_col_stride : int
        Step, in pixels, between consecutive block origins.
    bins : int
        Number of orientation bins per cell.

    Returns
    -------
    numpy.ndarray
        1-D array holding ``block_row_cells * block_col_cells * bins`` values
        for every visited block position.
    """
    img = np.asarray(img)
    if not np.issubdtype(img.dtype, np.floating):
        # The convolution result takes the dtype of its input, so the signed
        # differences of an integer (e.g. uint8) image would be corrupted.
        img = img.astype(float)

    block_rows = block_row_cells * cell_rows
    block_cols = block_col_cells * cell_cols
    Dx = np.array([[-1, 0, 1]])
    Dy = np.array([[-1], [0], [1]])

    Ix = convolve(img, Dx, mode='constant')
    Iy = convolve(img, Dy, mode='constant')
    # Gradient magnitude.
    G = np.sqrt(Ix ** 2 + Iy ** 2)
    # Gradient orientation, made unsigned: arctan2 yields [-pi, pi]; negative
    # angles are shifted by pi, and an angle of exactly pi is the same
    # orientation as 0, so it is folded back instead of landing in the last bin.
    T = np.arctan2(Iy, Ix)
    T[T < 0] += np.pi
    T[T >= np.pi] -= np.pi
    hog = []

    # NOTE(review): the range stop is exclusive, so the block whose far edge
    # coincides with the image border is never visited (and an image exactly
    # one block in size yields an empty descriptor).  Left unchanged because
    # altering it would change the descriptor length callers rely on.
    for block_row_start in range(0, img.shape[0] - block_rows, block_row_stride):
        for block_col_start in range(0, img.shape[1] - block_cols, block_col_stride):

            G_block = G[block_row_start : block_row_start + block_rows,
                        block_col_start : block_col_start + block_cols]
            T_block = T[block_row_start : block_row_start + block_rows,
                        block_col_start : block_col_start + block_cols]
            v = []
            for block_row_cell in range(block_row_cells):
                for block_col_cell in range(block_col_cells):
                    G_cell = G_block[block_row_cell * cell_rows : block_row_cell * cell_rows + cell_rows,
                                     block_col_cell * cell_cols : block_col_cell * cell_cols + cell_cols]
                    T_cell = T_block[block_row_cell * cell_rows : block_row_cell * cell_rows + cell_rows,
                                     block_col_cell * cell_cols : block_col_cell * cell_cols + cell_cols]
                    # Magnitude-weighted orientation histogram of one cell.
                    hist, _ = np.histogram(T_cell.flatten(), bins, range=(0, np.pi), weights=G_cell.flatten())
                    v.extend(hist)
            # L2-normalise the concatenated cell histograms of this block.
            v = np.array(v)
            v = v / np.sqrt(np.sum(v ** 2) + eps)
            hog.extend(v)

    return np.array(hog)

In [5]:
# Descriptor length, measured on the first training image; it sizes the
# feature matrices allocated in the following cells.
num_feat = extract_hog(ic_train[0]).shape[0]
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(num_feat)

2048


In [7]:
%%time

X_train = np.zeros((len(ic_train), num_feat))
for i in xrange(len(ic_train)):
    X_train[i] = extract_hog(ic_train[i])

CPU times: user 10min 3s, sys: 124 ms, total: 10min 3s
Wall time: 10min 3s


In [9]:
from sklearn.svm import LinearSVC, SVC
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older scikit-learn releases only provide the cross_validation module.
    from sklearn.cross_validation import cross_val_score

# Cross-validated accuracy of a linear SVM on the HOG features.  The number of
# folds is spelled out (3, matching the three scores recorded below) so that it
# does not depend on the library's default.
cross_val_score(LinearSVC(), X_train, y_train, scoring='accuracy', verbose=4, cv=3)

[CV] no parameters to be set .........................................
[CV] ................ no parameters to be set, score=0.910865 - 1.8min
[CV] no parameters to be set .........................................
[CV] ................ no parameters to be set, score=0.908799 - 1.8min
[CV] no parameters to be set .........................................
[CV] ................ no parameters to be set, score=0.916214 - 1.9min


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  5.5min finished


array([ 0.91086458,  0.90879878,  0.91621394])

In [29]:
%%time
from skimage.io import imread, ImageCollection

def imread_gray(img, **params):
    """Load one image as greyscale; used as ``load_func`` for ImageCollection.

    ``img`` is whatever ImageCollection hands to its loader (presumably a file
    name matched by the glob pattern).  Extra keyword arguments are accepted
    and ignored.
    """
    # The flag is passed positionally, as in the training-set loading cell:
    # the keyword spelling `as_grey` is not accepted by every scikit-image
    # release (later ones call it `as_gray`).
    return imread(img, True)

# Load all test PNGs as greyscale, rescale them to 32x32 and compute one HOG
# descriptor per image (same preprocessing as for the training set).
ic_test = [resize(img, (32,32)) for img in ImageCollection('./test/*.png', load_func=imread_gray)]
X_test = np.zeros((len(ic_test), num_feat))
# enumerate replaces the Python 2-only xrange index loop.
for i, image in enumerate(ic_test):
    X_test[i] = extract_hog(image)

CPU times: user 3min 20s, sys: 20.1 s, total: 3min 40s
Wall time: 3min 54s


In [32]:
def fit_and_classify(clf, X_train, y_train, X_test):
    """Fit ``clf`` on the training data and return its predictions for ``X_test``.

    ``predict`` is invoked on the object returned by ``clf.fit``.
    """
    fitted = clf.fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    return predictions

# Fit a LinearSVC on the full training feature matrix and predict labels for
# the test feature matrix; %time reports how long the fit + predict takes.
%time y_pred = fit_and_classify(LinearSVC(), X_train, y_train, X_test)

CPU times: user 2min 45s, sys: 908 ms, total: 2min 46s
Wall time: 2min 45s


In [41]:
# Build the submission table from the first len(ic_test) rows of the training
# table.  `.ix` has been removed from pandas; on the default integer index
# produced by read_csv, `.loc` performs the same label-based, end-inclusive
# slice.
# NOTE(review): the filename column is copied from the *training* table, which
# presumes the test files carry the same names in the same order — confirm.
data_test = data_train.loc[:len(ic_test) - 1, :].copy()
# Overwrite the second column (class_id, judging by the head() preview) with
# the predicted labels.
data_test.iloc[:, 1] = y_pred
data_test.to_csv('test.csv', index=False)