In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage as ndi
from skimage.util import img_as_float
from skimage.filters import gabor_kernel
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [2]:
SUBSET = '20k'
DATA_DIR = 'data/'
DATA_FILENAME = '1d_subset' + SUBSET

In [3]:
data_train = np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz')
X_train = data_train['X']
X2_train = data_train['X2']
y_train = data_train['y']
# train_names = data_train['names']
data_train.close()
X_train.shape, X_train.dtype

((20000, 9216), dtype('uint8'))

In [4]:
data_test = np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz')
X_test = data_test['X']
X2_test = data_test['X2']
y_test = data_test['y']
# test_names = data_test['names']
data_test.close()
X_test.shape, X_test.dtype

((10000, 9216), dtype('uint8'))

In [5]:
def compute_feats(image, kernels):
    feats = np.zeros((len(kernels), 2), dtype=np.double)
    for k, kernel in enumerate(kernels):
        filtered = ndi.convolve(image, kernel, mode='wrap')
        feats[k, 0] = filtered.mean()
        feats[k, 1] = filtered.var()
    return feats

In [6]:
# Turn a <v> of size 9216 (96x96) into 1-D LDP feature vector of
# size 20 (two LDP histogram)
def vec_to_feat(v):
    feats1 = compute_feats(v[:9216].reshape(96, 96), kernels).ravel()
    feats2 = compute_feats(v[9216:].reshape(48, 48), kernels).ravel()
    return np.concatenate((feats1, feats2))

In [7]:
# prepare filter bank kernels
kernels = []
for theta in range(4):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for frequency in (0.05, 0.25):
            kernel = np.real(gabor_kernel(frequency, theta=theta,
                                          sigma_x=sigma, sigma_y=sigma))
            kernels.append(kernel)
len(kernels), kernels[0].shape

(16, (7, 7))

In [8]:
Xf_train = np.apply_along_axis(vec_to_feat, 1, np.hstack((X_train, X2_train)))
Xf_train.shape, Xf_train.dtype

((20000, 64), dtype('float64'))

In [9]:
Xf_test = np.apply_along_axis(vec_to_feat, 1, np.hstack((X_test, X2_test)))
Xf_test.shape, Xf_test.dtype

((10000, 64), dtype('float64'))

In [10]:
clf = GradientBoostingClassifier(n_estimators=400)
clf.fit(Xf_train, y_train)
print(clf.score(Xf_train, y_train))
print(clf.score(Xf_test, y_test))

0.8549
0.8264


In [11]:
SUBSET_FILENAME = '2gabor_subset' + SUBSET
np.savez(DATA_DIR + SUBSET_FILENAME + '_train', X=Xf_train, y=y_train)  #, names=train_names)
np.savez(DATA_DIR + SUBSET_FILENAME + '_test', X=Xf_test, y=y_test)  #, names=test_names)