In [1]:
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import local_binary_pattern
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [2]:
# Dataset location and selection. The .npz archives loaded further down are
# addressed as DATA_DIR + DATA_FILENAME + '_train' / '_test' + '.npz'.
DATA_DIR = 'data/'
SUBSET = '20k'
DATA_FILENAME = '1d_subset' + SUBSET

In [3]:
# Load the training split. The archive is opened in a `with` block so that it
# is closed even if one of the key lookups below raises; the arrays are
# extracted inside the block and remain usable afterwards.
with np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz') as data_train:
    X_train = data_train['X']
    X2_train = data_train['X2']
    y_train = data_train['y']
    # train_names = data_train['names']
# Quick sanity check of what was loaded.
X_train.shape, X_train.dtype

((20000, 9216), dtype('uint8'))

In [4]:
# Load the test split. The archive is opened in a `with` block so that it is
# closed even if one of the key lookups below raises; the arrays are
# extracted inside the block and remain usable afterwards.
with np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz') as data_test:
    X_test = data_test['X']
    X2_test = data_test['X2']
    y_test = data_test['y']
    # test_names = data_test['names']
# Quick sanity check of what was loaded.
X_test.shape, X_test.dtype

((10000, 9216), dtype('uint8'))

In [5]:
# Histogram of the 'var' Local Binary Pattern response of an image
def lbpv_histogram(img, b, n):
    """Bin the 'var' LBP response of `img` (P=8, R=1) into a histogram.

    Parameters
    ----------
    img : 2-D array passed straight to `local_binary_pattern`.
    b : bin width; the edges are `np.arange(0, b*n + 1, b)`.
    n : intended number of bins (the edges run from 0 up to b*n).

    Returns
    -------
    The bin counts cast to int16. Values outside the edge range are not
    counted by `np.histogram`.
    """
    # NOTE(review): the LBP map is published as the module-level name
    # `patterns`. Nothing visible in this notebook reads it -- presumably a
    # debugging aid; confirm before removing.
    global patterns
    patterns = local_binary_pattern(img, P=8, R=1, method='var')
    bin_edges = np.arange(0, b*n + 1, b)
    counts = np.histogram(patterns, bins=bin_edges)[0]
    return counts.astype('int16')

In [6]:
# Histogram of the 'uniform' Local Binary Pattern codes of an image
def lbpu_histogram(img):
    """Count the 'uniform' LBP codes of `img` (P=8, R=1).

    The codes are histogrammed over unit-width bins with edges 0..59 and
    only the first 10 counts are kept.

    Parameters
    ----------
    img : 2-D array passed straight to `local_binary_pattern`.

    Returns
    -------
    int16 array of length 10.
    """
    # NOTE(review): the LBP map is published as the module-level name
    # `patterns`. Nothing visible in this notebook reads it -- presumably a
    # debugging aid; confirm before removing.
    global patterns
    patterns = local_binary_pattern(img, P=8, R=1, method='uniform')
    unit_edges = np.arange(59 + 1)
    counts = np.histogram(patterns, bins=unit_edges)[0]
    # Bins 10 and above are expected to be empty (disabled check):
    # assert counts[10:].sum() == 0
    kept = counts[:10]
    return kept.astype('int16')

In [7]:
# Turn a vector <v> holding a flattened 96x96 image (first 9216 values)
# followed by a flattened 48x48 image (remaining 2304 values) into a 1-D LBP
# feature vector of size 40 (four 10-bin LBP histograms).
def vec_to_feat(v):
    """Build the 40-element LBP feature vector for one sample.

    Layout of the result (10 values each, in this order):
      1. 'uniform' LBP histogram of the 96x96 image
      2. 'uniform' LBP histogram of the 48x48 image
      3. 'var' LBP histogram of the 96x96 image (bin width 400, 10 bins)
      4. 'var' LBP histogram of the 48x48 image (bin width 300, 10 bins)
    """
    large_img = v[:9216].reshape(96, 96)
    small_img = v[9216:].reshape(48, 48)
    # Call order is kept as-is: each helper overwrites the module-level
    # `patterns`, so the last call decides what is left there.
    parts = (
        lbpu_histogram(large_img),
        lbpu_histogram(small_img),
        lbpv_histogram(large_img, 400, 10),
        lbpv_histogram(small_img, 300, 10),
    )
    return np.concatenate(parts)

In [8]:
# Put each training sample's two flattened images side by side in one row,
# then run vec_to_feat over every row (axis 1) to get one feature row each.
Xf_train = np.apply_along_axis(
    vec_to_feat,
    1,
    np.hstack((X_train, X2_train)),
)
Xf_train.shape, Xf_train.dtype

((20000, 40), dtype('int16'))

In [9]:
# Put each test sample's two flattened images side by side in one row,
# then run vec_to_feat over every row (axis 1) to get one feature row each.
Xf_test = np.apply_along_axis(
    vec_to_feat,
    1,
    np.hstack((X_test, X2_test)),
)
Xf_test.shape, Xf_test.dtype

((10000, 40), dtype('int16'))

In [11]:
# Fit a gradient-boosted tree ensemble on the LBP features and report the
# score on the training and test splits.
# random_state is fixed so that a fresh Restart & Run All rebuilds the same
# model. NOTE: the scores recorded with this cell came from an unseeded run
# and may differ slightly from what a seeded run prints.
clf = GradientBoostingClassifier(n_estimators=400, random_state=0)
clf.fit(Xf_train, y_train)
print(clf.score(Xf_train, y_train))  # training split
print(clf.score(Xf_test, y_test))    # held-out test split

0.86115
0.8225
