# Generate 5-cell overlapping LBP histograms: one 64x64px centered cell plus the four 32x32px quadrants of that centered region

In [1]:
from helper import *
from skimage.feature import local_binary_pattern

In [2]:
# Dataset selection: directory holding the data, the subset tag, and the
# base name (without extension) of the raw-pixel subset file to read.
DATA_DIR = 'data/'
SUBSET = '20k'
DATA_FILENAME = ''.join(('1d_subset', SUBSET))

In [3]:
# Load the train/test splits from '<DATA_DIR><DATA_FILENAME>.npz'.
# NOTE(review): `load_npz` comes from `helper` and is not visible here; it is
# expected to return ((X_train, y_train), (X_test, y_test)) and appears to
# print each split's shape/dtype (see the cell output) — confirm.
(X_train, y_train), (X_test, y_test) = \
    load_npz(DATA_DIR + DATA_FILENAME + '.npz')

((20000, 9216), dtype('uint8')) ((20000,), dtype('uint8'))
((10000, 9216), dtype('uint8')) ((10000,), dtype('uint8'))


In [4]:
# Local Binary Pattern Histogram
def lbpu_histogram(img):
    """Return the 10-bin histogram of uniform LBP codes of `img`.

    The codes are computed with 8 neighbours at radius 1 using the
    'uniform' method.  The histogram is built over integer bins 0..58 and
    only the first 10 counts are kept; with this method the codes are
    expected to lie in 0..9, so the discarded bins should all be empty.

    Args:
        img: 2-D grayscale image (array-like) to analyse.

    Returns:
        1-D int16 array of length 10 with the count of each LBP code.
    """
    codes = local_binary_pattern(img, 8, 1, 'uniform')
    bin_edges = np.arange(59 + 1)
    counts = np.histogram(codes, bins=bin_edges)[0]
    return counts[:10].astype('int16')

In [5]:
# Extract center of image as 2-D matrix
def extract_center(v, hs=32):
    """Return the central square window of a flattened 96x96 image.

    Args:
        v: array of 9216 values, reshaped here to 96x96.
        hs: half the side length of the window; the window spans rows and
            columns 48-hs up to (but excluding) 48+hs.

    Returns:
        2-D array of shape (2*hs, 2*hs) for 0 < hs <= 48.
    """
    lo, hi = 48 - hs, 48 + hs
    window = slice(lo, hi)
    return v.reshape(96, 96)[window, window]

# Extract (<i>, <j>)th 32x32 cell, counted from a 16px offset, as 2-D matrix
def extract_center_cell(v, i, j):
    """Return one 32x32 cell of a flattened 96x96 image.

    Cells are laid out on a 32px grid that starts 16px in from the top-left
    corner, so (i, j) in {0, 1} x {0, 1} addresses the four quadrants of
    the central 64x64 region.

    Args:
        v: array of 9216 values, reshaped here to 96x96.
        i: row index of the cell on the offset grid.
        j: column index of the cell on the offset grid.

    Returns:
        2-D array holding rows 16+32*i .. 16+32*(i+1) and columns
        16+32*j .. 16+32*(j+1) (end exclusive).
    """
    top = 16 + 32 * i
    left = 16 + 32 * j
    return v.reshape(96, 96)[top:top + 32, left:left + 32]

# Turn a <v> of size 9216 (96x96) into 4 cells of 32x32 by extracting
# the 2x2 block of cells centered on the image
def vec_to_cells(v):
    """Return the four central 32x32 cells of a flattened 96x96 image.

    Args:
        v: array of 9216 values representing a 96x96 image.

    Returns:
        Array stacking the cells in row-major order:
        (0, 0), (0, 1), (1, 0), (1, 1).  Each cell is kept 2-D.
    """
    grid = [(row, col) for row in range(2) for col in range(2)]
    return np.array([extract_center_cell(v, row, col) for row, col in grid])

In [6]:
# Turn a <v> of size 9216 (96x96) into 1-D LBP feature vector made of
# 5 concatenated LBP histograms (center region + its 4 cells)
def vec_to_feat(v):
    """Build the 5-cell LBP feature vector for one flattened image.

    Args:
        v: array of 9216 values representing a 96x96 image.

    Returns:
        1-D array: the histogram of the centered region (default
        `extract_center` window) followed by the histograms of the four
        cells from `vec_to_cells`, in that order.
    """
    center_hist = lbpu_histogram(extract_center(v))
    cell_hists = np.array([lbpu_histogram(cell) for cell in vec_to_cells(v)])
    return np.concatenate((center_hist, cell_hists.ravel()))

In [7]:
# Build the training feature matrix by applying vec_to_feat to every row
# (axis 1) of X_train.
# NOTE(review): `delayed` and `compute` are not defined here; presumably they
# are dask's lazy-evaluation helpers re-exported by `helper` — confirm.
Xf_train = np.apply_along_axis(delayed(vec_to_feat), 1, X_train)
# Evaluate the per-row results and join them into a single array.
Xf_train = np.concatenate(compute([x for x in Xf_train]))
# Bare expression: shows the resulting shape and dtype as the cell output.
Xf_train.shape, Xf_train.dtype

((20000, 50), dtype('int16'))

In [8]:
# Build the test feature matrix by applying vec_to_feat to every row
# (axis 1) of X_test.
# NOTE(review): `delayed` and `compute` are not defined here; presumably they
# are dask's lazy-evaluation helpers re-exported by `helper` — confirm.
Xf_test = np.apply_along_axis(delayed(vec_to_feat), 1, X_test)
# Evaluate the per-row results and join them into a single array.
Xf_test = np.concatenate(compute([x for x in Xf_test]))
# Bare expression: shows the resulting shape and dtype as the cell output.
Xf_test.shape, Xf_test.dtype

((10000, 50), dtype('int16'))

In [9]:
# Persist the extracted features next to the raw data.  The labels are saved
# unchanged alongside the new feature matrices, under the keys X_train,
# y_train, X_test and y_test.
SUBSET_FILENAME = '5lbp_subset' + SUBSET
# np.savez appends the '.npz' extension when the file name lacks it.
np.savez(DATA_DIR + SUBSET_FILENAME,
         X_train=Xf_train, y_train=y_train,
         X_test=Xf_test, y_test=y_test)

In [10]:
# Evaluate the features with the `GBT` helper from `helper`.
# NOTE(review): GBT is not visible here; judging by the cell output it
# presumably fits gradient-boosted trees and reports train/test scores for
# several n_estimators values — confirm.
GBT(Xf_train, Xf_test, y_train, y_test)

n_est: Train, Test
400  : 81.2,  76.1
600  : 83.0,  76.0
800  : 84.8,  75.9
