In [1]:
import matplotlib.pyplot as plt
import numpy as np
from random import randrange
from skimage.feature import local_binary_pattern

In [2]:
# Which subset of the data set to process; used as a suffix on file names.
SUBSET = '20k'
# Directory holding the input archives (trailing slash is relied upon below).
DATA_DIR = 'data/'
# Base name of the input archives, without the split suffix or extension.
DATA_FILENAME = ''.join(('data_naive1d_subset', SUBSET))

In [3]:
# Load the training split. The context manager guarantees the .npz archive
# is closed even if one of the key lookups below raises.
with np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz') as data_train:
    X_train = data_train['X']
    y_train = data_train['y']
    train_names = data_train['names']
X_train.shape, X_train.dtype

((20000, 9216), dtype('uint8'))

In [4]:
# Load the test split. The context manager guarantees the .npz archive
# is closed even if one of the key lookups below raises.
with np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz') as data_test:
    X_test = data_test['X']
    y_test = data_test['y']
    test_names = data_test['names']
X_test.shape, X_test.dtype

((10000, 9216), dtype('uint8'))

In [5]:
def extract_center(v, hs=32):
    """Return the central square window of a flattened 96x96 image.

    Parameters
    ----------
    v : array of 9216 elements
        Flattened image; it is reshaped to (96, 96) before slicing.
    hs : int, optional
        Half of the window's side length. The window covers rows and
        columns ``48 - hs`` up to (excluding) ``48 + hs``.

    Returns
    -------
    2-D array of shape (2 * hs, 2 * hs).

    Raises
    ------
    ValueError
        If ``hs`` is outside ``0..48``. Without this check a larger ``hs``
        makes the slice start negative, which wraps around and silently
        yields a window of the wrong size.
    """
    side = 96
    mid = side // 2
    if not 0 <= hs <= mid:
        raise ValueError(
            'hs must be between 0 and %d, got %r' % (mid, hs))
    img = v.reshape(side, side)
    lo, hi = mid - hs, mid + hs
    return img[lo:hi, lo:hi]

def extract_center_cell(v, i, j):
    """Return the (i, j)-th 32x32 cell of the central 64x64 region.

    The flattened image ``v`` (9216 elements) is reshaped to (96, 96). The
    central region starts at offset 16 on both axes and is split into a
    2x2 grid of 32x32 cells; ``i`` selects the cell row and ``j`` the cell
    column.

    Returns
    -------
    2-D array of shape (32, 32).

    Raises
    ------
    ValueError
        If ``i`` or ``j`` is not 0 or 1. Other indices would slice past
        the image edge (or wrap around) and silently return a truncated
        or empty cell.
    """
    if i not in (0, 1) or j not in (0, 1):
        raise ValueError(
            'cell indices must be 0 or 1, got (%r, %r)' % (i, j))
    offset, cell = 16, 32
    img = v.reshape(96, 96)
    top = offset + cell * i
    left = offset + cell * j
    return img[top:top + cell, left:left + cell]

def vec_to_cells(v):
    """Split the centre of a flattened 96x96 image into its four cells.

    Calls ``extract_center_cell`` for every (i, j) in the 2x2 grid, in
    row-major order: (0, 0), (0, 1), (1, 0), (1, 1). The cells are kept
    2-D (they are not flattened), so the stacked result has one leading
    axis of length 4 followed by the cell's own axes.
    """
    grid = range(2)
    return np.array(
        [extract_center_cell(v, row, col) for row in grid for col in grid]
    )

In [6]:
def lbp_histogram(img, dim=32):
    """Compute a 10-bin Local Binary Pattern histogram for ``img``.

    ``img`` is passed to ``local_binary_pattern`` with 8 neighbours,
    radius 1 and the 'uniform' method. The pattern codes are then counted
    into unit-width bins.

    ``dim`` is accepted but not used by the body.

    Returns an int16 array of length 10 holding each bin's count plus one.
    Raises AssertionError if any code falls outside the first ten bins.
    """
    n_kept = 10
    edges = np.arange(59 + 1)
    codes = local_binary_pattern(img, 8, 1, 'uniform')
    counts = np.histogram(codes, bins=edges)[0]
    # Only the first ten bins are expected to be populated, which is what
    # allows the feature to be truncated to ten entries.
    assert counts[n_kept:].sum() == 0
    # The +1 keeps every bin non-zero, avoiding divide-by-zero errors
    # later in kld().
    return counts[:n_kept].astype('int16') + 1

In [7]:
def vec_to_feat(v):
    """Turn a flattened 96x96 image into a 1-D LBP feature vector.

    The feature is the concatenation of five LBP histograms: first the
    histogram of the central window returned by ``extract_center``, then
    the histograms of the four cells returned by ``vec_to_cells``, in the
    order that function yields them. With ten bins per histogram the
    result has 50 entries.
    """
    center_hist = lbp_histogram(extract_center(v), 64)
    cell_hists = np.array([lbp_histogram(cell) for cell in vec_to_cells(v)])
    return np.concatenate((center_hist, cell_hists.ravel()))

In [8]:
# Compute one LBP feature vector per row (image) of the training matrix.
train_feat = np.apply_along_axis(vec_to_feat, axis=1, arr=X_train)
(train_feat.shape, train_feat.dtype)

((20000, 50), dtype('int16'))

In [9]:
# Compute one LBP feature vector per row (image) of the test matrix.
test_feat = np.apply_along_axis(vec_to_feat, axis=1, arr=X_test)
(test_feat.shape, test_feat.dtype)

((10000, 50), dtype('int16'))

In [10]:
# Persist the LBP features together with the labels and sample names,
# one archive per split (np.savez appends the '.npz' extension itself).
SUBSET_FILENAME = ''.join(('data_5lbp_subset', SUBSET))
np.savez(
    ''.join((DATA_DIR, SUBSET_FILENAME, '_train')),
    X=train_feat, y=y_train, names=train_names
)
np.savez(
    ''.join((DATA_DIR, SUBSET_FILENAME, '_test')),
    X=test_feat, y=y_test, names=test_names
)