In [1]:
import matplotlib.pyplot as plt
import numpy as np
from random import randrange
from skimage.feature import local_binary_pattern

In [2]:
# Directory holding the .npz archives read and written by this notebook.
# The trailing slash matters: file paths are built by plain string concatenation.
DATA_DIR = 'data/'
# Base name of the input dataset; a split suffix ('_train' / '_test') and
# '.npz' are appended to it when the archives are loaded.
DATA_FILENAME = 'data_naive1d_subset20k'

In [3]:
# Load the training split. Using the archive as a context manager guarantees
# that the underlying .npz file is closed even if reading one of its arrays
# raises, instead of relying on an explicit close() that could be skipped.
with np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz') as data_train:
    X_train = data_train['X']
    y_train = data_train['y']
    train_names = data_train['names']
# Cell output: quick sanity check of the loaded image matrix
X_train.shape, X_train.dtype

((20000, 9216), dtype('uint8'))

In [4]:
# Load the test split. Using the archive as a context manager guarantees
# that the underlying .npz file is closed even if reading one of its arrays
# raises, instead of relying on an explicit close() that could be skipped.
with np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz') as data_test:
    X_test = data_test['X']
    y_test = data_test['y']
    test_names = data_test['names']
# Cell output: quick sanity check of the loaded image matrix
X_test.shape, X_test.dtype

((10000, 9216), dtype('uint8'))

In [5]:
def extract_cell(v, i, j, img_dim=96, cell_dim=32):
    """Extract the (i, j)-th square cell of a flattened square image as a 1-D vector.

    Parameters
    ----------
    v : np.ndarray
        Flattened image holding ``img_dim * img_dim`` values.
    i, j : int
        Zero-based row and column index of the cell within the cell grid.
    img_dim : int, optional
        Side length of the square image. Defaults to 96.
    cell_dim : int, optional
        Side length of each square cell. Defaults to 32.

    Returns
    -------
    np.ndarray
        The cell un-rolled row by row; ``cell_dim * cell_dim`` values when the
        requested cell lies fully inside the image.

    Raises
    ------
    ValueError
        If ``v`` cannot be reshaped to ``(img_dim, img_dim)``.
    """
    img = v.reshape(img_dim, img_dim)
    rows = slice(cell_dim * i, cell_dim * (i + 1))
    cols = slice(cell_dim * j, cell_dim * (j + 1))
    return img[rows, cols].ravel()

def vec_to_cells(v):
    """Split a flattened 96x96 image (9216 values) into its nine 32x32 cells.

    The 3x3 grid of cells is walked row by row, and every cell is un-rolled
    into its own 1024-element vector by extract_cell().

    Returns an array with one row per cell (9 rows of 1024 values).
    """
    grid = ((row, col) for row in range(3) for col in range(3))
    return np.array([extract_cell(v, row, col) for row, col in grid])

In [6]:
def kld(p, q):
    """Return the smaller of the two directed Kullback-Leibler divergences.

    Both directions, sum(p * log2(p / q)) and sum(q * log2(q / p)), are
    evaluated and the minimum is returned. The inputs are used as given
    (no normalisation), so every entry must be non-zero to avoid
    division-by-zero in either ratio.
    """
    def _directed(a, b):
        # divergence of b from a, in bits
        return np.sum(a * np.log2(a / b))

    return min(_directed(p, q), _directed(q, p))

In [7]:
def lbp_histogram(v, dim=32):
    """Local Binary Pattern histogram of a flattened dim x dim image cell.

    The cell is reshaped to 2-D and encoded with local_binary_pattern()
    using 8 neighbours at radius 1 and the 'uniform' method. The codes are
    counted into 59 unit-width bins, of which only the first ten are
    expected to be populated; this is asserted so the feature can safely be
    cut down to ten values.

    Returns the ten leading bin counts, each incremented by one so that a
    later kld() call never divides by zero.
    """
    n_bins, n_kept = 59, 10
    codes = local_binary_pattern(v.reshape(dim, dim), 8, 1, 'uniform')
    counts = np.histogram(codes, bins=np.arange(n_bins + 1))[0]
    # everything past the kept bins must be empty, otherwise truncating
    # the histogram would silently discard patterns
    assert counts[n_kept:].sum() == 0
    return counts[:n_kept] + 1

In [8]:
def vec_to_feat(v):
    """Turn a flattened 96x96 image (9216 values) into a 1-D LBP feature vector.

    The image is split into its nine cells by vec_to_cells(), each cell is
    summarised by its ten-bin lbp_histogram(), and the nine histograms are
    laid end to end in cell order, giving 90 values (9 x 10).
    """
    cell_histograms = [lbp_histogram(cell) for cell in vec_to_cells(v)]
    return np.concatenate(cell_histograms)

In [9]:
# Compute the LBP feature vector of every training image (one row of X_train each)
train_feat = np.apply_along_axis(vec_to_feat, axis=1, arr=X_train)
train_feat.shape

(20000, 90)

In [10]:
# Compute the LBP feature vector of every test image (one row of X_test each)
test_feat = np.apply_along_axis(vec_to_feat, axis=1, arr=X_test)
test_feat.shape

(10000, 90)

In [11]:
# Persist the LBP features together with the labels and names loaded from the
# input archives, one archive per split, under the same keys ('X', 'y', 'names')
# that the input archives use. The paths are given without an extension;
# np.savez appends '.npz' when it is missing.
SUBSET_FILENAME = 'data_lbp_subset20k'
np.savez(DATA_DIR + SUBSET_FILENAME + '_train', X=train_feat, y=y_train, names=train_names)
np.savez(DATA_DIR + SUBSET_FILENAME + '_test', X=test_feat, y=y_test, names=test_names)