In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
from random import randrange
from skimage.feature import local_binary_pattern

In [2]:
# Input selection: directory holding the archives and the subset to process.
DATA_DIR = 'data/'
SUBSET = '4k'
# Base name shared by the '_train' / '_test' input archives.
DATA_FILENAME = 'data_naive1d_subset' + SUBSET

In [3]:
# Load the training split. The context manager closes the .npz archive even
# if one of the expected arrays is missing; each array is fully read into
# memory when indexed, so it stays usable after the archive is closed.
with np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz') as data_train:
    X_train = data_train['X']
    y_train = data_train['y']
    train_names = data_train['names']
X_train.shape, X_train.dtype

((4000, 9216), dtype('uint8'))

In [4]:
# Load the test split. The context manager closes the .npz archive even
# if one of the expected arrays is missing; each array is fully read into
# memory when indexed, so it stays usable after the archive is closed.
with np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz') as data_test:
    X_test = data_test['X']
    y_test = data_test['y']
    test_names = data_test['names']
X_test.shape, X_test.dtype

((2000, 9216), dtype('uint8'))

In [5]:
# Local Binary Pattern Histogram
def lbp_histogram(img, n_points=8, radius=1):
    """Return the add-one-smoothed uniform-LBP histogram of a 2-D image.

    Parameters
    ----------
    img : 2-D array
        Grayscale image passed straight to ``local_binary_pattern``.
    n_points : int, optional
        Number of circularly symmetric neighbour points (default 8).
    radius : int or float, optional
        Radius of the sampling circle (default 1).

    Returns
    -------
    numpy.ndarray
        ``int16`` vector of length ``n_points + 2`` (10 with the defaults);
        every bin holds its pattern count plus one.

    Raises
    ------
    AssertionError
        If a pattern code falls outside ``[0, n_points + 1]``.
    ValueError
        If a bin count is too large to be stored in ``int16`` after the
        add-one smoothing (the cast would otherwise wrap around silently).
    """
    patterns = local_binary_pattern(img, n_points, radius, 'uniform')
    # The 'uniform' method produces integer codes in [0, n_points + 1],
    # i.e. n_points + 2 distinct values, so that is the feature size.
    n_bins = n_points + 2
    hist = np.bincount(patterns.astype(np.intp).ravel(), minlength=n_bins)
    # Sanity check: only the first n_bins bins may be populated, otherwise
    # truncating the histogram below would drop data.
    assert hist[n_bins:].sum() == 0
    # Guard the narrow output dtype: a count of 32767 or more would overflow
    # once one is added.
    if hist.max() >= np.iinfo(np.int16).max:
        raise ValueError('image too large: LBP bin count does not fit in int16')
    # add one to avoid divide-by-zero errors later in kld()
    return hist[:n_bins].astype('int16') + 1

In [6]:
# Turn a <v> of size 9216 (96x96) into a 1-D LBP feature vector of
# size 20 (two LBP histograms)
def vec_to_feat(v):
    """Build a two-scale LBP feature vector from one flattened 96x96 image.

    The vector is reshaped to 96x96, a half-size copy is made with bicubic
    interpolation, and the LBP histograms of both scales are concatenated
    (full resolution first, then half resolution).
    """
    full_res = v.reshape(96, 96)
    half_res = cv2.resize(full_res, None, fx=0.5, fy=0.5,
                          interpolation=cv2.INTER_CUBIC)
    # One flattened histogram per scale, in full -> half order.
    scale_hists = [lbp_histogram(scale).ravel()
                   for scale in (full_res, half_res)]
    return np.concatenate(scale_hists)

In [7]:
# Map every row of X_train (one flattened image) to its LBP feature vector.
train_feat = np.apply_along_axis(func1d=vec_to_feat, axis=1, arr=X_train)
train_feat.shape, train_feat.dtype

((4000, 20), dtype('int16'))

In [8]:
# Map every row of X_test (one flattened image) to its LBP feature vector.
test_feat = np.apply_along_axis(func1d=vec_to_feat, axis=1, arr=X_test)
test_feat.shape, test_feat.dtype

((2000, 20), dtype('int16'))

In [9]:
# Persist the LBP features together with the labels and sample names of each
# split; np.savez appends the '.npz' extension to the given path.
SUBSET_FILENAME = 'data_2lbp_subset' + SUBSET
np.savez(
    DATA_DIR + SUBSET_FILENAME + '_train',
    X=train_feat,
    y=y_train,
    names=train_names,
)
np.savez(
    DATA_DIR + SUBSET_FILENAME + '_test',
    X=test_feat,
    y=y_test,
    names=test_names,
)