# Generate Distance-to-Landmarks features

In [1]:
from helper import *
from skimage.feature import local_binary_pattern

In [2]:
# Directory that holds the input and output .npz archives.
DATA_DIR = 'data/'
# Tag of the data subset; it is appended to every archive name in this notebook.
SUBSET = '20k'
# Base name (without extension) of the archive that is loaded below.
DATA_FILENAME = 'lbp_subset' + SUBSET

In [3]:
# Read the train and test splits (features, labels) from the subset archive.
(X_train, y_train), (X_test, y_test) = load_npz(
    DATA_DIR + DATA_FILENAME + '.npz'
)

((20000, 10), dtype('int16')) ((20000,), dtype('uint8'))
((10000, 10), dtype('int16')) ((10000,), dtype('uint8'))


In [4]:
# Avoid Divide-by-Zero in KLD: kld() divides by its inputs and takes log2 of
# the ratios, so every feature value is shifted up by one.
# NOTE: both arrays are modified in place, so this cell is not idempotent;
# running it a second time shifts the data again.
# NOTE(review): the landmarks loaded later in this notebook are not shifted
# here; presumably they were built from already-shifted data. Confirm.
X_train += 1
X_test += 1

In [5]:
# Load the three landmark sets ('both', 'pos', 'neg') from the landmark archive.
LAND_FILENAME = 'landmarks_subset' + SUBSET
# The context manager closes the archive even if one of the keys is missing;
# the arrays are read out while the file is open and stay usable afterwards.
with np.load(DATA_DIR + LAND_FILENAME + '.npz') as data:
    landmarks = data['both']
    landmarks_pos = data['pos']
    landmarks_neg = data['neg']
landmarks.shape, landmarks.dtype

((64, 10), dtype('int16'))

In [6]:
def kld(p, q):
    """Return the smaller of the two directed Kullback-Leibler divergences.

    Both directions, ``sum(p * log2(p / q))`` and ``sum(q * log2(q / p))``,
    are evaluated and the minimum of the two is returned.

    Parameters
    ----------
    p, q : array-like
        Strictly positive values of matching shape. They are used as given;
        no normalisation to unit sum is applied here.

    Returns
    -------
    float
        ``min(KL(p||q), KL(q||p))`` in bits.

    Raises
    ------
    ValueError
        If either directed sum is negative. Non-negativity is only guaranteed
        when ``p`` and ``q`` have the same total, so a negative value signals
        inputs this measure was not meant for.
    """
    # Work in float64 so plain sequences are accepted as well as arrays; for
    # integer arrays this yields the same values as NumPy's own promotion.
    p = np.asarray(p, dtype=np.float64)
    q = np.asarray(q, dtype=np.float64)

    p_q = np.sum(p * np.log2(p / q))
    q_p = np.sum(q * np.log2(q / p))

    if p_q < 0 or q_p < 0:
        # Put the offending inputs and both sums in the error itself instead
        # of printing them separately.
        raise ValueError(
            'negative KL divergence: KL(p||q)={!r}, KL(q||p)={!r}, '
            'p={!r}, q={!r}'.format(p_q, q_p, p, q)
        )
    return min(p_q, q_p)

In [7]:
def kld_v(v):
    """Return a one-argument function that computes ``kld(v, z)`` for a given ``z``.

    Fixing ``v`` this way lets the result be handed to
    ``np.apply_along_axis``, which calls its function with one slice at a time.
    """
    def _kld_from_v(z):
        return kld(v, z)

    return _kld_from_v

In [8]:
def lbp_to_landmark(v, landmarks):
    """Return the ``kld`` distance from ``v`` to every row of ``landmarks``.

    ``kld_v(v)`` is applied along axis 1 of ``landmarks``, so the result holds
    one distance per landmark row. The full distance vector is returned, not
    the index of the closest landmark.
    """
    return np.apply_along_axis(kld_v(v), 1, landmarks)

In [9]:
def lbp_to_landmark_l(landmarks):
    """Return a one-argument function mapping ``v`` to ``lbp_to_landmark(v, landmarks)``.

    The landmark set is fixed up front so the returned function can be passed
    to ``np.apply_along_axis`` and called once per data row.
    """
    def _distances_for(v):
        return lbp_to_landmark(v, landmarks)

    return _distances_for

In [10]:
# Distances from each row of X_train to the 'both' landmarks, stored as float32.
Xb_train = np.apply_along_axis(
    lbp_to_landmark_l(landmarks), 1, X_train
).astype('float32')
Xb_train.shape, Xb_train.dtype

((20000, 64), dtype('float32'))

In [11]:
# Distances from each row of X_test to the 'both' landmarks, stored as float32.
Xb_test = np.apply_along_axis(
    lbp_to_landmark_l(landmarks), 1, X_test
).astype('float32')
Xb_test.shape, Xb_test.dtype

((10000, 64), dtype('float32'))

In [12]:
# Distances from each row of X_train to the 'pos' landmarks, stored as float32.
Xp_train = np.apply_along_axis(
    lbp_to_landmark_l(landmarks_pos), 1, X_train
).astype('float32')
Xp_train.shape, Xp_train.dtype

((20000, 64), dtype('float32'))

In [13]:
# Distances from each row of X_test to the 'pos' landmarks, stored as float32.
Xp_test = np.apply_along_axis(
    lbp_to_landmark_l(landmarks_pos), 1, X_test
).astype('float32')
Xp_test.shape, Xp_test.dtype

((10000, 64), dtype('float32'))

In [14]:
# Distances from each row of X_train to the 'neg' landmarks, stored as float32.
Xn_train = np.apply_along_axis(
    lbp_to_landmark_l(landmarks_neg), 1, X_train
).astype('float32')
Xn_train.shape, Xn_train.dtype

((20000, 64), dtype('float32'))

In [15]:
# Distances from each row of X_test to the 'neg' landmarks, stored as float32.
Xn_test = np.apply_along_axis(
    lbp_to_landmark_l(landmarks_neg), 1, X_test
).astype('float32')
Xn_test.shape, Xn_test.dtype

((10000, 64), dtype('float32'))

In [16]:
# Save the distance features built from the 'both' landmarks together with the
# labels. The 'pos' and 'neg' variants are not written out by this cell.
SUBSET_FILENAME = 'ldist_subset' + SUBSET
np.savez(
    DATA_DIR + SUBSET_FILENAME,
    X_train=Xb_train,
    y_train=y_train,
    X_test=Xb_test,
    y_test=y_test,
)

In [17]:
# Run GBT on the features derived from the 'both' landmarks.
# GBT is not defined in this notebook (presumably it comes from `helper`; confirm).
# The recorded output reports a Train and a Test figure for each n_est value.
GBT(Xb_train, Xb_test, y_train, y_test)

n_est: Train, Test
400  : 82.1,  77.3
600  : 83.7,  77.4
800  : 85.3,  77.3


In [18]:
# Run GBT on the features derived from the 'pos' landmarks.
# GBT is not defined in this notebook (presumably it comes from `helper`; confirm).
# The recorded output reports a Train and a Test figure for each n_est value.
GBT(Xp_train, Xp_test, y_train, y_test)

n_est: Train, Test
400  : 81.5,  77.2
600  : 82.8,  77.2
800  : 84.1,  77.3


In [19]:
# Run GBT on the features derived from the 'neg' landmarks.
# GBT is not defined in this notebook (presumably it comes from `helper`; confirm).
# The recorded output reports a Train and a Test figure for each n_est value.
GBT(Xn_train, Xn_test, y_train, y_test)

n_est: Train, Test
400  : 82.0,  77.6
600  : 83.6,  77.4
800  : 85.1,  77.4
