In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time
import cv2

from joblib import Parallel, delayed

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *

from matplotlib.path import Path
%matplotlib inline

import math
import random

In [20]:
# Class names. The first entry ('BackG') is skipped wherever labels[1:] is used below.
labels = ['BackG',
          '5N', '7n', '7N', '12N', 'Pn', 'VLL', '6N', 'Amb',
          'R', 'Tz', 'RtTg', 'LRt', 'LC', 'AP', 'sp5']
n_labels = len(labels)

# name -> position of that name in `labels`
labels_index = {name: idx for idx, name in enumerate(labels)}

# Every label except the first gets a '<name>_surround' companion.
foreground_labels = labels[1:]
surround_labels = [name + '_surround' for name in foreground_labels]

# '<name>_surround' -> '<name>'
labels_from_surround = dict(zip(surround_labels, foreground_labels))

# Plain names first, then their surround companions, in the same order.
labels_surroundIncluded_list = foreground_labels + surround_labels
labels_surroundIncluded = set(labels_surroundIncluded_list)

# name -> position of that name in `labels_surroundIncluded_list`
labels_surroundIncluded_index = {name: idx
                                 for idx, name in enumerate(labels_surroundIncluded_list)}

# The palette is read from a fixed file instead of being drawn at random:
# colors = np.random.randint(0, 255, (len(labels_index), 3))
palette_file = os.environ['REPO_DIR'] + '/visualization/100colors.txt'
colors = np.loadtxt(palette_file)

# Overwrite the palette row that belongs to 'BackG' with 1.0 in every column.
background_row = labels_index['BackG']
colors[background_row] = 1.

In [21]:
# build training data

# sift_dir = '/oasis/projects/nsf/csd395/yuncong/Brain/learning/sift'
sift_dir = '/oasis/projects/nsf/csd395/wel144/2016-brainstem/sift'

# Upper bound on the number of histograms kept per label.
n_sample = 1000

# File-name templates (relative to sift_dir) of the three histogram levels of one label.
train_hist_templates = ('/train/MD589_%(name)s_histograms_l0.bp',
                        '/train/MD589_%(name)s_histograms_l1.bp',
                        '/train/MD589_%(name)s_histograms_l2.bp')

sampled_hists = []     # one 2-D feature array per label
sampled_targets = []   # matching 1-D arrays filled with the label's index

for name in labels[1:]:
    train_hists0, train_hists1, train_hists2 = [
        bp.unpack_ndarray_file(sift_dir + template % {'name': name})
        for template in train_hist_templates]

    n_train = train_hists0.shape[0]
    # Progress report, first half: "(name, number available)" with no line break.
    sys.stdout.write('%s' % ((name, n_train),))

    #     train_hists = np.c_[train_hists0, train_hists1.reshape((n_train, -1)), train_hists2.reshape((n_train, -1))]

    # Draw at most n_sample distinct row indices; labels with fewer rows keep all of them.
    keep = np.random.choice(n_train, min(n_train, n_sample), replace=False)
    n_train = len(keep)

    # Flatten levels 1 and 2 to one row per sample and put the three levels side by side.
    train_hists = np.c_[train_hists0[keep],
                        train_hists1[keep].reshape(n_train, -1),
                        train_hists2[keep].reshape(n_train, -1)]

    sampled_hists.append(train_hists)
    sampled_targets.append(labels_index[name] * np.ones(n_train))
    # Progress report, second half: number of rows actually kept, then the line break.
    sys.stdout.write(' %s\n' % n_train)

# Stack the per-label pieces; row k of train_data is described by train_labels[k].
train_data = np.concatenate(sampled_hists)
train_labels = np.concatenate(sampled_targets)
n_train = train_data.shape[0]

('5N', 1440) 1000
('7n', 3444) 1000
('7N', 2579) 1000
('12N', 1230) 1000
('Pn', 3042) 1000
('VLL', 1287) 1000
('6N', 154) 154
('Amb', 346) 346
('R', 1082) 1000
('Tz', 1387) 1000
('RtTg', 2639) 1000
('LRt', 1050) 1000
('LC', 481) 481
('AP', 483) 483
('sp5', 3240) 1000


In [22]:
# Display (number of sampled training histograms, length of the concatenated l0/l1/l2 feature vector).
train_data.shape

(12464, 4200)

In [23]:
# L1-normalise every training histogram so that each row sums to 1.
# The builtin `float` is used instead of the `np.float` alias, which recent NumPy
# releases no longer provide.
train_row_sums = train_data.sum(axis=1)[:, None].astype(float)

# A row whose entries sum to 0 produces 0/0 = NaN. That case is expected and is
# repaired on the next statement, so the floating-point warning is silenced here.
with np.errstate(divide='ignore', invalid='ignore'):
    train_data_normalized = train_data / train_row_sums

# Replace the NaNs left by zero-sum rows with 0 so downstream code sees finite values.
train_data_normalized = np.nan_to_num(train_data_normalized)

  if __name__ == '__main__':


In [24]:
# Write the normalised training histograms and their label vector to disk,
# one bp.pack_ndarray_file call per (path, array) pair, in this order.
train_outputs = (('preprocessed/train_data.bp', train_data_normalized),
                 ('preprocessed/train_labels.bp', train_labels))
for out_path, array in train_outputs:
    bp.pack_ndarray_file(array, out_path)

In [25]:
# build testing data

stack = 'MD585'

# Section range looked up for this stack; only its first section is loaded below.
first_detect_sec, last_detect_sec = detect_bbox_range_lookup[stack]
sec = first_detect_sec

# File-name templates (relative to sift_dir) of the three histogram levels of one section.
test_hist_templates = ('/%(stack)s/%(stack)s_%(sec)04d_roi1_histograms_l0.bp',
                       '/%(stack)s/%(stack)s_%(sec)04d_roi1_histograms_l1.bp',
                       '/%(stack)s/%(stack)s_%(sec)04d_roi1_histograms_l2.bp')
section_key = {'stack': stack, 'sec': sec}

test_hists0, test_hists1, test_hists2 = [
    bp.unpack_ndarray_file(sift_dir + template % section_key)
    for template in test_hist_templates]

# Flatten levels 1 and 2 to one row per sample and put the three levels side by side.
n_test = test_hists0.shape[0]
test_hists = np.c_[test_hists0,
                   test_hists1.reshape(n_test, -1),
                   test_hists2.reshape(n_test, -1)]

test_data = test_hists

In [26]:
# Display (number of test histograms in the loaded section, length of the concatenated l0/l1/l2 feature vector).
test_data.shape

(20979, 4200)

In [27]:
# L1-normalise every test histogram so that each row sums to 1.
# The builtin `float` is used instead of the `np.float` alias, which recent NumPy
# releases no longer provide.
test_row_sums = test_data.sum(axis=1)[:, None].astype(float)

# A row whose entries sum to 0 produces 0/0 = NaN. That case is repaired on the
# next statement, so the floating-point warning is silenced here.
with np.errstate(divide='ignore', invalid='ignore'):
    test_data_normalized = test_data / test_row_sums

# Zero-sum rows are mapped to all-zero rows, exactly as is done for the training
# histograms. Without this step NaN rows would be written to the saved test matrix.
test_data_normalized = np.nan_to_num(test_data_normalized)

In [28]:
# Write the normalised test histograms to a file named after the stack they came from.
test_data_path = 'preprocessed/%(stack)s_test_data.bp' % {'stack': stack}
bp.pack_ndarray_file(test_data_normalized, test_data_path)

In [29]:
# method 1
t = time.time()

def compute_intersection_kernel_oneJob(i, j1):
    """Intersection kernel between training row `i` and every row from `j1` onwards.

    Reads the module-level array `train_data_normalized`.

    Args:
        i: index of the reference row.
        j1: index of the first row to compare against; all rows up to the end are used.

    Returns:
        1-D array with one value per compared row: the sum over features of the
        element-wise minimum of the two rows. Despite the `dist` naming used in this
        notebook, a larger value means a larger overlap.
    """
    reference = train_data_normalized[i]
    candidates = train_data_normalized[j1:]
    # Equivalent closed form that was tried earlier:
    #     dist = .5 * np.sum(train_data_normalized + h - np.abs(train_data_normalized - h), axis=1)
    return np.minimum(reference, candidates).sum(axis=1)

# One job per training row: row r is intersected with rows r .. n_train-1, which is
# row r of the upper triangle (diagonal included) of the kernel matrix.
triangle_jobs = (delayed(compute_intersection_kernel_oneJob)(row, row)
                 for row in range(n_train))
train_dist_triangle = np.concatenate(Parallel(n_jobs=16)(triangle_jobs))

elapsed = time.time() - t
sys.stderr.write('compute intersection kernel: %f seconds\n' % elapsed) # ~ 200s / 12k training data

# Scatter the flat triangle into the upper half of a square matrix. This relies on
# np.triu_indices listing positions in the same row-by-row order as the jobs above.
train_dist_mat = np.empty((n_train, n_train))
upper_rows, upper_cols = np.triu_indices(n_train)
train_dist_mat[upper_rows, upper_cols] = train_dist_triangle

# Mirror the upper half into the lower half so the matrix is symmetric.
lower_rows, lower_cols = np.tril_indices(n_train)
train_dist_mat[lower_rows, lower_cols] = train_dist_mat[lower_cols, lower_rows]


compute intersection kernel: 192.439459 seconds


In [30]:
# method 2
t = time.time()

def compute_intersection_kernel_oneJob(h):
    """Intersection kernel between histogram `h` and every training row.

    Reads the module-level array `train_data_normalized`. Note that this definition
    replaces any earlier function of the same name once the cell has run.

    Args:
        h: 1-D feature vector with the same length as a training row.

    Returns:
        1-D array with one value per training row: the sum over features of the
        element-wise minimum of that row and `h`.
    """
    # Equivalent closed form that was tried earlier:
    #     dist = .5 * np.sum(train_data_normalized + h - np.abs(train_data_normalized - h), axis=1)
    overlap = np.minimum(train_data_normalized, h)
    return overlap.sum(axis=1)

# Driver for this variant, currently disabled (it only covered the first 1000 rows):
# train_dist_mat = np.array(Parallel(n_jobs=16)(delayed(compute_intersection_kernel_oneJob)(h) 
#                                      for h in train_data_normalized[:1000]))

elapsed = time.time() - t
sys.stderr.write('compute intersection kernel: %f seconds\n' % elapsed) # ~ 400s / 12k training data

compute intersection kernel: 0.000318 seconds


In [31]:
# method 3
t = time.time()

def compute_intersection_kernel_oneJob(i, ni, j, nj):
    """Intersection kernel between two contiguous blocks of training rows.

    Reads the module-level array `train_data_normalized`. Note that this definition
    replaces any earlier function of the same name once the cell has run.

    Args:
        i: first row of the first block.
        ni: number of rows requested for the first block (the slice stops at the array end).
        j: first row of the second block.
        nj: number of rows requested for the second block (the slice stops at the array end).

    Returns:
        2-D array whose entry [a, b] is the sum over features of the element-wise
        minimum of row i+a and row j+b.
    """
    # Insert a length-1 axis so every row of the first block is paired with every
    # row of the second block by broadcasting.
    block_i = train_data_normalized[i:i+ni][:, None]
    block_j = train_data_normalized[j:j+nj]
    return np.minimum(block_i, block_j).sum(axis=-1)

# Driver for this variant, currently disabled (fills the matrix in 100 x 100 tiles):
# train_dist_mat = np.empty((n_train, n_train))
# ni = 100
# nj = 100
# for j in range(0, n_train, nj):
#     train_dist_mat[:, j:j+nj] = np.concatenate(Parallel(n_jobs=16)(delayed(compute_intersection_kernel_oneJob)(i, ni, j, nj) 
#                                                     for i in range(0, n_train, ni)))

elapsed = time.time() - t
sys.stderr.write('compute intersection kernel: %f seconds\n' % elapsed) # ~ 400s / 12k training data

compute intersection kernel: 0.000340 seconds


In [32]:
# Display the shape of the square kernel matrix (one row and one column per training histogram).
train_dist_mat.shape

(12464, 12464)

In [33]:
# Write the full training kernel matrix to disk.
dist_mat_path = 'preprocessed/train_spm_dist_mat.bp'
bp.pack_ndarray_file(train_dist_mat, dist_mat_path)