In [1]:
from datetime import datetime
import random
import math
import numpy as np
import pickle
%matplotlib inline
import matplotlib.pyplot as plt
import cv2
from scipy.misc import imresize
from skimage import feature

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

In [2]:
# helpers

# plot images; a list of images is displayed as side-by-side subplots
def plot(imgs, title=None):
    """Show one image, or a list of images side by side, in a new figure.

    imgs:  a single image array or a list of image arrays; anything that is
           not a list is treated as one image.
    title: optional figure-level title (only set when there is an image to draw).

    Arrays with more than two dimensions are displayed with their channels
    taken in index order 2, 1, 0; 2-D arrays are displayed unchanged.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    n_images = len(images)

    plt.figure()

    # the title is a property of the whole figure, so set it once up front
    if title is not None and n_images:
        plt.suptitle(title)

    for position, image in enumerate(images, start=1):
        plt.subplot(1, n_images, position)
        plt.axis("off")

        # reverse the channel order of multi-channel images before showing them
        is_multichannel = len(image.shape) > 2
        plt.imshow(image[:, :, [2, 1, 0]] if is_multichannel else image)
        

# create empty list of given size
def empty(size):
    """Return a new list holding `size` entries, each set to None."""
    return [None for _ in range(size)]


def empty_dict(chunk, n_features):
    """Create a zero-filled feature container that mirrors a chunk's metadata.

    chunk:      dict providing 'n_samples' plus per-sample 'label', 'valid'
                and 'weight' sequences.
    n_features: number of feature columns to allocate per sample.

    Returns a dict with keys 'n_samples', 'n_features', 'values' (zeros of
    shape (n_samples, n_features)) and float copies of the chunk's 'label',
    'valid' and 'weight' entries.
    """
    count = chunk['n_samples']

    container = {
        'n_samples':  count,
        'n_features': n_features,
        'values':     np.zeros((count, n_features)),
    }

    # copy the per-sample metadata into fresh float arrays of length n_samples
    for key in ('label', 'valid', 'weight'):
        column = np.zeros(count)
        column[:] = chunk[key][:]
        container[key] = column

    return container

# load data set
def load_chunk(set_name, chunk_nr):
    """Load one preprocessed chunk from ./data_pp/<set_name>_<chunk_nr>.pkl.

    set_name: name of the data set; first part of the file name.
    chunk_nr: integer chunk index; second part of the file name.

    Returns the unpickled object. It must support chunk['n_samples'], which
    is read for the log line.
    Raises whatever open() / pickle.load() raise (e.g. FileNotFoundError).
    """
    path = './data_pp/%s_%i.pkl' % (set_name, chunk_nr)

    # NOTE(security): unpickling can execute arbitrary code - only load files
    # that were produced by a trusted preprocessing run.
    # The context manager closes the file handle deterministically.
    with open(path, 'rb') as fh:
        chunk = pickle.load(fh)

    print('load_chunk: loaded %i samples (from %s)' % (chunk['n_samples'], path))

    return chunk
    
# dump feature set
def dump_chunk(set_name, chunk_nr, feature_name, features):
    """Pickle a feature container to ./features/<set_name>_<chunk_nr>-<feature_name>.pkl.

    set_name, chunk_nr: identify the source chunk; used in the file name.
    feature_name:       free-form suffix identifying the feature set.
    features:           dict with at least 'values' (float array) and
                        'n_samples'.

    Side effect: NaN entries in features['values'] are replaced by 0 in
    place before the container is written.
    Raises whatever open() / pickle.dump() raise (e.g. if ./features is missing).
    """
    # set NaNs to 0 (in place, so the caller's array is modified as well)
    vals = features['values']
    vals[np.isnan(vals)] = 0

    path = './features/%s_%i-%s.pkl' % (set_name, chunk_nr, feature_name)

    # the context manager guarantees the file is flushed and closed
    with open(path, 'wb') as fh:
        pickle.dump(features, fh)

    # report the count of the container that was written, not of some
    # unrelated global chunk
    print('dump_chunk: dumped %i samples (to %s)' % (features['n_samples'], path))


def blockshaped(a, block_size):
    """Cut a 2-D array into non-overlapping blocks.

    a:          2-D array whose height and width are multiples of the block
                height and width respectively.
    block_size: (block_rows, block_cols).

    Returns an array of shape (n_blocks, block_rows, block_cols); blocks are
    ordered row by row (left to right, then top to bottom).
    """
    block_rows, block_cols = block_size
    height, _ = a.shape  # unpacking also rejects anything that is not 2-D

    # split rows and columns into (block index, offset inside block) pairs ...
    grid = a.reshape(height // block_rows, block_rows, -1, block_cols)
    # ... bring the two block indices next to each other ...
    tiles = grid.swapaxes(1, 2)
    # ... and flatten them into a single block axis
    return tiles.reshape(-1, block_rows, block_cols)
    
    
# marker output: shows that the helper-definition cell ran to completion
print('done')

done


In [3]:
def _depth_cutoff_mask(dep):
    """Weight map that keeps only dense clusters of the lowest-valued depth pixels.

    The threshold starts at 127 and is lowered until at most 750 pixels lie
    strictly below it. Every position then receives the number of
    below-threshold pixels inside its 7x7 neighbourhood; positions whose
    count is 24 or less are set to 0.
    """
    cut = 127
    while (dep < cut).sum() > 750:
        cut -= 1

    # loop-invariant: computed once instead of once per shift
    below = dep < cut
    rows, cols = dep.shape

    # summing the 49 shifted copies counts the below-threshold pixels per 7x7 window
    counts = np.zeros((rows + 6, cols + 6))
    for dx in range(7):
        for dy in range(7):
            counts[dx:dx + rows, dy:dy + cols] += below

    # crop the 3-pixel border that the shifting added on every side
    counts = counts[3:rows + 3, 3:cols + 3]
    return counts * (counts > 24)


def hog_features(chunk, img_type=None):
    """Compute HOG descriptors for seven image planes of every valid sample.

    chunk:    dict with 'n_samples', 'valid', 'rgb', 'dep' and 'seg' entries
              (plus the metadata copied by empty_dict).
    img_type: unused; accepted so that all extractors share one call signature.

    Returns a feature container (see empty_dict) with 7*1152 values per
    sample, concatenated in this order: grayscale, first, second and third
    colour channel, depth, first segmentation channel, masked depth.
    Rows of samples that are not valid stay all zero.

    NOTE(review): 1152 values per plane corresponds to 3x3 blocks of 4x4
    cells with 8 orientations, i.e. to 120x120 input planes - confirm that
    every plane has this size.
    """
    # hog params
    orientations = 8
    pixels_per_cell = (20, 20)
    cells_per_block = (4, 4)

    n_values = 1152

    def hog(plane):
        # Parameters are passed by keyword: the position after
        # `cells_per_block` is not the visualisation flag in every
        # scikit-image release, so a positional False could be bound to a
        # different parameter. The flag itself stays at its default (off);
        # block normalisation is left at the installed version's default.
        return feature.hog(plane,
                           orientations=orientations,
                           pixels_per_cell=pixels_per_cell,
                           cells_per_block=cells_per_block)

    # init container
    features = empty_dict(chunk, 7*n_values)

    for i in range(chunk['n_samples']):

        if not (i+1) % 2000:
            print('hog_features: %i' % (i+1))

        if not chunk['valid'][i]:
            continue

        # feature container (a view: writing to it fills the sample's row)
        vals = features['values'][i, :]

        rgb = chunk['rgb'][i]
        dep = chunk['dep'][i]

        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        first, second, third = cv2.split(rgb)

        planes = (
            gray,                            # grayscale
            first, second, third,            # individual colour channels
            dep,                             # depth
            chunk['seg'][i][:, :, 0],        # segmentation (first channel)
            dep * _depth_cutoff_mask(dep),   # depth (cutoff)
        )

        for k, plane in enumerate(planes):
            vals[k*n_values:(k+1)*n_values] = hog(plane)

    return features
    

def sift_features(chunk, img_type):
    """Compute SIFT descriptors on a fixed keypoint grid for every valid sample.

    chunk:    dict with 'n_samples', 'valid' and an image list under `img_type`.
    img_type: key of the image list inside `chunk`.

    Returns a feature container (see empty_dict) with 30*128 values per
    sample; rows of samples that are not valid stay all zero.
    """
    # sift params
    step_size = 20
    sift = cv2.xfeatures2d.SIFT_create()

    # init container
    features = empty_dict(chunk, 30*128)
    values = features['values']

    for idx in range(chunk['n_samples']):
        done = idx + 1

        if done % 2000 == 0:
            print('sift_features (%s): %i' % (img_type, done))

        if chunk['valid'][idx]:
            img = chunk[img_type][idx]

            # regular grid of keypoints: rows start 10 px in and stop 10 px
            # before the bottom edge, columns start at the left edge
            grid = []
            for y in range(10, img.shape[0]-10, step_size):
                for x in range(0, img.shape[1], step_size):
                    grid.append(cv2.KeyPoint(x, y, step_size))

            # compute() returns (keypoints, descriptors); only the descriptors are kept
            _, descriptors = sift.compute(img, grid)
            values[idx, :] = descriptors.ravel()

    return features


def diff_features(chunk, img_type):
    """Left/right asymmetry features: block sums of the left image half minus
    block sums of the mirrored right half.

    chunk:    dict with 'n_samples', 'valid' and an image list under `img_type`.
    img_type: key of the image list inside `chunk`; the images must be 2-D.

    Returns a feature container (see empty_dict) with one value per 3x3
    block of a 120x60 half image (800 values); rows of samples that are not
    valid stay all zero.

    NOTE(review): the feature count and the split column (60) are hard-coded,
    so the images are expected to be 120x120 - confirm against the
    preprocessing step.
    """
    # diff params
    block_size = (3, 3)

    # init container
    features = empty_dict(chunk, (120*60)//(block_size[0]*block_size[1]))

    for i in range(chunk['n_samples']):

        if not (i+1) % 2000:
            print('diff_features (%s): %i' % (img_type, i+1))

        if not chunk['valid'][i]:
            continue

        img = chunk[img_type][i]

        # left half, and right half mirrored so that both run outward from the centre line
        left = img[:, :60].T
        right = np.flip(img[:, 60:], 1).T

        # one sum per block, computed in a single vectorised reduction
        l_sums = blockshaped(left, block_size).sum(axis=(1, 2))
        r_sums = blockshaped(right, block_size).sum(axis=(1, 2))

        # Subtract in float64: sums of unsigned images are unsigned too, and
        # an unsigned subtraction would wrap around to a huge positive value
        # wherever the right block is larger than the left one.
        features['values'][i, :] = l_sums.astype(np.float64) - r_sums.astype(np.float64)

    return features
        

# registry of the available feature extractors, looked up by name
extractors = dict(
    hog=hog_features,
    sift=sift_features,
    diff=diff_features,
)

In [4]:
# run configuration
set_name, chunk_nr = 'test', 0        # which chunk file to load: <set_name>_<chunk_nr>.pkl
extractor, img_type = 'diff', 'dep'   # key into `extractors`, and image key inside the chunk
iteration = 0                         # suffix that ends up in the dumped feature file name

In [5]:
# load the configured chunk; `chunk` is the input of the extraction cell below
chunk = load_chunk(set_name, chunk_nr)
print('done')

load_chunk: loaded 8190 samples (from ./data_pp/test_0.pkl)
done


In [6]:
# remember when the extraction started so the elapsed time can be reported
start = datetime.now()

# look the extractor up by name and run it on the loaded chunk
features = extractors[extractor](chunk, img_type)
print(features['values'].shape)

# elapsed wall-clock time of the extraction
print(datetime.now()-start)

diff_features (dep): 2000
diff_features (dep): 4000
diff_features (dep): 6000
diff_features (dep): 8000
(8190, 800)
0:00:22.086539


In [7]:
# write the features to disk; the feature name combines extractor, image type and iteration
dump_chunk(set_name, chunk_nr, '%s_%s_%i' % (extractor, img_type, iteration), features)

dump_chunk: dumped 8190 samples (to ./features/test_0-diff_dep_0.pkl)
