In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import seaborn as sns

import functools as ft
import itertools as it
import operator as op
import collections as co

import os
import glob
import os.path as osp


from utilities import my_show, my_gshow, my_read, my_read_g, my_read_cg

from sklearn import (cluster, datasets, decomposition,
                     metrics,
                     model_selection as skms,
                     neighbors, pipeline, svm,
                     preprocessing as skpre)

%matplotlib inline

In [None]:
def cm_helper(ax, actual, predicted):
    """Draw the confusion matrix of `predicted` against `actual` on `ax`.

    The matrix is computed with sklearn's `confusion_matrix` and rendered as
    an annotated seaborn heatmap; the y axis is labelled 'Actual' and the
    x axis 'Predicted'.  Nothing is returned.
    """
    confusion = metrics.confusion_matrix(actual, predicted)
    # fmt='3d' prints every cell as an integer padded to width 3
    sns.heatmap(confusion, annot=True, fmt='3d', ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    
# reproducibility is next to godliness
np.random.seed(42)

In [None]:
digits = datasets.load_digits()

In [None]:
fig, axes = plt.subplots(1,4)
first_four_wtgt = it.islice(zip(digits.images, digits.target), 4)
for (image, label), ax in zip(first_four_wtgt, axes):       
    my_gshow(ax, image)
    ax.set_title("True: {}".format(label))

# The Simplest Workflow with a Super-simple Learner

  1. create
  2. fit
  3. predict
  4. lather-rinse-repeat

In [None]:
# reloading data so we have a one-cell example:
digits = datasets.load_digits()

# simplify some names and massage shape of data
n_examples = len(digits.images)
data = digits.images.reshape(n_examples, -1)  # N rows, rest is flattened
tgts = digits.target

# split the data into train/test sets
(data_train, data_tst,
 tgts_train, tgts_tst) = skms.train_test_split(data, tgts, test_size=.2)

# create and fit model
knn_classifier = neighbors.KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(data_train, tgts_train)

# predict and evaluate result for first test case
predicted = knn_classifier.predict(data_tst[0:1,:])[0] #annoying, wants 2D
actual    = tgts_tst[0]

my_gshow(plt.gca(), data_tst[0].reshape(8,8))
plt.gca().set_title("actual: {}\npredicted: {}".format(actual, predicted));

In [None]:
# make predictions for the entire test set and see how we did:
predicted = knn_classifier.predict(data_tst)
cm_helper(plt.gca(), tgts_tst, predicted)

# Some LOO Experiments

##### Revisiting Last Week

Incidentally, last week, we built a 1-NN classifier that used HOG features.  And we evaluated it using leave-one-out cross-validation.  We can recreate that here without having to code much at all (besides the HOG features).

In [None]:
# Build a HOG descriptor that treats each whole 8x8 digit as a single cell.
# `default_magic` fills the remaining positional constructor arguments; in
# OpenCV's documented order these are: derivAperture, winSigma,
# histogramNormType, L2HysThreshold, gammaCorrection, nlevels, signedGradient
# (NOTE(review): confirm the order against the installed OpenCV version).
default_magic = (1, -1.0, 0, 0.2, 1, 64, False)
hog_d = cv2.HOGDescriptor((8,8),        # window size (the full 8x8 image)
                        (8,8), (8,8), # block size, block stride
                        (8,8),        # cell size
                        9,            # number of orientation bins 
                        *default_magic)

def extract_features(images, hog_d=hog_d):
    """Compute one HOG descriptor per image and stack them row-wise.

    images: iterable of images accepted by `hog_d.compute`.
    hog_d:  descriptor object to use; the default is the module-level
            `hog_d` as it existed when this function was defined.

    Returns the array built by stacking the squeezed descriptors along
    axis 0 (one entry per image).
    """
    per_image = []
    for img in images:
        # squeeze drops any singleton axes from the descriptor
        per_image.append(hog_d.compute(img).squeeze())
    return np.stack(per_image, 0)

In [None]:
#%%timeit -r1 gives ~ 4.41s 
# ~time to extract features, build model, LOO cross-val
# check last week's run time:  this is a lot slower ... but, sklearn has to deal with many
# many more options that we didn't make use of ... and we have relatively small data
# always take timings with a grain of salt
data = extract_features(digits.images.astype(np.uint8))
knn_classifier = neighbors.KNeighborsClassifier(n_neighbors=1)
# LeaveOneOut() produces the same splits as LeavePOut(1) and names the intent directly
preds = skms.cross_val_predict(knn_classifier, data, tgts, cv=skms.LeaveOneOut())

In [None]:
cm_helper(plt.gca(), tgts, preds)

##### Is Simpler "Better"?

We're currently doing a bit of comparing apples to oranges.  Our first example was a 3NN built on the raw images and our second was a 1NN built on HOG features.  Let's try a more direct comparison.  How does a 1NN fit to raw images do?  Let's see:

In [None]:
n_examples = len(digits.images)
data = digits.images.reshape(n_examples, -1)  # N rows, rest is flattened
tgts = digits.target
knn_classifier = neighbors.KNeighborsClassifier(n_neighbors=1)

In [None]:
# %%timeit -r1  gives ~8.35 s
# this takes a little while b/c we are repeatedly building a fairly large classifier
# (recall KNN basically memorizes its data for later lookup)
# LeaveOneOut() produces the same splits as LeavePOut(1) and names the intent directly
preds = skms.cross_val_predict(knn_classifier, data, tgts, cv=skms.LeaveOneOut())

In [None]:
# as is often the case, we have a trade off
# in this case, we traded processing time for correctness
#
# also, if we dig into it, we will probably find that the majority of the time cost 
# in nearest neighbors in "raw image space" is mostly due to the size of the data table
# (the images were 8x8 so we had Nx64 table; HOG space is "only" Nx9)
#
# also note:  our images are "tiny" so it is still feasible to work directly on pixels
#       and:  these digits are very well aligned and evenly illuminated.  
#             that gives an unfair advantage to kNN 
#             (from the POV of "how well would it do in the real world?")
#  IF YOU WANT:  you could create pseudo-data by taking the "nice" digits and:
#             1. translate and/or 2. rotate and/or 3. add noise/contrast variation
#             then "presto" you have more realistic data without having to manually go out
#             and get more data
cm_helper(plt.gca(), tgts, preds)

# Back to CV and a Better Learner

In [None]:
# reloading data so we have a one-cell example:
digits = datasets.load_digits()

# simplify some names and massage shape of data
n_examples = len(digits.images)
data = digits.images.reshape(n_examples, -1)  # N rows, rest is flattened
tgts = digits.target

# create and fit model
# support-vector-classifier, c-formulation, rbf kernel (super powers!)
# gamma:  how far does an example reach:
#         small neighborhood in space (big value of gamma)
#         or big neighborhood (small value)
#         it operates like the inverse variance (precision) of a normal distribution
svc = svm.SVC(gamma=0.001) 

# StratifiedKFold means we want 
# balanced representation between each class [0-9] in each fold
predicted = skms.cross_val_predict(svc, data, tgts, cv=skms.StratifiedKFold())
cm_helper(plt.gca(), tgts, predicted)

# Working with Faces

## Download the Data

In [None]:
# note, this only downloads if the data is missing from data_home
lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, 
                                       resize=0.4,
                                       data_home="./data/skl_data/")

## Data Shape Exploration

In [None]:
# lfw_people has two views of the underlying information
# .data -> "flat" one row per image, h*w features
# .images -> each image as an (h,w) matrix

# we'll make use of the image shapes for display purposes below
n_samples, *img_shape = lfw_people.images.shape
n_features = np.prod(img_shape)
n_classes  = len(lfw_people.target_names)

print(n_classes, n_samples, n_features, img_shape)

## Train Test Split

In [None]:
faces  = lfw_people.data
faces_target = lfw_people.target

(faces_train, faces_test, 
 faces_train_tgt, faces_test_tgt) = skms.train_test_split(faces, faces_target, test_size=0.25)

## "Pre" PCA followed by SVC

In [None]:
# preprocessing with PCA:  fit and transform the training data
n_components = 150
pca = decomposition.PCA(n_components=n_components, svd_solver='randomized', whiten=True)
pca_faces_train = pca.fit_transform(faces_train)

Support vector machines are a major player in the machine learning community.  There are a number of different, more or less equivalent, formulations of them.  In the formulation we are using through scikit-learn, there are two primary parameters that we care about.  $\gamma$ (gamma) controls how far the influence of a single example can spread.  You can think of it like variance in a Gaussian (normal) distribution:  a bigger variance makes the connection between the mean and a point far away more likely.  However, $\gamma$ works like the inverse of variance (sometimes called precision).  $C$ controls a fundamental tradeoff between training-accuracy and model-simplicity (bias-variance and over/underfitting).  A high value of $C$ allows the model to "wiggle more" to classify the training data better.  This comes at the risk of overfitting noisy datapoints.

In [None]:
# define and fit the main model
param_grid = {'C'     : np.logspace(-2, 4, 7), 
              'gamma' : np.logspace(-4, -1, 4)}

grid_svc = skms.GridSearchCV(svm.SVC(class_weight='balanced'), param_grid)
model    = grid_svc.fit(pca_faces_train, faces_train_tgt)

In [None]:
# predict on the test data
faces_pred = model.predict(pca.transform(faces_test))

In [None]:
print("Parameters", model.best_params_)
print(metrics.classification_report(faces_test_tgt, faces_pred, 
                                    target_names=lfw_people.target_names))
# does this need to be fixed?
cm = metrics.confusion_matrix(faces_test_tgt, faces_pred, 
                              labels=range(n_classes))
ax = sns.heatmap(cm, fmt="3d", annot=True)
ax.set_ylabel('expected')
ax.set_xlabel('predicted');

## Full Pipeline Model

In [None]:
# define and fit the pipeline model
# grid keys are '<step name>__<parameter>'; make_pipeline names each step after
# its lowercased class name, hence the 'pca__' and 'svc__' prefixes
param_grid = {'pca__n_components' : [25, 75, 150], # author's note: 150 is max for randomized solver (TODO confirm)
              'svc__C'     : np.logspace(-2, 4, 7), 
              'svc__gamma' : np.logspace(-4, -1, 4)}

# PCA is now part of the model, so it is re-fit inside the grid search
# instead of once up front as in the "Pre" PCA section
pipe = pipeline.make_pipeline(decomposition.PCA(svd_solver='randomized',
                                                whiten=True), 
                              svm.SVC(class_weight='balanced'))

# note: this rebinds `model`, replacing the grid search fitted on the PCA-projected faces
model = skms.GridSearchCV(pipe, param_grid, n_jobs=-1).fit(faces_train, faces_train_tgt)

In [None]:
model.estimator

In [None]:
# predict on the test data
faces_pred = model.predict(faces_test)

In [None]:
print("Parameters: ", model.best_params_)
print(metrics.classification_report(faces_test_tgt, faces_pred, 
                                    target_names=lfw_people.target_names))
cm = metrics.confusion_matrix(faces_test_tgt, faces_pred, labels=range(n_classes))

ax = sns.heatmap(cm, fmt="3d", annot=True)
ax.set_ylabel('expected')
ax.set_xlabel('predicted');

## Qualitative Results

In [None]:
def idx_to_names(idx):
    """Look up `idx` in `lfw_people.target_names` and keep each name's last word.

    Every selected name is split on whitespace and only the final token
    (presumably the surname) is returned, as a list in lookup order.
    """
    short_names = []
    for full_name in lfw_people.target_names[idx]:
        short_names.append(full_name.split()[-1])
    return short_names
real_names = idx_to_names(faces_test_tgt[:10]) # lfw_people.target_names[faces_test_tgt]
pred_names = idx_to_names(faces_pred[:10])

In [None]:
eval_images = faces_test[:10].reshape(-1, *img_shape)
fig, axes = plt.subplots(2,5,figsize=(10,4))
for ax, img, real, pred in zip(axes.flat, eval_images, real_names, pred_names):
    my_gshow(ax, img)
    ax.set_title("Actual {}\nPredict {}".format(real, pred))
fig.tight_layout()

## The Eigenfaces (Happy Halloween!)

In [None]:
eigenfaces = pca.components_.reshape(n_components, *img_shape)
fig, axes = plt.subplots(2,5,figsize=(10,4))
for idx, (ax, ef) in enumerate(zip(axes.flat, eigenfaces), 1):
    my_gshow(ax, ef)
    ax.set_title("Eigenface {}".format(idx))

# More Complicated Features and a Different Architecture

## The caltech Dataset

In [None]:
# 101_ObjectCategories from:
# "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
# extracted airplanes, butterfly, panda, grand_piano, dollar_bill
# to data/data/101_ObjectCategories/airplanes/*
#                                   etc.

## Bag-of-Visual Words

In [None]:
# object recognition:  is an object (a cat) in an image
# object detection:  where is the object within an image (get a bounding box)

# we are doing recognition

In [None]:
data_path = "data/101_ObjectCategories"
obj_classes = [osp.split(d)[-1] for d in glob.glob(osp.join(data_path, "*"))]
print(obj_classes)

def make_paths(obj_class, max_images=20):
    """Return up to `max_images` image paths for one object class.

    obj_class:  name of a sub-directory of the module-level `data_path`.
    max_images: cap on the number of paths returned (default 20, the value
                that used to be hard-coded).

    Paths come back in whatever order `glob.glob` yields them; they are
    not sorted.  A class directory with no entries gives an empty list.
    """
    pattern = osp.join(data_path, obj_class, "*")
    return glob.glob(pattern)[:max_images]

# read images into a dictionary of object class : [list of images]
loi = [(oc,[my_read(p) for p in make_paths(oc)]) for oc in obj_classes]
training_imgs = co.OrderedDict(loi)

# calculate these now for future use
numbered_images = enumerate(training_imgs.values())
image_labels    = list(it.chain.from_iterable([idx] * len(v) for idx, v in numbered_images))
num_images      = len(image_labels)

class_labels = dict((x,y) for y,x in enumerate(obj_classes))

print(len(training_imgs['airplanes']))
print(training_imgs['airplanes'][0].shape)
print('number of images:', num_images)
print(class_labels)

In [None]:
start = 0
for idx, k in enumerate(training_imgs.keys()):
    print(idx, k, len(training_imgs[k]))
    end = start + len(training_imgs[k])
    assert all(i==class_labels[k] for i in image_labels[start:end]), image_labels[start:end]  
    start = end

In [None]:
# need list of features coming from each class
def map_dict(src, f):
    """Return an OrderedDict with the same keys as `src` and `f(value)` as values.

    `f` receives each value as a whole (for example an entire list), not the
    individual elements inside it.  Key order follows `src.items()`.
    """
    mapped = co.OrderedDict()
    for key, value in src.items():
        mapped[key] = f(value)
    return mapped
    
#src = {1:'red', 2:'cat'}
#def f(v):
#    return(len(v))
#map_dict(src, f)
#{1: 3, 2: 3}

## Extract Descriptors From Each Image

In [None]:
# FIXME:  use orb instead?
def get_sift_desc_one(img):
    """Return the SIFT descriptors computed for a single image.

    A new SIFT object is created on every call; the keypoints that
    `detectAndCompute` also returns are discarded.
    """
    detector = cv2.xfeatures2d.SIFT_create()
    # second argument is the mask; None is passed, as before
    _, desc = detector.detectAndCompute(img, None)
    return desc
get_sift_desc_lst = ft.partial(map, get_sift_desc_one)

In [None]:
lazy_desc = map_dict(training_imgs, get_sift_desc_lst)

In [None]:
# don't have to do this
eager_desc = map_dict(lazy_desc, list)
print(eager_desc['airplanes'][3].shape)

In [None]:
eager_desc['airplanes'] # list with one descriptor array per airplane image (make_paths keeps at most 20 per class); not displayed, since it is not the cell's last expression
# shapes of descriptors for first two airplane images; 
# each has 128 "learning-features-columns"
eager_desc['airplanes'][0].shape, eager_desc['airplanes'][1].shape

In [None]:
# need a single array of *all* of the descriptors over each object
# first flatten the per-class lists into one list with an entry per training image
all_desc = list(it.chain.from_iterable(eager_desc.values()))
# length is the number of training images: at most 20 per class, because
# make_paths slices [:20] (the old "5 * 40 - 2" figure predates that cap)
print(len(all_desc), all_desc[0].shape)
# sanity check: every per-image descriptor array has 128 columns
print(all(d.shape[1] == 128 for d in all_desc))
# note: `all_desc` is rebound here from a list of arrays to one stacked array
all_desc = np.concatenate(all_desc, 0)
all_desc.shape

## Cluster the Descriptors

In [None]:
n_clusters = 20
km_clusterer = cluster.KMeans(n_clusters=n_clusters)
desc_clusters = km_clusterer.fit_predict(all_desc)

In [None]:
# one cluster is a "visual word"
# represent each image as a combination of visual-words
#      frequency of visual words in each image
# 
# vocabulary is histogram of all visual-words over all images

# mapping from *a descriptor* to its cluster
desc_clusters[:10]

In [None]:
# this is superbly annoying, but it gets a sequences of 
# unique image labels for the *entire* image database
# so we can tell what descriptors came from what images
# see next cell for some explanation ...
img_labeled_descs = enumerate(it.chain.from_iterable(eager_desc.values()))
res = list(it.chain.from_iterable([idx] * v.shape[0] for idx, v in img_labeled_descs))
print(len(res))

In [None]:
# where do we flip from first airplane to second airplane
num_descriptors = eager_desc['airplanes'][0].shape[0]
print(res[num_descriptors-1], 
      res[num_descriptors], 
      res[num_descriptors+1])

# where do descriptors start describing third airplane?
base = num_descriptors
num_descriptors = eager_desc['airplanes'][1].shape[0]
print(num_descriptors)
print(res[base+num_descriptors-1], 
      res[base+num_descriptors], 
      res[base+num_descriptors+1])

## Redescribe Images in Terms of Clusters "Visual-Words"

In [None]:
# now, we know descriptors -> clusters (visual words)
# and we know  descriptors -> images
# we need to describe images in terms of visual-words

In [None]:
del eager_desc, all_desc

In [None]:
counts = co.Counter(zip(res, desc_clusters))

table = np.zeros((num_images, n_clusters))
for (img, clust), count in counts.items():
    table[img, clust] = count

## Learn a Model from Redescribed Images to Classes

In [None]:
# column wise standardization is basically like normalizing the HOG histogram:
# it says that the relative amounts in the bins/features are what matter
# not the individual magnitudes of those amounts
# (related to how eigenvectors capture the orientation of a matrix and 
#  eigenvalues capture the scale -- here, we care about just the orientation)

std_svc = pipeline.make_pipeline(skpre.StandardScaler(), svm.SVC())
svc = std_svc.fit(table, image_labels)

## Predict Using Learned Model

##### Prediction Pipeline for One Image

In [None]:
# image -> features 
#       -> clusters for features 
#       -> cluster representation (Clusters of Counts aka Bag of Words)
#       -> normalized+svc

In [None]:
# image
test_image = my_read(osp.join(data_path, "airplanes/image_0700.jpg"))

# --> features
sift = cv2.xfeatures2d.SIFT_create()
_, test_descs = sift.detectAndCompute(test_image, None)

# --> clusters
test_desc_clusters = km_clusterer.predict(test_descs)

# --> histogram
test_cluster_counts = co.Counter(test_desc_clusters)

# (hack it into a pseudo-row for sklearn happiness)
# one column per visual word: take the width from the fitted clusterer rather
# than a literal 20, so the row stays valid if the vocabulary size changes
test_pseudo_row = np.zeros((1, km_clusterer.n_clusters))
for clust, count in test_cluster_counts.items():
    test_pseudo_row[0,clust] = count

# prediction
#print(test_pseudo_row)
svc.predict(test_pseudo_row)

In [None]:
def make_test_paths(obj_class, num_test=5):
    """Return the last `num_test` image paths globbed for one object class.

    obj_class: name of a sub-directory of the module-level `data_path`.
    num_test:  number of trailing paths to return (default 5, the value
               that used to be hard-coded).

    Paths are taken from the end of the `glob.glob` result, which is not
    sorted.  If the class has fewer than `num_test` images, all of them
    are returned.
    """
    pattern = osp.join(data_path, obj_class, "*")
    img_paths = glob.glob(pattern)
    # explicit start index: a plain [-num_test:] would return *every* path for num_test == 0
    return img_paths[max(len(img_paths) - num_test, 0):]

def image_to_example(path):
    """Turn one image file into a 1 x n_clusters bag-of-visual-words row.

    The image is read with `my_read`, SIFT descriptors are computed, each
    descriptor is assigned to a cluster by the module-level `km_clusterer`,
    and the per-cluster counts are written into a single-row array that can
    be passed straight to a fitted sklearn model.

    path: filename of the image.

    Returns a float array of shape (1, km_clusterer.n_clusters).  The row is
    all zeros when SIFT yields no descriptors for the image.
    """
    test_image = my_read(path)

    sift = cv2.xfeatures2d.SIFT_create()
    _, test_descs = sift.detectAndCompute(test_image, None)

    # width follows the fitted vocabulary instead of a hard-coded 20
    test_pseudo_row = np.zeros((1, km_clusterer.n_clusters))
    if test_descs is None:
        # NOTE(review): OpenCV is expected to return None (not an empty array)
        # when no keypoints are found -- confirm for the installed version
        return test_pseudo_row

    test_desc_clusters = km_clusterer.predict(test_descs)
    for clust, count in co.Counter(test_desc_clusters).items():
        test_pseudo_row[0, clust] = count

    return test_pseudo_row

results = []
for oc in obj_classes:
    expected  = class_labels[oc]
    print(oc, expected)
    for test_img_path in make_test_paths(oc):
        example = image_to_example(test_img_path)
        predicted = svc.predict(example)[0]
        results.append((expected, predicted))
results = np.array(results, dtype=np.uint8)

In [None]:
cm_helper(plt.gca(), results[:,0], results[:,1])

# Exercises

##### Interactions between Learners and Features (Descriptors)

Let's compare svm and knn.  We used hog+knn ... let's use a good representation (hog) and a stronger learner (svm).  Use stratified CV to compare the following setups on the digits dataset:

 * svm, knn on raw images
 * svm, knn on hog features


In [None]:
# Second HOG configuration for the exercise: compared with the earlier HOG cell
# the last `default_magic` entry is True and 18 bins are used instead of 9.
# In OpenCV's documented constructor order the tuple holds: derivAperture,
# winSigma, histogramNormType, L2HysThreshold, gammaCorrection, nlevels,
# signedGradient (NOTE(review): confirm against the installed OpenCV version).
# This rebinds the module-level `default_magic` and `hog_d`.
default_magic = (1, -1.0, 0, 0.2, 1, 64, True)
hog_d = cv2.HOGDescriptor((8,8),        # window size (the full 8x8 image)
                          (8,8), (8,8), # block size, block stride
                          (8,8),        # cell size
                          18,            # number of orientation bins 
                          *default_magic)

def extract_hog_features(images, hog_d=hog_d):
    """Compute one HOG descriptor per image and stack them row-wise.

    images: iterable of images accepted by `hog_d.compute`.
    hog_d:  descriptor object to use; the default is the module-level
            `hog_d` as it existed when this function was defined.

    Returns the array built by stacking the squeezed descriptors along
    axis 0 (one entry per image).
    """
    per_image = []
    for img in images:
        # squeeze drops any singleton axes from the descriptor
        per_image.append(hog_d.compute(img).squeeze())
    return np.stack(per_image, 0)

In [None]:
digits = datasets.load_digits()
n_examples = len(digits.images)
data = digits.images.reshape(n_examples, -1)  # N rows, rest is flattened
tgts = digits.target

# the label is shown in the plot titles, so it must match n_neighbors
# (it used to read 'knn(3)' while the classifier was built with n_neighbors=1)
methods = [('knn(1)', neighbors.KNeighborsClassifier(n_neighbors=1)),
           ('svm',    svm.SVC(gamma=0.001))]

hog_data = extract_hog_features(digits.images.astype(np.uint8))

# cross-validated predictions for every (method, use_hog) combination,
# keyed by the (name, use_hog) tuple
predicted = {}
for (name, m), use_hog in it.product(methods, [False, True]):
    my_data = hog_data if use_hog else data
    predicted[name, use_hog] = skms.cross_val_predict(m, my_data, tgts, 
                                                      cv=skms.StratifiedKFold())

In [None]:
fig, axes = plt.subplots(2,2, figsize=(10,10))

# one panel per (method, use_hog) combination stored in `predicted`
for ax, cnds in zip(axes.flat, predicted):
    acc = metrics.accuracy_score(digits.target, predicted[cnds])

    # reuse the notebook's confusion-matrix helper (heatmap plus the
    # 'Actual'/'Predicted' axis labels) instead of repeating it inline
    cm_helper(ax, digits.target, predicted[cnds])
    ax.set_title("Method,UseHog={}\nAcc={:5.4f}".format(cnds, acc))

fig.tight_layout()

A note on the terrible results with SVM and HOG.  The svm we are using has a "Gaussian radial basis function" kernel.  There is deep mathematics beneath it, but one consideration when using it is that it prefers data that is centered (column-wise) around 0 and has variances that are comparable.  You might like to redo the svm/hog example with a pipeline that incorporates a `StandardScaler` (as in the Bag-of-visual-Words example).  Does it help?

##### EigenDigits

Can we construct "eigendigits" (much like we constructed eigenfaces)?  Yes.  Does it help?  Let's find out!  Work through the eigenfaces example, but using the digits data instead.  At the end, display the top 10 eigendigits.

In [None]:
digits_dataset = datasets.load_digits()
n_examples, *img_shape = digits_dataset.images.shape

# keep the flattened pixels under their own name: earlier cells use `digits`
# for the object returned by load_digits (digits.images, digits.target), so
# rebinding it to a plain array would break those cells when they are re-run
digits_flat = digits_dataset.images.reshape(n_examples, -1)
digits_target = digits_dataset.target

(digits_train,     digits_test, 
 digits_train_tgt, digits_test_tgt) = skms.train_test_split(digits_flat, digits_target, test_size=0.25)

In [None]:
# define and fit the pipeline model
param_grid = {'pca__n_components' : [16, 32, 48],
              'svc__C'     : np.logspace(-2, 4, 7), 
              'svc__gamma' : np.logspace(-4, -1, 4)}

pipe = pipeline.make_pipeline(decomposition.PCA(svd_solver='randomized',
                                                whiten=True), 
                              svm.SVC(class_weight='balanced'))

model = skms.GridSearchCV(pipe, param_grid, n_jobs=-1).fit(digits_train, digits_train_tgt)

In [None]:
digits_preds = model.predict(digits_test)

print("Parameters: ", model.best_params_)
print(metrics.classification_report(digits_test_tgt, digits_preds))
cm_helper(plt.gca(), digits_test_tgt, digits_preds)

In [None]:
# PCA step of the best pipeline found by the grid search
best_pca = model.best_estimator_.named_steps['pca']
n_components = best_pca.n_components
# each component is a flattened image; restore the image shape for display
eigendigits = best_pca.components_.reshape(n_components, *img_shape)
fig, axes = plt.subplots(2,5,figsize=(10,4))
# only the first ten components are drawn (zip stops at the ten axes);
# panels are numbered the same way as in the eigenfaces figure
for idx, (ax, ed) in enumerate(zip(axes.flat, eigendigits), 1):
    my_gshow(ax, ed)
    ax.set_title("Eigendigit {}".format(idx))

##### Vocabulary Size

Does enlarging or reducing our vocabulary (the number of clusters)  help with the BOW approach?  Try working through the BOW example with different vocabulary sizes.  While you're at it, you might want to turn the example code above into a few functions to support building the vocabulary and extracting features from the images.

In [None]:
def make_paths(obj_class, max_images=20):
    """Return up to `max_images` image paths for one object class.

    obj_class:  name of a sub-directory of "data/101_ObjectCategories".
    max_images: cap on the number of paths returned (default 20, the value
                that used to be hard-coded).

    Paths come back in whatever order `glob.glob` yields them; they are
    not sorted.  A class directory with no entries gives an empty list.
    """
    pattern = osp.join("data/101_ObjectCategories", obj_class, "*")
    return glob.glob(pattern)[:max_images]

def map_dict(src, f):
    """Return an OrderedDict with the same keys as `src` and `f(value)` as values.

    `f` receives each value as a whole; key order follows `src.items()`.
    """
    mapped = co.OrderedDict()
    for key, value in src.items():
        mapped[key] = f(value)
    return mapped

def create_vocab_and_training_data(img_paths, vocab_size=20):
    """Build a visual-word vocabulary and the bag-of-words training table.

    img_paths:  mapping of class name -> list of images.  Despite the name,
                the values are handed directly to SIFT, so they must be
                already-loaded images rather than file names.
    vocab_size: number of KMeans clusters, i.e. visual words.

    Returns (km_clusterer, table, image_labels):
      km_clusterer -- the KMeans object fitted on all descriptors,
      table        -- array of shape (num_images, vocab_size) where
                      table[i, w] counts the descriptors of image i that
                      fell into cluster w,
      image_labels -- one class index per image; the index is the position
                      of the image's class in img_paths.

    Images for which SIFT returns no descriptors keep an all-zero row.
    """
    # class index of every image, in the iteration order of img_paths
    image_labels = [class_idx
                    for class_idx, imgs in enumerate(img_paths.values())
                    for _ in imgs]
    num_images = len(image_labels)

    # one detector for the whole run; descriptors are computed once, eagerly,
    # in the same class-then-image order as image_labels
    sift = cv2.xfeatures2d.SIFT_create()
    per_image_desc = [sift.detectAndCompute(img, None)[1]
                      for imgs in img_paths.values()
                      for img in imgs]

    # NOTE(review): OpenCV is expected to return None when an image has no
    # keypoints -- such images contribute no descriptor rows
    desc_counts = [0 if desc is None else desc.shape[0] for desc in per_image_desc]

    # single array of *all* descriptors, used to learn the vocabulary
    all_desc = np.concatenate([d for d in per_image_desc if d is not None], 0)
    km_clusterer = cluster.KMeans(n_clusters=vocab_size)
    desc_clusters = km_clusterer.fit_predict(all_desc)

    # image index of every row of all_desc (descriptor -> image)
    desc_image_idx = [img_idx
                      for img_idx, n_desc in enumerate(desc_counts)
                      for _ in range(n_desc)]

    # descriptor -> image and descriptor -> cluster give the per-image word counts
    table = np.zeros((num_images, vocab_size))
    for (img_idx, clust), count in co.Counter(zip(desc_image_idx, desc_clusters)).items():
        table[img_idx, clust] = count

    return km_clusterer, table, image_labels

def make_test_paths(obj_class, num_test=5):
    """Return the last `num_test` image paths globbed for one object class.

    obj_class: name of a sub-directory of "data/101_ObjectCategories".
    num_test:  number of trailing paths to return (default 5, the value
               that used to be hard-coded).

    Paths are taken from the end of the `glob.glob` result, which is not
    sorted.  If the class has fewer than `num_test` images, all of them
    are returned.
    """
    # same root as make_paths and the obj_classes glob in this notebook; the
    # previous "101_ObjectCategories" (no "data/" prefix) matched no files
    pattern = osp.join("data/101_ObjectCategories", obj_class, "*")
    img_paths = glob.glob(pattern)
    # explicit start index: a plain [-num_test:] would return *every* path for num_test == 0
    return img_paths[max(len(img_paths) - num_test, 0):]

def image_to_example(path, vocab, vocab_size=20):
    """Turn one image file into a 1 x vocab_size bag-of-visual-words row.

    path:       filename of the image (read with `my_read`).
    vocab:      fitted clusterer; its `predict` maps descriptors to cluster ids.
    vocab_size: width of the returned row.  It has to cover every cluster id
                `vocab` can produce, so pass the size the vocabulary was
                built with.

    Returns a float array of shape (1, vocab_size) holding the number of
    descriptors assigned to each cluster.  The row is all zeros when SIFT
    yields no descriptors for the image.
    """
    test_image = my_read(path)

    sift = cv2.xfeatures2d.SIFT_create()
    _, test_descs = sift.detectAndCompute(test_image, None)

    test_pseudo_row = np.zeros((1, vocab_size))
    if test_descs is None:
        # NOTE(review): OpenCV is expected to return None (not an empty array)
        # when no keypoints are found -- confirm for the installed version
        return test_pseudo_row

    test_desc_clusters = vocab.predict(test_descs)
    for clust, count in co.Counter(test_desc_clusters).items():
        test_pseudo_row[0, clust] = count
    return test_pseudo_row

In [None]:
# read images into a dictionary of class : [list of images]
obj_classes = [osp.split(d)[-1] for d in glob.glob("data/101_ObjectCategories/*")]
training_img_paths = co.OrderedDict((oc,[my_read(p) for p in make_paths(oc)]) 
                                                    for oc in obj_classes) # outer loop

# create vocabulary and training data
vocab_size = 20
vocab, training_data, labels = create_vocab_and_training_data(training_img_paths, vocab_size)

# build learning model
std_svc = pipeline.make_pipeline(skpre.StandardScaler(), svm.SVC())
svc = std_svc.fit(training_data, labels)

class_labels = dict((x,y) for y,x in enumerate(obj_classes))

results = []
for oc in obj_classes:
    expected  = class_labels[oc]
    for test_img_path in make_test_paths(oc):
        example = image_to_example(test_img_path, vocab, vocab_size)
        predicted = svc.predict(example)[0]
        results.append((expected, predicted))
results = np.array(results, dtype=np.uint8)