In [None]:
from utils_io import read_json, load_items

# Read the notebook configuration from the JSON parameter file.
params = read_json('parameters.json')

# Target width/height passed to cv2.resize when item images are displayed.
resize_cfg = params['resize']
RESIZE_X, RESIZE_Y = resize_cfg['x'], resize_cfg['y']

# Root folder of the item data; each item has its own sub-folder in it.
ITEM_FOLDER = params['item_folder']
# Presumably a list of item names (it is iterated, indexed and sampled below).
items = load_items(ITEM_FOLDER)

# Histogram list: later holds (item, view, hist, cluster_centers) tuples.
hl = list()

Load precomputed histograms of dominant colors for each item and view

In [None]:
# Collect the precomputed dominant-colour data of every (item, view) pair.
views = ['top_01','top-side_01','top-side_02','bottom_01','bottom-side_01','bottom-side_02']

# Start from an empty list in this cell so that re-running the cell does not
# append every entry a second time (which would also distort the distance
# matrices, whose size is taken from len(hl)).
hl = []  # histogram list of (item, view, hist, cluster_centers) tuples
for item in items:
    for view in views:
        filename = ITEM_FOLDER + '/' + item + '/' + item + '_' + view + '_dc.json'
        try:
            dc = read_json(filename)
        except IOError:
            # Best effort: presumably not every item has a file for every
            # view, so unreadable files are skipped on purpose.
            continue
        hist = dc['hist']
        obj_cc = dc['cluster_centers']
        hl.append( (item, view, hist, obj_cc) )

Calculate the distance matrix for each item and view

In [None]:
import numpy as np

# Item/view distance matrix: square, one row and one column per entry of hl,
# initialised with zeros (so the diagonal stays 0 after the fill below).
n_views = len(hl)
ivdm = np.zeros((n_views, n_views))

In [None]:
from utils_color import calc_EMD2

# Fill the item/view distance matrix. calc_EMD2 is evaluated once per
# unordered pair (upper triangle only) and the result is mirrored, so the
# matrix is symmetric. (EMD presumably stands for Earth Mover's Distance.)
for i, (item_a, view_a, hist_a, centers_a) in enumerate(hl):
    for j in range(i + 1, len(hl)):
        item_b, view_b, hist_b, centers_b = hl[j]
        pair_distance = calc_EMD2(hist_a, centers_a, hist_b, centers_b)
        ivdm[i, j] = pair_distance
        ivdm[j, i] = pair_distance

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Heat map of the item/view distance matrix. The colour bar gives the
# distance scale and the title lets the figure stand on its own.
plt.imshow(ivdm, cmap='jet')
plt.colorbar()
plt.title('Item/view distance matrix');

Calculate the distance matrix for items: the distance between two items is the minimum distance over all pairs of their views

In [None]:
# Item distance matrix, one row and one column per item. Every cell starts at
# the large sentinel value 1000 so that the minimum search in the next cell
# can only lower it.
n = len(items)
idm = np.full((n, n), 1000.0) # item distance matrix

In [None]:
# Reduce the view-level distances to item-level distances: the distance of two
# items is the smallest distance found between any pair of their views.
n = len(hl)

# Row/column in idm of the item behind each hl entry, looked up once here
# instead of calling items.index() twice per pair inside the nested loop.
item_rows = [items.index(entry[0]) for entry in hl]

for i in range(n):
    iti = item_rows[i]
    for j in range(i+1, n):
        itj = item_rows[j]
        # Two views of the same item give iti == itj, so the diagonal ends up
        # holding the smallest distance between different views of that item.
        if ivdm[i][j] < idm[iti][itj]:
            idm[iti][itj] = ivdm[i][j]
            idm[itj][iti] = ivdm[i][j]

In [None]:
# Heat map of the item distance matrix. The colour bar gives the distance
# scale and the title lets the figure stand on its own.
plt.imshow(idm, cmap='jet')
plt.colorbar()
plt.title('Item distance matrix');

Plot distances to a selected item

In [None]:
def plot_distance(item, d):
    """Plot the distances from `item` to all items and list the close ones.

    Reads the module-level `items` list and `idm` item distance matrix.

    Parameters:
        item: name of the reference item; must be an element of `items`.
        d: distance threshold, drawn as a red horizontal line.

    Prints one '<distance> <item>' line, in ascending order of distance, for
    every item whose distance to `item` is below `d`.
    """
    idx = items.index(item)
    row = idm[idx]
    plt.plot(row, 'b-')
    plt.plot(row, 'bo')
    plt.title(items[idx])
    # Let the threshold line span the whole x-range, however many items exist.
    plt.plot([0, len(row) - 1], [d, d], 'r-')
    plt.show()
    # Distinct loop names keep the threshold parameter `d` from being shadowed.
    for dist, name in sorted((dist, name) for name, dist in zip(items, row) if dist < d):
        print('%f %s' % (dist, name))

In [None]:
from ipywidgets import interact
# Interactive front end for plot_distance: ipywidgets builds the controls from
# the keyword arguments (the item candidates and an initial threshold of 20).
interact(plot_distance, item=items, d=20);

### Clustering

In [None]:
from sklearn.cluster import AffinityPropagation

In [None]:
# Cluster the items with affinity propagation. With affinity='precomputed'
# the estimator takes a similarity matrix, hence the negated distances.
af = AffinityPropagation(affinity='precomputed', damping=0.5).fit(-idm)

cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

# Print every cluster: the exemplar first, then the other members indented.
for idx, kls in enumerate(cluster_centers_indices):
    print(items[kls])
    for it, lb in zip(items, labels):
        if lb == idx and it != items[kls]:
            print('    %s' % it)
    # Blank separator line. A bare `print` is a no-op expression in Python 3;
    # print('') emits an empty line under both Python 2 and Python 3.
    print('')

In [None]:
def print_cluster(item):
    """Print all items that carry the same cluster label as `item`.

    Reads the module-level `items` and `labels` sequences, which are walked
    in parallel; the members are printed one per line in `items` order
    (`item` itself included).
    """
    wanted = labels[items.index(item)]
    for member, member_label in zip(items, labels):
        if member_label == wanted:
            print(member)
        
# Interactive front end for print_cluster; the selectable items are the
# cluster exemplars (items at cluster_centers_indices).
interact(print_cluster, item=[items[i] for i in cluster_centers_indices]);

In [None]:
from utils_io import imread_rgb
import cv2
def show_cluster(item, view):
    """Show the images of all items in the cluster of `item`, three per row.

    Reads the module-level `items`, `labels`, `ITEM_FOLDER`, `RESIZE_X` and
    `RESIZE_Y`.

    Parameters:
        item: name of an item; selects the cluster to display.
        view: view name used to build each image file name
              (<ITEM_FOLDER>/<item>/<item>_<view>.png).
    """
    label = labels[items.index(item)]
    cluster = [it for it, lb in zip(items, labels) if lb == label]
    per_row = 3
    pos = 0  # number of images already placed in the current row
    # `member` instead of `item`, so the parameter is not shadowed.
    for member in cluster:
        filename = ITEM_FOLDER + '/' + member + '/' + member + '_' + view + '.png'
        image = imread_rgb(filename)
        image = cv2.resize(image, (RESIZE_X, RESIZE_Y))
        pos += 1
        plt.subplot(1, per_row, pos)
        plt.imshow(image)
        plt.axis('off')
        if pos == per_row:
            plt.show()
            pos = 0
    if pos:
        # Flush a last, partially filled row; otherwise its axes stay in the
        # current figure and get mixed up with whatever is plotted next.
        plt.show()
            
# Interactive front end for show_cluster: pick a cluster exemplar and one of
# the entries of `views`.
interact(show_cluster, item=[items[i] for i in cluster_centers_indices], view=views);

### Distribution of Items in Bins

In [None]:
import random

# Number of items drawn for the competition set.
COMPETITION_SET_SIZE = 32

# random.sample raises ValueError when asked for more elements than the
# population holds, so the sample size is capped at the number of items.
# NOTE: the draw is unseeded; call random.seed(<int>) first if a reproducible
# competition set is needed.
competition_set = random.sample(items, min(COMPETITION_SET_SIZE, len(items)))

# Number of bins the competition set is distributed over further below.
bins = 5

In [None]:
# Display the randomly drawn competition set.
competition_set

In [None]:
# Group the competition-set items by the cluster they were assigned to above.
# `cluster` becomes a list of non-empty lists, one per cluster that contains
# at least one competition-set item.
cluster = []
for idx in range(len(cluster_centers_indices)):
    elements = []
    for it, lb in zip(items, labels):
        if lb == idx and it in competition_set:
            print('    %s' % it)
            elements.append(it)
    # Blank separator line. A bare `print` is a no-op expression in Python 3;
    # print('') emits an empty line under both Python 2 and Python 3.
    print('')
    if elements:
        cluster.append(elements)

In [None]:
# One empty list per bin; each bin is a distinct list object.
b = [[] for ibin in range(bins)]

In [None]:
# Deal the items out to the bins round-robin, walking through the clusters one
# after another, so consecutive members of a cluster go to consecutive bins.
ibin = 0
for c in cluster:
    for member in c:
        b[ibin].append(str(member))
        # Advance to the next bin, wrapping around after the last one.
        ibin = (ibin + 1) % bins

In [None]:
# Display the resulting bin contents (a list with one list of item names per bin).
b

## Cluster the items in the competition set only

In [None]:
# NOTE: from here on the global `items` is rebound to the competition set, so
# the cells above no longer see the full item list unless the first cell is
# run again.
items = competition_set

# Reload the dominant-colour data, this time for the competition set only.
hl = [] # histogram list
views = ['top_01','top-side_01','top-side_02','bottom_01','bottom-side_01','bottom-side_02']
for item in items:
    for view in views:
        filename = '/'.join([ITEM_FOLDER, item, item + '_' + view + '_dc.json'])
        try:
            dc = read_json(filename)
            hist = dc['hist']
            obj_cc = dc['cluster_centers']
        except IOError:
            # Unreadable file: skip this (item, view) pair.
            continue
        hl.append( (item, view, hist, obj_cc) )

In [None]:
# Same pipeline as for the full item list, now on the competition set only:
# view-level distances -> item-level distances -> affinity propagation.

# Item/view distance matrix: upper triangle computed, then mirrored.
ivdm = np.zeros( (len(hl),len(hl)) ) # Item/view distance matrix
for i, (it1, vi1, hi1, cc1) in enumerate(hl):
    for j, (it2, vi2, hi2, cc2) in enumerate(hl):
        if j>i:
            ivdm[i][j] = calc_EMD2(hi1, cc1, hi2, cc2)
            ivdm[j][i] = ivdm[i][j]

# Item distance matrix, initialised with the large sentinel value 1000 and
# lowered to the smallest distance between any pair of views of two items.
n = len(items)
idm = np.ones( (n, n) ) * 1000 # item distance matrix
n = len(hl)
# Row/column in idm of the item behind each hl entry, looked up once here
# instead of calling items.index() twice per pair inside the nested loop.
item_rows = [items.index(entry[0]) for entry in hl]
for i in range(n):
    iti = item_rows[i]
    for j in range(i+1, n):
        itj = item_rows[j]
        if ivdm[i][j] < idm[iti][itj]:
            idm[iti][itj] = ivdm[i][j]
            idm[itj][iti] = ivdm[i][j]

# With affinity='precomputed' the estimator takes a similarity matrix, hence
# the negated distances.
af = AffinityPropagation(affinity='precomputed', damping=0.5).fit(-idm)

cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

# Print every cluster: the exemplar first, then the other members indented.
for idx, kls in enumerate(cluster_centers_indices):
    print(items[kls])
    for it, lb in zip(items, labels):
        if lb == idx and it != items[kls]:
            print('    %s' % it)
    # Blank separator line. A bare `print` is a no-op expression in Python 3;
    # print('') emits an empty line under both Python 2 and Python 3.
    print('')