In [1]:
import numpy as np
import cPickle as pickle
import scipy.io
import sys
import os
sys.path.append(os.path.expanduser('~/projects/shape_sharing/src/'))

from sklearn.decomposition import RandomizedPCA
from sklearn.cluster import MiniBatchKMeans

from common import paths
from common import parameters
# Remind the user up front when the project-wide small-sample flag is enabled.
if parameters.small_sample:
    print("WARNING: Just computing on a small sample")


def pca_randomized(X_in, local_subsample_length, num_pca_dims):
    """Fit a RandomizedPCA model on a random subset of the rows of X_in.

    Parameters
    ----------
    X_in : array
        Data with one example per row (indexed along axis 0).
    local_subsample_length : int
        Upper bound on the number of rows used for fitting; when X_in has
        fewer rows than this, every row is used.
    num_pca_dims : int
        Passed to RandomizedPCA as ``n_components``.

    Returns
    -------
    The fitted RandomizedPCA instance.
    """
    num_rows = X_in.shape[0]
    sample_size = np.minimum(local_subsample_length, num_rows)

    # Draw distinct row indices, then sort them so the selected rows keep
    # their original relative order.
    chosen_rows = np.random.choice(num_rows, sample_size, replace=False)
    chosen_rows.sort()
    subsample = X_in.take(chosen_rows, axis=0)

    model = RandomizedPCA(n_components=num_pca_dims)
    model.fit(subsample)
    return model


def cluster_data(X, local_subsample_length, num_clusters):
    """Fit MiniBatchKMeans on a random row-subsample of X.

    Parameters
    ----------
    X : array
        Data with one example per row (indexed as ``X[rows, :]``).
    local_subsample_length : int
        Maximum number of rows used for clustering. When X has this many
        rows or fewer, all of X is used unchanged.
    num_clusters : int
        Passed to MiniBatchKMeans as ``n_clusters``.

    Returns
    -------
    The fitted MiniBatchKMeans instance.
    """
    num_rows = X.shape[0]

    if local_subsample_length >= num_rows:
        # Nothing to subsample: use every row exactly once.
        X_subset = X
    else:
        # Sample WITHOUT replacement so no row is counted twice; this matches
        # the sampling used by pca_randomized. (randint would draw with
        # replacement and silently duplicate rows.)
        to_use_for_clustering = np.random.choice(
            num_rows, local_subsample_length, replace=False)
        X_subset = X[to_use_for_clustering, :]

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(X.shape)
    print(X_subset.shape)

    # doing clustering
    km = MiniBatchKMeans(n_clusters=num_clusters)
    km.fit(X_subset)
    return km

# Load the shoebox and feature arrays of every training sequence and stack
# them into two big arrays (one row per example, concatenated along axis 0).
#
# TODO: no save path is opened here, although the original note intended to
# open one early so that path errors surface before the slow loading loop.

# initialise lists
shoeboxes = []
features = []

for count, sequence in enumerate(paths.RenderedData.train_sequence()):

    # Check the limit BEFORE loading so that at most max_sequences sequences
    # are read. (Checking `count > max_sequences` after the load read
    # max_sequences + 2 of them.)
    if count >= parameters.max_sequences:
        print("SMALL SAMPLE: Stopping")
        break

    print("Processing " + sequence['name'])

    # loading the data
    loadpath = paths.RenderedData.voxlets_dict_data_path + \
        sequence['name'] + '.mat'
    print("Loading from " + loadpath)

    D = scipy.io.loadmat(loadpath)
    # Down-cast to half precision -- presumably to limit memory use; confirm
    # the precision loss is acceptable for the models fitted later.
    shoeboxes.append(D['shoeboxes'].astype(np.float16))
    features.append(D['features'].astype(np.float16))

# np.concatenate raises an unhelpful ValueError on an empty list; fail with a
# message that points at the likely cause instead (same exception type).
if not shoeboxes:
    raise ValueError(
        "No training sequences were loaded: check "
        "paths.RenderedData.train_sequence() and parameters.max_sequences")

np_all_sboxes = np.concatenate(shoeboxes, axis=0)
np_all_features = np.concatenate(features, axis=0)
print("All sboxes shape is " + str(np_all_sboxes.shape))
print("Features shape is " + str(np_all_features.shape))

In [4]:
# Fit an Isomap embedding, with all constructor arguments left at their
# defaults, on the concatenated shoebox array built by the loading cell.
# NOTE(review): this fits on every row of np_all_sboxes with no subsampling;
# presumably slow / memory-hungry on the full data set -- confirm.
from sklearn.manifold import Isomap
iso = Isomap()
iso.fit(np_all_sboxes)

In [34]:
# Fit kernel PCA with an RBF kernel on the concatenated shoebox array; every
# other KernelPCA argument is left at its library default.
# NOTE(review): fitted on all rows without subsampling -- confirm this is
# feasible outside small-sample mode.
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(kernel='rbf')
kpca.fit(np_all_sboxes)

In [25]:
# Fit FeatureAgglomeration with 50 feature clusters on the concatenated
# shoebox array. The two commented-out lines are disabled Isomap experiments
# (clearing iso.training_data_ and transforming two rows).
#iso.training_data_ = []
from sklearn.cluster import FeatureAgglomeration
# NOTE(review): 50 is a hard-coded cluster count; consider a named constant.
fa = FeatureAgglomeration(n_clusters=50)
fa.fit(np_all_sboxes)
#iso.transform(np_all_sboxes[10:12, :])

In [19]:
# Fit RandomizedPCA, with no constructor arguments, on the concatenated
# shoebox array. This does not go through pca_randomized(), so there is no
# subsampling here.
# NOTE: this import repeats the one in the first cell; it is redundant once
# that cell has been run.
from sklearn.decomposition import RandomizedPCA
rpca = RandomizedPCA()
rpca.fit(np_all_sboxes)

In [36]:
# Project the concatenated shoebox array through three of the fitted models
# (rpca, fa, kpca) so the results can be plotted in the next cell.
# NOTE(review): this cell needs rpca, fa and kpca to exist already, and the
# execution counts in this notebook are out of order (this cell is In [36],
# the cells above are In [34], In [25] and In [19]); confirm the notebook
# still works under Restart Kernel -> Run All.
%matplotlib inline
import matplotlib.pyplot as plt
# X: RandomizedPCA projection.
X = rpca.transform(np_all_sboxes)
# X2: FeatureAgglomeration output (only referenced by commented-out plotting).
X2 = fa.transform(np_all_sboxes)
# X_kpca: KernelPCA (rbf) projection.
X_kpca = kpca.transform(np_all_sboxes)

In [41]:
# Side-by-side scatter plots of two projections of the shoebox data, each
# showing column 9 (x-axis) against column 1 (y-axis) of the projected array.
# Uses explicit axes and labels every panel so the figure can be read on its
# own; the original panels had no titles or axis labels.
fig, (ax_rpca, ax_kpca) = plt.subplots(1, 2, figsize=(10, 10))

# Left panel: RandomizedPCA projection (X = rpca.transform(...)).
ax_rpca.plot(X[:, 9], X[:, 1], 'r.')
ax_rpca.set_title('RandomizedPCA projection')
ax_rpca.set_xlabel('component 9 (0-indexed)')
ax_rpca.set_ylabel('component 1 (0-indexed)')

# Right panel: KernelPCA (rbf) projection (X_kpca = kpca.transform(...)).
# Disabled alternative for this panel -- FeatureAgglomeration output:
#ax_kpca.plot(X2[:, 2], X2[:, 1], 'b.')
ax_kpca.plot(X_kpca[:, 9], X_kpca[:, 1], 'b.')
ax_kpca.set_title('KernelPCA (rbf) projection')
ax_kpca.set_xlabel('component 9 (0-indexed)')
ax_kpca.set_ylabel('component 1 (0-indexed)')

# Disabled: persist the fitted Isomap model.
# NOTE(review): if re-enabled, open the file in binary mode ('wb') -- binary
# pickle protocols are not safe to write through a text-mode handle -- and
# close the handle (e.g. with a `with` block).
#pickle.dump(iso, open('/tmp/iso.pkl', 'w'), protocol=pickle.HIGHEST_PROTOCOL)