In [1]:
# The star import stays first so that every explicit import below takes
# precedence over any same-named symbol it injects. Note that it shadows
# builtins such as `sum`, `mean`-like helpers then resolve to NumPy versions.
from numpy import *

import cPickle
import cPickle as pickle
import sys
from os.path import join
from random import randrange

import matplotlib
import numpy as np
import scipy.io as io
from IPython.display import display
from ipywidgets import FloatProgress
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.feature_extraction import image

## Unsupervised feature learning

In [2]:
def normalization(patches):
    """Standardize every column of ``patches`` to zero mean and unit variance.

    Parameters
    ----------
    patches : ndarray of shape (n_samples, n_features)
        One flattened patch per row.

    Returns
    -------
    ndarray of shape (n_samples, n_features)
        ``(patches - column_mean) / column_std``. Columns whose standard
        deviation is exactly zero are only centered (they become all zeros)
        instead of producing NaN/inf through a division by zero.
    """
    means_patches = np.mean(patches, axis=0)
    std_patches = np.std(patches, axis=0)
    # A constant column has std == 0; dividing by 1 keeps it finite.
    safe_std = np.where(std_patches == 0, 1.0, std_patches)
    return (patches - means_patches[np.newaxis, :]) / safe_std[np.newaxis, :]

def whitening(patches, eps=0.01):
    """Apply ZCA whitening to the rows of ``patches``.

    The whitening matrix is ``V diag(1 / sqrt(lambda + eps)) V^T`` where
    ``lambda`` / ``V`` are the eigenvalues / eigenvectors of the feature
    covariance of ``patches``.

    Parameters
    ----------
    patches : ndarray of shape (n_samples, n_features)
        One flattened patch per row. The data is multiplied as-is; only the
        covariance estimate is mean-centered (by ``np.cov``).
    eps : float, optional
        Regulariser added to every eigenvalue before inversion (default 0.01,
        the value that was previously hard-coded).

    Returns
    -------
    ndarray of shape (n_samples, n_features)
        ``patches`` projected through the whitening matrix.
    """
    # atleast_2d: np.cov collapses to a 0-d array for a single feature column.
    cov = np.atleast_2d(np.cov(patches, rowvar=False))
    # The covariance is symmetric, so eigh is the right decomposition: it
    # guarantees real eigenvalues and orthonormal eigenvectors, whereas the
    # general-purpose eig may return complex values from round-off.
    eig_values, eig_vec = np.linalg.eigh(cov)
    # Round-off can make a zero eigenvalue slightly negative; clip before sqrt.
    scale = 1.0 / np.sqrt(np.maximum(eig_values, 0.0) + eps)
    zca = (eig_vec * scale).dot(eig_vec.T)
    return np.dot(patches, zca)

In [None]:
# LOAD THE PICKLED BATCH FILE 'data_batch_1'
# NOTE: unpickling can execute arbitrary code; only load trusted files.
batch_path = join('cifar-10-batches-py', 'data_batch_1')
with open(batch_path, 'rb') as f:
    data = pickle.load(f)

# Each row is reshaped to (3, 32, 32); the channel axis is then moved last,
# giving an array of shape (N, 32, 32, 3) in float64.
images = (
    data['data']
    .reshape((-1, 3, 32, 32))
    .astype('float64')
    .transpose(0, 2, 3, 1)
)

In [4]:
# EXTRACT RANDOM PATCHES
# NBPATCH random 6x6x3 patches are drawn from every image and stored as
# flattened rows. The number of images is taken from `images` itself instead
# of a hard-coded 10000.
rng = np.random.RandomState(0)  # shared across calls so each image gets different patches
NBPATCH = 16
n_images = images.shape[0]
patches = np.zeros((NBPATCH * n_images, 6, 6, 3))
for i in range(n_images):
    start = i * NBPATCH
    # max_patches is passed by keyword: it is keyword-only in newer
    # scikit-learn releases and accepted by keyword in older ones.
    patches[start:start + NBPATCH] = image.extract_patches_2d(
        images[i], (6, 6), max_patches=NBPATCH, random_state=rng)

# Flatten each 6x6x3 patch into a 108-dimensional row vector.
patches = patches.reshape(NBPATCH * n_images, -1)


In [5]:
# Standardize each patch dimension, then whiten the result.
# `patches` is overwritten in place of the raw patches, so re-running this
# cell alone would preprocess already-preprocessed data.
patches = whitening(normalization(patches))

In [6]:
# RUN K-MEANS
# A single initialisation (n_init=1) with a fixed seed keeps the run
# reproducible; verbose=True prints the inertia at every iteration.
NUM_CLUSTERS = 50
# n_jobs is no longer passed: it was 1 (the historical default) and the
# argument has been removed from KMeans in newer scikit-learn releases.
km = KMeans(n_clusters=NUM_CLUSTERS, random_state=0, n_init=1, verbose=True)
# Only the fitted model is needed here; the labels returned by fit_predict
# were being discarded.
km.fit(patches)
# One 6x6x3 centroid "patch" per cluster.
centroids = km.cluster_centers_.reshape((NUM_CLUSTERS, 6, 6, 3))

Initialization complete
Iteration  0, inertia 8826650.968
Iteration  1, inertia 8215325.480
Iteration  2, inertia 8067719.961
Iteration  3, inertia 8004069.770
Iteration  4, inertia 7970058.544
Iteration  5, inertia 7946554.325
Iteration  6, inertia 7926754.545
Iteration  7, inertia 7911865.533
Iteration  8, inertia 7901392.475
Iteration  9, inertia 7893543.972
Iteration 10, inertia 7887505.209
Iteration 11, inertia 7882763.716
Iteration 12, inertia 7878864.410
Iteration 13, inertia 7875426.448
Iteration 14, inertia 7872519.959
Iteration 15, inertia 7869800.366
Iteration 16, inertia 7867355.577
Iteration 17, inertia 7865069.052
Iteration 18, inertia 7862912.291
Iteration 19, inertia 7861102.683
Iteration 20, inertia 7859578.374
Iteration 21, inertia 7858156.717
Iteration 22, inertia 7856814.314
Iteration 23, inertia 7855508.741
Iteration 24, inertia 7854182.266
Iteration 25, inertia 7852834.895
Iteration 26, inertia 7851435.723
Iteration 27, inertia 7850016.236
Iteration 28, inertia 78

## Learn features

In [7]:
# READ THE DATA / YOU CAN READ EITHER THE SAME BATCH OR ANOTHER ONE
# sys.maxsize disables array summarisation when printing; NaN is rejected as
# a threshold by recent NumPy versions.
np.set_printoptions(threshold=sys.maxsize)

# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(join('cifar-10-batches-py','test_batch'),'rb') as f:
    data_2 = pickle.load(f)

# Build the image array from the batch that was just loaded (data_2). It was
# previously built from `data` and then overwritten with a re-rolled copy of
# `images`, which produced a (N, 32, 3, 32) array of the first batch.
data2 = data_2['data'].reshape((-1,3,32,32)).astype('float32')
data2 = np.rollaxis(data2, 1, 4)  # channel axis last -> (N, 32, 32, 3)
labels2 = data_2['labels']
# NOTE(review): the patch-extraction cell that follows reads `images`, not
# `data2`, so `labels2` does not describe the images that are featurised
# there -- confirm which batch is intended.

In [8]:
# EXTRACT DETERMINISTIC PATCHES WITH STRIDE
# A patch_size x patch_size window is placed every (patch_size + s) pixels
# along both image axes; `loss` is the number of trailing pixels per axis
# that no window covers.
patch_size = 6
s = 1
img_side = 32
step = patch_size + s
# Floor division keeps nb_patches an integer on both Python 2 and Python 3.
nb_patches = img_side // step
loss = img_side - step * nb_patches

# Row order of the result is window-major: all images for window (0, 0),
# then all images for window (0, 1), and so on (x outer, y inner).
# NOTE(review): patches are cut from `images` (loaded from data_batch_1),
# not from the batch read in the previous cell -- confirm this is intended.
tiles = [
    images[:, x:x + patch_size, y:y + patch_size, :]
    for x in range(0, img_side - loss, step)
    for y in range(0, img_side - loss, step)
]
# A single concatenate replaces the repeated re-allocation inside the loops;
# the float64 cast matches the dtype of the former accumulator array.
patches = np.concatenate(tiles, axis=0).astype('float64', copy=False)

# Flatten every patch into one row vector.
patches = patches.reshape((patches.shape[0], -1))


In [9]:
# APPLY NORMALIZATION AND WHITENING TO THE STRIDED PATCHES
# NOTE(review): both helpers derive their statistics (mean/std, covariance)
# from the array they are given, so this recomputes them from these patches
# rather than reusing the ones obtained before k-means was fitted.
patches = whitening(normalization(patches))

In [10]:
# GET THE CLUSTER ASSIGNMENT FOR EACH PATCH
# The fitted k-means model returns one cluster index per row of `patches`.
newCls = km.predict(X=patches)

In [11]:
# TRANSFORM EACH PATCH'S CLUSTER INDEX INTO A BINARY (ONE-HOT) VECTOR
# The row count comes from `newCls` instead of a hard-coded 160000, and the
# assignment is done in one vectorised step: row r gets a 1 in column
# newCls[r] and zeros elsewhere.
n_assigned = len(newCls)
Kpatches = np.zeros((n_assigned, NUM_CLUSTERS))
Kpatches[np.arange(n_assigned), newCls] = 1

In [12]:
# CONSTRUCT THE REPRESENTATION OF THE IMAGES USING THE BINARY VECTORS
# Result: cls_images[img, i, j] is the one-hot vector of the patch taken from
# image `img` at window row i, window column j.
#
# The rows of Kpatches follow the order in which the patches were extracted:
# window-major (all images for window (0, 0), then all images for window
# (0, 1), ...). Reading them sequentially as "image, then window" -- as the
# previous triple loop did -- mixes patches of different images into one
# grid, so the array is reshaped to (row, col, image, cluster) and the image
# axis is moved to the front instead.
grid = int(nb_patches)
n_images = Kpatches.shape[0] // (grid * grid)
cls_images = (
    Kpatches
    .reshape(grid, grid, n_images, NUM_CLUSTERS)
    .transpose(2, 0, 1, 3)
    .copy()  # own memory, independent of Kpatches
)
# Not used in the visible cells; kept in case later cells read them.
a, b = nb_patches, nb_patches

In [13]:
# CREATE THE FEATURE VECTORS THAT WILL BE USED IN NAIVE BAYES
# WE WILL CLASSIFY THE FEATURES (REPRESENTATION OF THE IMAGE), NOT THE IMAGES
#
# For every image the patch grid is split into four quadrants and, for each
# cluster k, the one-hot entries are summed per quadrant. Layout of one row:
#   features[i, 4*k + 0] -> top-left      features[i, 4*k + 1] -> top-right
#   features[i, 4*k + 2] -> bottom-left   features[i, 4*k + 3] -> bottom-right

nb_features = 4 * NUM_CLUSTERS
# Floor division keeps `half` usable as a slice bound on Python 2 and 3.
half = int(nb_patches) // 2
n_images = cls_images.shape[0]

first, second = slice(0, half), slice(half, None)
quadrants = [
    cls_images[:, first, first, :],
    cls_images[:, first, second, :],
    cls_images[:, second, first, :],
    cls_images[:, second, second, :],
]
# Each quadrant sum has shape (n_images, NUM_CLUSTERS); stacking along a
# third axis and flattening yields the cluster-major / quadrant-minor layout
# described above. np.sum is used explicitly rather than relying on the
# star-imported `sum` shadowing the builtin.
quadrant_counts = np.dstack([np.sum(q, axis=(1, 2)) for q in quadrants])
features = quadrant_counts.reshape(n_images, nb_features)

# Save the features to be used in Naive Bayes.
# The target directory must already exist. The file is opened in a `with`
# block so the handle is closed (and flushed) deterministically.
# NOTE(review): the path mentions "150" / "300-16" while NUM_CLUSTERS is 50
# in this notebook -- confirm the file name is the intended one.
with open("features/hard-k-150/raw-data/projecteatures-hard-300-16.obj", "wb") as out_file:
    pickle.dump(features, out_file)