In [35]:
import numpy as np
import cv2
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans


def unpickle(file):
    '''
        @description Load and return the object stored in a pickle file
        @param file -> str: path of the pickle file to read
        @return the unpickled object (loaded with encoding='bytes', so
        8-bit strings written by Python 2 come back as bytes)
        NOTE: unpickling can run arbitrary code, only use this on files
        that come from a trusted source
    '''
    import pickle
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

def getImage(img_mat, plot=False):
    '''
        @description Rebuild the 32x32 colour image stored as one flat row
        (1024 red values, then 1024 green values, then 1024 blue values)
        _Optionally shows the image with matplotlib_
        @param img_mat -> np.array: |img_mat| = (3072, ) OR (3072, 1)
        @param plot -> bool: show the image when True
        @return image_repr: np.ndarray |image_repr| = (32, 32, 3)
    '''
    plane = 32 * 32  # number of values stored per colour channel
    assert img_mat.shape in [(3072,), (3072, 1)] # sanity check
    # Cut the row into its R, G and B planes and fold each one to 32x32.
    channels = [
        img_mat[start:start + plane].reshape(32, 32)
        for start in (0, plane, 2 * plane)
    ]
    # Channels become the last axis: image_repr[row, col] == (r, g, b).
    image_repr = np.stack(channels, axis=-1)
    assert image_repr.shape == (32, 32, 3) # sanity check
    if plot:
        import matplotlib.pyplot as plt
        plt.imshow(image_repr)
        plt.show(block=False)

    return image_repr

def _get_sift_detector():
    '''
        @description Return a SIFT detector, building it on the first call
        and reusing the same object on every later call
        @return the object produced by cv2.SIFT_create() when the installed
        OpenCV exposes it, otherwise by cv2.xfeatures2d.SIFT_create()
    '''
    detector = getattr(_get_sift_detector, 'detector', None)
    if detector is None:
        # Newer OpenCV builds ship SIFT in the main module; older ones only
        # provide it through the contrib `xfeatures2d` module.
        factory = getattr(cv2, 'SIFT_create', None)
        if factory is None:
            factory = cv2.xfeatures2d.SIFT_create
        detector = factory()
        _get_sift_detector.detector = detector
    return detector

def getSIFT(img):
    '''
        @description Get the SIFT features of the input image
        @param img -> np.array: |img| => { (32, 32), (3072, 1||0) }
        A flat (3072,) / (3072, 1) row is first turned into an image
        with getImage
        @return descriptor -> np.array n x 128
        (an empty 0 x 128 array when no descriptor is found)
    '''
    if img.shape in [(3072, 1), (3072,)]: img = getImage(img)
    # The keypoints are not needed, only their descriptors.
    _, des = _get_sift_detector().detectAndCompute(img, None)
    if des is None:
        # Keep the return 2-D so callers can always iterate over rows.
        return np.array([]).reshape(0, 128)
    return des

def load_images(path):
    '''
        @description Read one pickled batch and compute the SIFT descriptors
        of every image row stored under its b'data' key
        @param path -> str: location of the pickled batch file
        @return [descriptor_list, sift_vectors]
            descriptor_list -> list: the descriptors of all images, pooled
            sift_vectors -> dict: class name -> list holding one descriptor
            array per image of that class
    '''
    class_names = ['airplane', 'automobile', 'bird', 'cat',
                   'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    batch = unpickle(path)
    rows = batch[b'data']
    class_ids = batch[b'labels']

    pooled = []
    per_class = {name: [] for name in class_names}
    for idx, row in enumerate(rows):
        descriptors = getSIFT(row)
        # Pool the individual descriptors, but also keep them per image
        # under the class the image is labelled with.
        pooled.extend(descriptors)
        per_class[class_names[class_ids[idx]]].append(descriptors)
    return [pooled, per_class]

def kmeans(k, descriptor_list, random_state=None):
    '''
        @description Cluster the descriptors with k-means and return the
        cluster centres (the "visual words")
        @param k -> int: number of clusters to fit
        @param descriptor_list: the descriptors to cluster, one per row
        @param random_state: forwarded to sklearn's KMeans; pass an int to
        make the clustering repeatable. The default (None) keeps the
        previous unseeded behaviour
        @return visual_words: the `cluster_centers_` of the fitted model
    '''
    # `model` rather than `kmeans`, so the local does not shadow this function.
    model = KMeans(n_clusters=k, n_init=10, random_state=random_state)
    model.fit(descriptor_list)
    return model.cluster_centers_
    
def image_class(all_bovw, centers):
    '''
        @description Build one visual-word histogram per image: every
        descriptor of the image votes for its nearest centre (Euclidean
        distance, the lowest index wins a tie)
        @param all_bovw -> dict: key -> list of images, each image being a
        sequence of descriptors (one descriptor per row)
        @param centers: the visual words, one per row
        @return dict_feature -> dict: key -> list with one float np.ndarray
        of length len(centers) per image
    '''
    n_words = len(centers)
    dict_feature = {}
    for key, images in all_bovw.items():
        category = []
        for img in images:
            if len(img) == 0:
                # No descriptor for this image: nothing votes.
                category.append(np.zeros(n_words))
                continue
            # All descriptor-to-centre distances in one call instead of a
            # Python loop over every (descriptor, centre) pair.
            dists = distance.cdist(np.asarray(img, dtype=float),
                                   np.asarray(centers, dtype=float),
                                   metric='euclidean')
            nearest = dists.argmin(axis=1)
            # bincount yields integers; keep the float histograms callers got before.
            histogram = np.bincount(nearest, minlength=n_words).astype(float)
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

def find_index(image, center):
    '''
        @description Find which entry of `center` is closest (Euclidean
        distance) to the given vector
        @param image: the vector to match
        @param center: sequence of candidate vectors
        @return ind -> int: position of the closest entry; the first one
        wins a tie, and 0 is returned when `center` is empty
    '''
    best_ind = 0
    best_dist = None
    for idx, candidate in enumerate(center):
        dist = distance.euclidean(image, candidate)
        # The first entry is always taken; later ones must be strictly closer.
        if best_dist is None or dist < best_dist:
            best_ind = idx
            best_dist = dist
    return best_ind

In [34]:
# Tunable inputs of this cell.
TRAIN_BATCH_PATH = "cifar-10-python/cifar-10-batches-py/data_batch_1"
NUM_VISUAL_WORDS = 150  # number of k-means clusters, i.e. visual words

feature_list = load_images(TRAIN_BATCH_PATH)
# Every SIFT descriptor of the batch, pooled together for clustering
descriptor_list = feature_list[0]
# SIFT descriptors of the train images, separated class by class
all_bovw_feature = feature_list[1]
# Cluster centres of the pooled descriptors: the visual vocabulary
visual_words = kmeans(NUM_VISUAL_WORDS, descriptor_list)
# One visual-word histogram per train image, separated class by class
bovw_train = image_class(all_bovw_feature, visual_words)

KeyboardInterrupt: 

NameError: name 'opencv' is not defined

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import linear_model

# NOTE(review): X (feature matrix) and y (labels) are not defined in the
# visible cells -- they must exist before this cell can run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE(review): alpha=10000 is a very strong regularisation -- confirm it is intended.
clf = linear_model.SGDClassifier(max_iter=10000, alpha = 10000, loss = 'hinge', random_state = 40, tol=1e-3)
# Fit on the labels that belong to the training split and predict on the
# held-out split, so both calls use the outputs of train_test_split above.
clf.fit(X_train, y_train)
clf.predict(X_test)

In [None]:
!