In [24]:
import tensorflow as tf
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans
# from scipy.cluster.vq import kmeans
import matplotlib.pyplot as plt
import random
import joblib
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
from numpy.random import uniform
import warnings
from scipy.cluster.vq import vq



# Load the Fashion-MNIST train/test splits (images and integer class labels) via Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [25]:
def get_sift_feature_descriptor(image):
    """Detect SIFT keypoints in ``image`` and compute their descriptors.

    Args:
        image: Image handed straight to ``detectAndCompute`` (no mask is used).

    Returns:
        ``(keypoints, descriptors)`` exactly as returned by OpenCV.
        ``descriptors`` may be ``None``; the caller in this notebook checks
        for that before using it.
    """
    # OpenCV >= 4.4 ships SIFT in the main module; older contrib builds only
    # expose it under ``cv2.xfeatures2d``. Prefer the former, fall back to the
    # latter so the notebook runs on either installation.
    create_sift = getattr(cv2, "SIFT_create", None)
    if create_sift is None:
        create_sift = cv2.xfeatures2d.SIFT_create
    detector = create_sift()
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors

In [26]:
def sift(data):
  """Extract SIFT descriptors for every image in ``data``.

  Images for which no descriptors are returned (``None``) are skipped, so the
  keys of ``features`` are the original positions of the images that were kept.

  Args:
    data: Iterable of images.

  Returns:
    A tuple ``(features, descriptors, desc)``:
      * ``features``: dict mapping image index -> that image's descriptors.
      * ``descriptors``: flat list holding every descriptor row of every image.
      * ``desc``: list with one descriptor collection per kept image.
  """
  features = {}
  descriptors = []
  desc = []
  # Wrap the data itself rather than the enumerate object: enumerate has no
  # length, so tqdm could only show a bare counter without total or ETA.
  for index, img in enumerate(tqdm(data)):
    _, d = get_sift_feature_descriptor(img)
    if d is None:
      continue
    descriptors.extend(d)
    desc.append(d)
    features[index] = d
  return features, descriptors, desc

In [27]:
def get_cluster_centers(train_desc, n_clusters=400, random_state=42):
  """Fit k-means on the descriptors and return the cluster centres (the codebook).

  Args:
    train_desc: Descriptor rows to cluster, passed directly to ``KMeans.fit``.
    n_clusters: Number of visual words. Defaults to 400, the value that was
      previously hard-coded.
    random_state: Seed forwarded to ``KMeans`` so repeated runs build the same
      codebook. Pass ``None`` to get the previous unseeded behaviour.

  Returns:
    A tuple ``(optimal_k, kmeans, bovw, dists)``: the number of clusters, the
    fitted ``KMeans`` estimator, its ``cluster_centers_``, and an empty list
    kept as a placeholder for per-sample distances.
  """
  print('get_cluster_center')
  optimal_k = n_clusters
  kmeans = KMeans(n_clusters=optimal_k, random_state=random_state)
  kmeans.fit(train_desc)
  # Distances are not computed; the empty list keeps the 4-tuple return shape
  # that callers unpack.
  dists = []
  bovw = kmeans.cluster_centers_
  print(np.array(bovw).shape)
  return optimal_k, kmeans, bovw, dists

In [28]:
def CreateVisualDictionary(train_feature,train_descriptors,train_desc):
  """Build the visual-word codebook and persist it to disk.

  Clusters ``train_descriptors`` via ``get_cluster_centers``, stores
  ``(optimal_k, bovw)`` in ``bovw-codebook.pkl`` and writes ``dists`` to
  ``foo.txt``. ``train_feature`` and ``train_desc`` are accepted but unused.

  Returns:
    The ``(optimal_k, kmeans, bovw, dists)`` values obtained from
    ``get_cluster_centers``.
  """
  print("create visual dictionary")
  clustering = get_cluster_centers(train_descriptors)
  optimal_k, kmeans, bovw, dists = clustering
  # Only the cluster count and the centres are pickled, not the estimator.
  joblib.dump((optimal_k, bovw), "bovw-codebook.pkl", compress=3)
  # Integer-formatted text dump of the distances from the clustering step.
  np.savetxt('foo.txt', dists, fmt='%d')
  return optimal_k, kmeans, bovw, dists

In [29]:
def ComputeHistogram(features, bovw,kmeans):
  """Build a bag-of-visual-words histogram for every image.

  Args:
    features: Mapping of image index -> descriptor rows for that image.
    bovw: Codebook; only its length (the number of visual words) is used.
    kmeans: Fitted estimator whose ``predict`` assigns each descriptor to a
      visual-word index, expected to lie in ``[0, len(bovw))``.

  Returns:
    Dict mapping image index -> float64 array of length ``len(bovw)`` holding
    how many of the image's descriptors fell into each visual word.
  """
  n_words = len(bovw)
  histograms = {}
  for img in tqdm(features):
    all_descriptor = np.array(features[img], np.double)
    predictions = kmeans.predict(all_descriptor)
    # bincount tallies every assignment in one vectorised call instead of a
    # Python-level loop; minlength keeps unused words as explicit zeros.
    histograms[img] = np.bincount(predictions, minlength=n_words).astype(np.double)
  return histograms

In [30]:
def get_train_and_test(train_hist, y_train,test_hist,y_test):
  """Pair each histogram with its label, for the train and test splits.

  The keys of ``train_hist`` / ``test_hist`` are used as indices into
  ``y_train`` / ``y_test``; output order follows the dicts' iteration order.

  Returns:
    ``(trainX, trainY, testX, testY)`` as four lists.
  """
  def _pair(hist, labels):
    # Resolve samples and labels from the same key sequence so they stay aligned.
    keys = list(hist)
    return [hist[k] for k in keys], [labels[k] for k in keys]

  trainX, trainY = _pair(train_hist, y_train)
  testX, testY = _pair(test_hist, y_test)
  return trainX, trainY, testX, testY


def train_fit(Xtrain,Ytrain):
  """Fit a ``LinearSVC`` on the given samples and labels and return it."""
  # max_iter is raised well above the library default because the solver was
  # not converging with the default cap.
  return LinearSVC(max_iter=100000).fit(Xtrain, Ytrain)


def test_fit(model,Xtest,Ytest):
  predictions = model.predict(Xtest)
  return predictions

In [31]:
def MatchHistogram(Xtrain,Xtest,Ytrain,Ytest,train_desc,bovw):
  """Train and evaluate a linear SVM, then quantise the training descriptors.

  Fits ``LinearSVC`` on the training histograms, prints accuracy, confusion
  matrix and classification report for the test histograms, and finally maps
  every descriptor of every entry in ``train_desc`` to its nearest codebook
  row with ``vq``.

  Returns:
    ``(distances, visual_words)``: two lists with one array per entry of
    ``train_desc`` -- per descriptor, the distance to the nearest codebook
    row and the index of that row.
  """
  # Iteration cap raised because the solver did not converge with the default.
  classifier = LinearSVC(max_iter=100000)
  classifier.fit(Xtrain, Ytrain)
  predicted = classifier.predict(Xtest)

  print('Accuracy =', accuracy_score(Ytest, predicted))
  print('Confusion Matrix:\n', confusion_matrix(Ytest, predicted))
  print(classification_report(Ytest, predicted))

  # vq yields (codes, distances) for each image's descriptor array.
  quantised = [vq(img_descr, bovw) for img_descr in train_desc]
  visual_words = [codes for codes, _ in quantised]
  distances = [dist for _, dist in quantised]
  return distances, visual_words

In [32]:
# Extract SIFT descriptors from the training images; sift() skips images that yield no descriptors.
train_feature , train_descriptor, train_desc = sift(x_train)


60000it [00:27, 2147.70it/s]


In [33]:
# Same descriptor extraction for the test images.
test_feature, test_descriptor , test_desc = sift(x_test)

# Cluster the pooled training descriptors into the visual-word codebook (also written to disk).
optimal_k,kmeans,bovw,dists = CreateVisualDictionary(train_feature,train_descriptor,train_desc)


10000it [00:04, 2059.08it/s]


create visual dictionary
get_cluster_center
(400, 128)


In [34]:
# Turn each image's descriptors into a visual-word count histogram.
train_histogram = ComputeHistogram(train_feature,bovw,kmeans)

test_histogram = ComputeHistogram(test_feature, bovw,kmeans)



# Align histograms with their labels; only images that produced descriptors are present.
Xtrain_hist,Ytrain,Xtest_hist,Ytest = get_train_and_test(train_histogram, y_train,test_histogram,y_test)



# NOTE(review): MatchHistogram returns a (distances, visual_words) tuple, so
# `distances` is bound to both lists and the print below dumps them in full.
distances = MatchHistogram(Xtrain_hist,Xtest_hist,Ytrain,Ytest,train_desc,bovw)

print('dist',distances)
# np.savetxt('foo1.txt', distances)

100%|██████████| 55176/55176 [00:36<00:00, 1526.10it/s]
100%|██████████| 9231/9231 [00:04<00:00, 1898.03it/s]


Accuracy = 0.6456505254035316
Confusion Matrix:
 [[623  17  49  76  30  30  58  12  40   9]
 [  9 481  11  92   7   8   1   4  14   1]
 [ 26  17 527  37 191  21  85   3  45   2]
 [ 96  48  76 452  69  40  42  36  59   5]
 [ 13   8 207  43 498  14 130   6  50   4]
 [  4  13   4  18   3 780   6  98  12  28]
 [152  32 147  51 163  32 258   3  88   6]
 [  2   6   4  22   3  51   2 759  15 101]
 [ 20  21  55  47  25  22  28  16 717   7]
 [  7   3   4   9   1  16   3  61  19 865]]
              precision    recall  f1-score   support

           0       0.65      0.66      0.66       944
           1       0.74      0.77      0.76       628
           2       0.49      0.55      0.52       954
           3       0.53      0.49      0.51       923
           4       0.50      0.51      0.51       973
           5       0.77      0.81      0.79       966
           6       0.42      0.28      0.33       932
           7       0.76      0.79      0.77       965
           8       0.68      0.75