In [18]:
# !pip install opencv-python
# !pip install tensorflow==1.15.2
# !pip install scipy
# !pip install -U scikit-learn

In [26]:
import numpy as np
import cv2
import os
import pickle
import sys
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
from collections import OrderedDict
import tensorflow as tf
import pickle

In [20]:
# # Detect hardware
# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [21]:
# Implementing Bag of Features Model
# Configuration: the product n_classes * clustering_factor is the number of
# visual words (k-means clusters) requested when the vocabulary is built.
n_classes = 36
clustering_factor = 6

def orb_features(images, detector=None):
  """Extract local feature descriptors for every image, grouped by class.

  Parameters
  ----------
  images : dict
      Maps a class label to a list of images; each image is handed to
      ``detector.detectAndCompute(img, None)``.
  detector : object, optional
      Any object exposing ``detectAndCompute(img, mask)`` that returns
      ``(keypoints, descriptors)``. When omitted, ``cv2.ORB_create()`` is
      used, which matches the previous behaviour.

  Returns
  -------
  list
      ``[all_descriptors, descriptors_by_class]``:
      ``all_descriptors`` is a flat list with one entry per descriptor row
      across all images; ``descriptors_by_class`` maps each label to a list
      holding one descriptor array per image that produced descriptors.

  Notes
  -----
  Images for which the detector finds no keypoints are skipped, so a class
  may end up with fewer entries than it has images.
  """
  if detector is None:
    detector=cv2.ORB_create()
  orb_descriptors_class_by_class={}
  orb_descriptors_list=[]
  for key,value in images.items():
    print(key, "Started")
    features=[]
    for img in value:
      kp,desc=detector.detectAndCompute(img,None)
      # OpenCV hands back None (not an empty array) as the descriptors when
      # an image yields no keypoints; extending with None would raise.
      if desc is None or len(desc)==0:
        continue
      orb_descriptors_list.extend(desc)
      features.append(desc)
    orb_descriptors_class_by_class[key]=features
    print(key," Completed!")
  return [orb_descriptors_list,orb_descriptors_class_by_class]

In [22]:

# Creating a visual dictionary using only the train dataset
# The clustering function takes the number of clusters (k) and the descriptors list
# It returns the array of cluster centres (the visual words) together with the fitted model

def minibatchkmeans(k, descriptors_list, random_state=None):
  """Cluster descriptors into ``k`` visual words with MiniBatchKMeans.

  Parameters
  ----------
  k : int
      Number of clusters to fit.
  descriptors_list : sequence
      Descriptor rows, passed unchanged to ``MiniBatchKMeans.fit``.
  random_state : int or None, optional
      Seed forwarded to ``MiniBatchKMeans``. The default ``None`` keeps the
      previous unseeded behaviour; pass an integer to make the vocabulary
      (and everything derived from it) reproducible between runs.

  Returns
  -------
  tuple
      ``(visual_words, kmeans)``: the fitted model's ``cluster_centers_``
      and the fitted model itself.
  """
  kmeans=MiniBatchKMeans(n_clusters=k,random_state=random_state)
  print("MiniBatchKMeans Initialized!")
  kmeans.fit(descriptors_list)
  print("Clusters Created!")
  visual_words=kmeans.cluster_centers_
  return visual_words, kmeans


In [23]:
# Loading train images into a dictionary that holds all images category by category

def load_images_by_category(folder):
  """Load every readable image under ``folder``, grouped by sub-directory.

  Parameters
  ----------
  folder : str
      Root directory; each sub-directory name is used as a class label.

  Returns
  -------
  dict
      Maps each sub-directory name to the list of images that
      ``cv2.imread`` returned for the files inside it. Files for which
      ``cv2.imread`` returns ``None`` are left out.

  Notes
  -----
  Entries of ``folder`` that are not directories are ignored; previously a
  stray file there made the inner ``os.listdir`` call raise. Directory
  listings are sorted so labels and images come back in a stable order
  regardless of the file system.
  """
  images={}
  for label in sorted(os.listdir(folder)):
    path=os.path.join(folder,label)
    if not os.path.isdir(path):
      continue
    print(label," started")
    category=[]
    for image in sorted(os.listdir(path)):
      img=cv2.imread(os.path.join(path,image))
      if img is not None:
        category.append(img)
    images[label]=category
    print(label, "ended")
  return images

In [24]:

# Creating histograms for train images

# The function takes 2 parameters. The first one is a dictionary that holds the descriptors, separated class by class
# The second parameter is the fitted clustering model
# It returns a dictionary that holds the histogram of each image, separated class by class.

def create_histogram(all_descs,kmeans):
  """Build one visual-word histogram per image, grouped by class.

  Parameters
  ----------
  all_descs : dict
      Maps a class label to a list with one descriptor array per image.
  kmeans : object
      Fitted clustering model exposing ``n_clusters`` and ``predict``.

  Returns
  -------
  dict
      Maps each label to a list of 1-D integer arrays of length
      ``kmeans.n_clusters``; entry ``i`` counts how many of the image's
      descriptors were assigned to cluster ``i``.

  Notes
  -----
  The histogram width is taken from the model itself, so it always matches
  the vocabulary the model was fitted with. An image whose descriptors are
  ``None`` or empty gets an all-zero histogram instead of crashing
  ``predict``.
  """
  n_words=kmeans.n_clusters
  features_dict={}
  for key,value in all_descs.items():
    print(key," Started!")
    category=[]
    for desc in value:
      if desc is None or len(desc)==0:
        # Nothing to assign: keep the image's slot with an empty histogram.
        hist=np.zeros(n_words,dtype=np.intp)
      else:
        raw_words=kmeans.predict(desc)
        hist=np.bincount(raw_words,minlength=n_words)
      category.append(hist)
    features_dict[key]=category
    print(key," Completed!")
  return features_dict

In [25]:
train_folder='C:/Users/Sandeep Roy/Desktop/data2/data/Train'

# Load train images, grouped by class label
train_images=load_images_by_category(train_folder)

# Extract ORB descriptors from each image stored in train_images
orbs=orb_features(train_images)
all_train_descriptors=orbs[0]
train_descriptors_by_class=orbs[1]

# Size of the visual vocabulary: one k-means cluster per visual word
n_visual_words=n_classes*clustering_factor

# Call minibatchkmeans to get the cluster centres and the fitted model
visual_words,kmeans=minibatchkmeans(n_visual_words,all_train_descriptors)

# Call create_histogram to get one histogram per image
bows_train=create_histogram(train_descriptors_by_class,kmeans)

# Save the histograms as a .csv file: one row per image, one column per
# histogram bin, followed by the class label.
import csv
loc='C:/Users/Sandeep Roy/Desktop/data2/data/train.csv'
with open(loc,'w',newline='') as file:
  writer=csv.writer(file)
  # range(1, k) produced only k-1 feature names; the header must name every
  # bin so that it lines up with the data rows.
  header=['pixel'+str(i) for i in range(1,n_visual_words+1)]
  header.append('Label')
  writer.writerow(header)
  for label,histograms in bows_train.items():
    for hist in histograms:
      # Write every bin (the old loop stopped at a hard-coded 150) and avoid
      # naming the row `list`, which shadowed the builtin for later cells.
      writer.writerow([*hist,label])

1  started
1 ended
2  started
2 ended
3  started
3 ended
4  started
4 ended
5  started
5 ended
6  started
6 ended
7  started
7 ended
8  started
8 ended
9  started
9 ended
a  started
a ended
b  started
b ended
c  started
c ended
d  started
d ended
e  started
e ended
f  started
f ended
g  started
g ended
h  started
h ended
i  started
i ended
j  started
j ended
k  started
k ended
l  started
l ended
m  started
m ended
n  started
n ended
o  started
o ended
p  started
p ended
q  started
q ended
r  started
r ended
s  started
s ended
t  started
t ended
u  started
u ended
v  started
v ended
w  started
w ended
x  started
x ended
y  started
y ended
z  started
z ended
1 Started
1  Completed!
2 Started
2  Completed!
3 Started
3  Completed!
4 Started
4  Completed!
5 Started
5  Completed!
6 Started
6  Completed!
7 Started
7  Completed!
8 Started
8  Completed!
9 Started
9  Completed!
a Started
a  Completed!
b Started
b  Completed!
c Started
c  Completed!
d Started
d  Completed!
e Started
e  Completed!

In [28]:
# Persist the fitted k-means model to disk so it can be reloaded later
# without re-clustering.
with open('kmeans_model.pkl', mode='wb') as f:
    pickle.dump(kmeans, f)