In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.cluster import MiniBatchKMeans

In [2]:
# uncomment for colab
# install openCV, unzip dataset, and rename folder to /imgs/

# !pip install -U opencv-python
# !unzip dataset.zip
# !mv COMP338_Assignment1_Dataset imgs

Collecting opencv-python
  Downloading opencv_python-4.5.4.58-cp37-cp37m-manylinux2014_x86_64.whl (60.3 MB)
[K     |████████████████████████████████| 60.3 MB 38 kB/s 
Installing collected packages: opencv-python
  Attempting uninstall: opencv-python
    Found existing installation: opencv-python 4.1.2.30
    Uninstalling opencv-python-4.1.2.30:
      Successfully uninstalled opencv-python-4.1.2.30
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed opencv-python-4.5.4.58
Archive:  dataset.zip
   creating: COMP338_Assignment1_Dataset/
  inflating: COMP338_Assignment1_Dataset/.DS_Store  
   creating: __MACOSX/
   creating: __MACOSX/COMP338_Assignment1_Dataset/
  inflating: __MACOSX/COMP338_Assignment1_Dataset/._.DS_Store  
   creating

In [3]:
import cv2
class FeaturePoint:
  """Plain record describing one detected feature point.

  Attributes:
    xy: location of the point (sift() passes the keypoint's `pt`).
    size: size of the point (sift() passes the keypoint's `size`).
    angle: orientation of the point (sift() passes the keypoint's `angle`);
      defaults to 0 when not supplied.
  """

  def __init__(self, xy, size, angle=0):
    # Store the three values unchanged; no validation or copying is done.
    self.xy, self.size, self.angle = xy, size, angle

def sift(img):
  """Detect SIFT keypoints in an image and compute their descriptors.

  Args:
    img: image array in OpenCV's BGR channel order (what cv2.imread and
      cv2.imdecode return), or an already single-channel image.

  Returns:
    (features, descriptors): `features` is a list of FeaturePoint, one per
    detected keypoint; `descriptors` is a 2-D array with one row per
    keypoint. When nothing is detected, `features` is empty and
    `descriptors` is an empty (0, descriptorSize) float32 array instead of
    None, so callers can concatenate or index it without special-casing.

  Raises:
    ValueError: if `img` is None (for example, the image failed to load).
  """
  if img is None:
    raise ValueError("sift() received None instead of an image")

  # SIFT only uses intensity. Convert straight from BGR to grey: handing
  # OpenCV an RGB image makes its internal greyscale step (which treats
  # 3-channel input as BGR) weight the red and blue channels the wrong way.
  gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img

  detector = cv2.SIFT_create()
  keypoints, descriptors = detector.detectAndCompute(gray_img, None)

  # detectAndCompute yields None (not an empty array) when no keypoint is found.
  if descriptors is None:
    descriptors = np.empty((0, detector.descriptorSize()), dtype=np.float32)

  features = [FeaturePoint(kp.pt, kp.size, kp.angle) for kp in keypoints]
  return features, descriptors

# def find_features(sift, img): #unused
#   return sift.detect(rgb_img,None)

In [4]:
def extract_features_and_descriptors(img):
  """Run sift() on a single image and hand back its two results.

  Args:
    img: the image to pass to sift().

  Returns:
    A (features, descriptors) tuple exactly as produced by sift();
    `features` holds one entry per detected feature point.
  """
  pointList, descriptorRows = sift(img)
  return (pointList, descriptorRows)

def extract_all_features_and_descriptors(images):
  """Run extract_features_and_descriptors over every image.

  Args:
    images: iterable of images.

  Returns:
    (imagesFeatures, imagesDescriptors): two lists of equal length, with
    entry i holding the features / descriptors of the i-th image. Both
    lists are empty when `images` is empty (the earlier zip(*...) form
    raised on empty input because there was nothing to unpack).
  """
  imagesFeatures = []
  imagesDescriptors = []
  for img in images:
    features, descriptors = extract_features_and_descriptors(img)
    imagesFeatures.append(features)
    imagesDescriptors.append(descriptors)
  return imagesFeatures, imagesDescriptors

In [5]:
def get_all_images(basePath, datasetName=''):
  """Load every .jpg image found in `basePath/datasetName`.

  Args:
    basePath: directory that contains the dataset folders.
    datasetName: sub-folder to read; '' reads `basePath` itself.

  Returns:
    List of images as returned by cv2.imread, in sorted file-name order.
    Files that cv2.imread cannot decode are skipped with a warning.
  """
  import warnings  # local import so this cell does not depend on another cell's imports

  folder = os.path.join(basePath, datasetName)
  # Match on the extension (case-insensitively) rather than on "jpg" appearing
  # anywhere in the name, and sort because os.listdir order is arbitrary.
  jpgNames = sorted(
    name for name in os.listdir(folder) if name.lower().endswith(".jpg")
  )

  images = []
  for name in jpgNames:
    filePath = os.path.join(folder, name)
    img = cv2.imread(filePath)
    if img is None:
      # cv2.imread signals failure by returning None instead of raising.
      warnings.warn(f"Skipping unreadable image: {filePath}")
      continue
    images.append(img)
  return images


def get_array_of_all_descriptors(imagesDescriptors):
  """Stack the per-image descriptor arrays into one 2-D array.

  Args:
    imagesDescriptors: sequence with one entry per image, each a 2-D array
      of shape [features][featureDescriptors]. The number of features may
      differ between images. Entries that are None (an image with no
      descriptors) are ignored.

  Returns:
    2-D array of shape [total features over all images][featureDescriptors].

  Raises:
    ValueError: if there is no non-None entry to concatenate.
  """
  perImage = [d for d in imagesDescriptors if d is not None]
  if not perImage:
    raise ValueError("no descriptors to concatenate: every image is missing descriptors")
  return np.concatenate(perImage, axis=0)


In [6]:
# Load every .jpg from ./imgs/Training/dog and run SIFT on each image.
basePath = "./imgs/Training"
datasetName = "dog"
images = get_all_images(basePath, datasetName)
# Two parallel lists, one entry per image: its feature points and its descriptors.
imagesFeatures, imagesDescriptors = extract_all_features_and_descriptors(images)

In [7]:
def cluster_model(k, descriptors, random_state=None):
    """Fit a MiniBatchKMeans model with `k` clusters on the descriptors.

    Args:
        k: number of clusters.
        descriptors: 2-D array-like of descriptors, one row per sample.
        random_state: optional seed forwarded to MiniBatchKMeans so a run
            can be reproduced; None keeps the unseeded behaviour.

    Returns:
        The fitted MiniBatchKMeans instance.
    """
    # Each mini-batch is 1/42 of the data (ratio kept as it was), clamped to
    # at least one sample: fewer than 42 descriptors used to give batch_size 0.
    batch_size = max(1, len(descriptors) // 42)
    init_size = 3 * k
    kmeans = MiniBatchKMeans(
        n_clusters=k,
        batch_size=batch_size,
        init_size=init_size,
        random_state=random_state,
    )
    return kmeans.fit(descriptors)

In [35]:
def find_bag_of_features(kmeansModel, imgDescriptors):
  """Assign each descriptor of one image to a cluster of the fitted model.

  Note that this returns the raw per-descriptor result of
  `kmeansModel.predict`, not a histogram of counts per cluster.

  Args:
    kmeansModel: fitted model exposing `predict`.
    imgDescriptors: 2-D array of the image's descriptors, or None / empty
      when the image has no descriptors.

  Returns:
    The result of `kmeansModel.predict(imgDescriptors)`; an empty int32
    array when there are no descriptors to classify.
  """
  # predict() cannot be called on None or zero rows, so short-circuit.
  if imgDescriptors is None or len(imgDescriptors) == 0:
    return np.empty(0, dtype=np.int32)
  return kmeansModel.predict(imgDescriptors)

def find_all_bags_of_features(kmeansModel, imagesDescriptors):
  """Apply find_bag_of_features to the descriptors of every image.

  Args:
    kmeansModel: model passed through to find_bag_of_features.
    imagesDescriptors: iterable with one descriptor collection per image.

  Returns:
    List with one find_bag_of_features result per image, in input order.
  """
  bags = []
  for descriptorsOfImage in imagesDescriptors:
    bags.append(find_bag_of_features(kmeansModel, descriptorsOfImage))
  return bags

In [36]:
# Pool the descriptors of all images into one 2-D array and cluster them
# into 500 groups with MiniBatchKMeans.
allDescriptors = get_array_of_all_descriptors(imagesDescriptors)
kmeansModel = cluster_model(500, allDescriptors)
# TODO(review): the original author was unsure this step is right — confirm
# the choice of k=500 and that clustering the pooled descriptors is intended.

In [37]:
# One entry per image: the fitted model's predict() output for that image's descriptors.
imagesDescriptorClusters = find_all_bags_of_features(kmeansModel, imagesDescriptors)


In [27]:
 #image download utility function
import urllib.request as urlR

def url_to_image(url):
  """Download an image over HTTP(S) and decode it with OpenCV.

  Args:
    url: address of the image to fetch.

  Returns:
    The result of cv2.imdecode(..., cv2.IMREAD_COLOR) on the downloaded
    bytes. cv2.imdecode returns None when the bytes are not a decodable
    image, and that value is passed through unchanged.
  """
  # The context manager closes the HTTP response even if read() raises;
  # previously the response was never closed explicitly.
  with urlR.urlopen(url) as resp:
    payload = resp.read()
  # View the downloaded bytes as a flat uint8 buffer, which imdecode expects.
  encoded = np.frombuffer(payload, dtype=np.uint8)
  return cv2.imdecode(encoded, cv2.IMREAD_COLOR)

In [28]:
# Download a sample photo from Wikimedia Commons into `img` (needs network access).
img = url_to_image("https://upload.wikimedia.org/wikipedia/commons/thumb/b/bb/Victoria_Building%2C_University_of_Liverpool_2019.jpg/1528px-Victoria_Building%2C_University_of_Liverpool_2019.jpg")

In [29]:
# random utility functions
def get_histogram(img, channel, normalize=False):
  """Compute the 256-bin histogram of one channel of an image.

  Args:
    img: image handed to cv2.calcHist.
    channel: index of the channel to histogram.
    normalize: when truthy, the histogram is normalised in place with
      cv2.normalize (called with its default settings) before returning.

  Returns:
    The histogram produced by cv2.calcHist over the value range [0, 256).
  """
  binCount, valueRange = [256], [0, 256]
  histogram = cv2.calcHist([img], [channel], None, binCount, valueRange)
  if not normalize:
    return histogram
  # Source and destination are the same array, so this rewrites it in place.
  cv2.normalize(histogram, histogram)
  return histogram
def plt_histograms(img, normalize=False):
  """Plot the histograms of channels 0, 1 and 2 of an image on one figure.

  Channel 0 is drawn with colour 'b', channel 1 with 'g' and channel 2
  with 'r'; the figure is then shown with plt.show().

  Args:
    img: image passed to get_histogram.
    normalize: forwarded to get_histogram for every channel.
  """
  channelColours = ('b', 'g', 'r')
  for channel in range(len(channelColours)):
    plt.plot(get_histogram(img, channel, normalize), color=channelColours[channel])
  plt.show()