In [1]:
# Mustafa Ghanim , Department of Electrical-Electronics Engineering 
# Import the required packages
import numpy as np
import pickle # built-in Python based structure serialization function to save/load dictionary
import cv2
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
#from IPython.core.debugger import set_trace
from sklearn.svm import SVC
from scipy.cluster.vq import vq
import os.path as osp
from random import shuffle
from glob import glob
from sklearn.cluster import MeanShift,estimate_bandwidth
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Report which OpenCV build is in use (the notebook relies on the SIFT implementation it ships).
sift_version_notice = 'Since SIFT functions are removed in OpenCV 4.x due to patent issues {} version is used '
print(sift_version_notice.format(cv2.__version__))

Since SIFT functions are removed in OpenCV 4.x due to patent issues 3.4.2 version is used 


In [45]:
# Define all required functions

# Not used but useful functions to normalize and gray-scale images
def load_image(path):
    """Read an image from disk and return it as a float32 RGB array.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    numpy.ndarray
        The image with its channel axis reversed (OpenCV loads BGR, so the
        result is RGB), converted to float32 and divided by 255.

    Raises
    ------
    IOError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(path)
    if img is None:
        raise IOError('Could not read image: {}'.format(path))
    # The original called an `im2single` helper that is not defined in this
    # notebook. NOTE(review): assumed to mean "float32 scaled to [0, 1]" - confirm.
    single = img.astype(np.float32) / 255
    # Reverse the channel order (BGR -> RGB) and return a contiguous copy so the
    # result can be handed to OpenCV functions without negative strides.
    return np.ascontiguousarray(single[:, :, ::-1])


def load_image_gray(path):
    """Load an image through ``load_image`` and convert it to a single gray channel.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    numpy.ndarray
        2-D grayscale version of the loaded image.
    """
    img = load_image(path)
    # load_image reverses the channel axis, so the data is RGB here; converting
    # with the BGR code would swap the red and blue luminance weights.
    return cv2.cvtColor(np.ascontiguousarray(img), cv2.COLOR_RGB2GRAY)

def gridSIFT(img, step = 30, size = 40,display = False,is_print = False):
    """Compute SIFT descriptors on a regular grid of fixed-size keypoints (dense SIFT).

    Parameters
    ----------
    img : numpy.ndarray
        Image as returned by ``cv2.imread`` (BGR channel order is assumed by the
        display code).
    step : int
        Distance in pixels between neighbouring grid points; the grid starts at
        ``(step, step)``.
    size : int
        Diameter given to every grid keypoint.
    display : bool
        When true, show the original image next to a copy with the keypoints drawn.
    is_print : bool
        When true, print how many grid patches were generated.

    Returns
    -------
    (keypoints, descriptors)
        The pair returned by ``sift.compute``. When the image is smaller than
        ``step`` no grid point exists; callers should be prepared for an empty
        result (NOTE(review): OpenCV is expected to return ``None`` descriptors
        in that case - confirm for the installed version).

    Raises
    ------
    ValueError
        If ``img`` is ``None`` (typically an unreadable file).
    """
    if img is None:
        raise ValueError('gridSIFT received an empty image (img is None)')

    # Newer OpenCV builds expose SIFT in the main module; older ones (such as
    # the 3.4.2 used here) only in the contrib ``xfeatures2d`` module.
    create_sift = getattr(cv2, 'SIFT_create', None) or cv2.xfeatures2d.SIFT_create
    sift = create_sift()

    # Only the image extent is needed to lay out the grid.
    rows, cols = img.shape[:2]
    kp = [cv2.KeyPoint(float(x), float(y), float(size))
          for x in range(step, cols, step)
          for y in range(step, rows, step)]
    patch_cnt = len(kp)

    kp, des = sift.compute(img, kp)

    if display:
        # Draw into a fresh output image: passing ``img`` as the output buffer
        # (as the original did) paints the keypoints onto the caller's image.
        annotated = cv2.drawKeypoints(img, kp, None)
        plt.figure(figsize=(15,15))
        plt.subplot(121)
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.xticks([]), plt.yticks([])
        plt.title('Original Image')
        plt.subplot(122)
        plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
        plt.xticks([]), plt.yticks([])
        plt.title('Patch Features on The Image')
        plt.show()
        plt.close()

    if is_print:
        print('Total number of image patches in this grid = {} (size: {} x {})'.format(patch_cnt,size,size))
    return kp,des


def my_means_shift(X,kps,bandwidth = 15):
    """Cluster keypoint locations with Mean-Shift and average the descriptors of each cluster.

    Parameters
    ----------
    X : indexable of descriptors
        ``X[j]`` is the descriptor belonging to ``kps[j]``.
    kps : iterable of keypoints
        Objects exposing a ``.pt`` location; the locations are what gets clustered.
    bandwidth : number
        Bandwidth forwarded to ``MeanShift``.

    Returns
    -------
    (numpy.ndarray, int)
        One mean descriptor per cluster, and the number of clusters found.
    """
    print('Means-shift algorithm starts ..')
    locations = [keypoint.pt for keypoint in kps]

    clusterer = MeanShift(bandwidth = bandwidth,bin_seeding = True)
    clusterer.fit(locations)
    assigned = clusterer.labels_
    cluster_ids = np.unique(assigned)
    estimated_K = len(cluster_ids)
    print('The Estimated Number of Clusters Using  Means-Shift = {}'.format(estimated_K))

    def mean_descriptor(cluster_id):
        # Average the descriptors whose keypoint fell into this spatial cluster.
        members = np.where(assigned == cluster_id)[0]
        return sum(X[member] for member in members) / len(members)

    centers = [mean_descriptor(cluster_id) for cluster_id in cluster_ids]
    return np.array(centers),estimated_K
        
def features_size(image_paths,step,size,keypoints_thres = 500,grid_sift = False,is_print = False):
    """Extract SIFT descriptors from every image and report how many were found.

    Parameters
    ----------
    image_paths : sequence of str
        Image files to process.
    step, size : int
        Grid spacing and keypoint diameter forwarded to ``gridSIFT`` (only used
        when ``grid_sift`` is true).
    keypoints_thres : int
        Value passed to the SIFT constructor (its first argument) for the
        keypoint-based mode.
    grid_sift : bool
        True: dense descriptors via ``gridSIFT``; False: ``detectAndCompute``
        on the grayscale image.
    is_print : bool
        Forwarded to ``gridSIFT``.

    Returns
    -------
    (kps, X)
        ``kps`` is an object array of all keypoints and ``X`` the stacked
        descriptor matrix; row ``j`` of ``X`` belongs to ``kps[j]``.

    Raises
    ------
    IOError
        If an image cannot be read.
    ValueError
        If no descriptor could be extracted from any image.
    """
    # Newer OpenCV builds expose SIFT in the main module; older ones only in
    # the contrib ``xfeatures2d`` module.
    create_sift = getattr(cv2, 'SIFT_create', None) or cv2.xfeatures2d.SIFT_create
    sift = create_sift(keypoints_thres)

    descriptor_chunks = []
    keypoints = []
    for path in image_paths:
        img = cv2.imread(path)
        if img is None:
            raise IOError('Could not read image: {}'.format(path))

        if grid_sift:
            kp, descriptors = gridSIFT(img,step = step, size = size,is_print = is_print)
        else:
            gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            kp, descriptors = sift.detectAndCompute(gray, None)

        # An image may yield no descriptors at all. The original bare
        # ``except`` then reset the accumulator and silently dropped every
        # descriptor collected so far; such images are simply skipped here.
        if descriptors is None or len(descriptors) == 0:
            continue
        descriptor_chunks.append(descriptors)
        keypoints.extend(kp)

    if not descriptor_chunks:
        raise ValueError('No SIFT descriptors could be extracted from the given images')

    # Concatenate once instead of re-allocating the full matrix per image.
    X = np.concatenate(descriptor_chunks)
    kps = np.array(keypoints, dtype=object)

    print('The size of SIFT discriptors for this experiment = {} x {}'.format(X.shape[0],X.shape[1]))
    print('The average SIFT discriptors  per image = {:.2f}'.format(X.shape[0]/len(image_paths)))

    return kps,X

def compute_dictionary(image_paths, K,keypoints_thres = 500, grid_sift = False, means_shift = False,means_shift_clustering = False,is_print = False):
    """Build the visual-word dictionary (cluster centres of the training SIFT descriptors).

    Parameters
    ----------
    image_paths : sequence of str
        Training image files.
    K : int
        Number of KMeans clusters; only used when ``means_shift`` is false.
    keypoints_thres : int
        Value passed to the SIFT constructor (its first argument) for the
        keypoint-based mode.
    grid_sift : bool
        True: dense descriptors via ``gridSIFT`` (with its default step/size);
        False: ``detectAndCompute`` on the grayscale image.
    means_shift : bool
        True: the number of clusters is estimated by ``my_means_shift``.
    means_shift_clustering : bool
        Only relevant when ``means_shift`` is true. True: return the Mean-Shift
        based centres directly; False: run KMeans with the estimated cluster count.
    is_print : bool
        Forwarded to ``gridSIFT``.

    Returns
    -------
    numpy.ndarray
        One row per visual word.

    Raises
    ------
    IOError
        If an image cannot be read.
    ValueError
        If no descriptor could be extracted from any image.
    """
    # Newer OpenCV builds expose SIFT in the main module; older ones only in
    # the contrib ``xfeatures2d`` module.
    create_sift = getattr(cv2, 'SIFT_create', None) or cv2.xfeatures2d.SIFT_create
    sift = create_sift(keypoints_thres)

    descriptor_chunks = []
    keypoints = []
    for cnt, path in enumerate(image_paths, start=1):
        if(cnt % 30 == 0):
            print('SIFTs Extracted of {} train images, {} many images left'.format(cnt,len(image_paths) - cnt))

        img = cv2.imread(path)
        if img is None:
            raise IOError('Could not read image: {}'.format(path))

        if grid_sift:
            kp, descriptors = gridSIFT(img,is_print = is_print)
        else:
            gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            kp, descriptors = sift.detectAndCompute(gray, None)

        # An image may yield no descriptors at all. The original bare
        # ``except`` then reset the accumulator and silently dropped every
        # descriptor collected so far; such images are simply skipped here.
        if descriptors is None or len(descriptors) == 0:
            continue
        descriptor_chunks.append(descriptors)
        keypoints.extend(kp)

    if not descriptor_chunks:
        raise ValueError('No SIFT descriptors could be extracted from the given images')

    # Concatenate once instead of re-allocating the full matrix per image.
    X = np.concatenate(descriptor_chunks)
    kps = np.array(keypoints, dtype=object)
    print('Total number of SIFT descriptors = {}, {} per image as average'.format(X.shape[0],X.shape[0]/len(image_paths)))

    # Clustering
    print('Starting clustering operations ..')
    if means_shift:
        if means_shift_clustering:
            print('K estimation and clustering SIFTs are done with Means-Shift')
            SIFT_centers, _ = my_means_shift(X,kps)
            return SIFT_centers
        print('K estimation by Means-Shift and clustering by Kmeans')
        # Only the estimated cluster count is reused; KMeans computes the centres.
        _, n_clusters = my_means_shift(X,kps)
        return KMeans(n_clusters, random_state=0).fit(X).cluster_centers_

    print('Used Number of clusters by user input = {}'.format(K))
    return KMeans(K, random_state=0).fit(X).cluster_centers_
        

In [46]:
def hist_calc_and_feature_quan(image_paths, dictionary_filename,grid_sift = False,keypoints_thres = 500,tfidf_opt = False,is_print = False):
    """Quantize each image's SIFT descriptors against a saved dictionary and build histograms.

    Parameters
    ----------
    image_paths : sequence of str
        Images to encode.
    dictionary_filename : str
        Pickle file holding the dictionary array (one row per visual word).
    grid_sift : bool
        True: dense descriptors via ``gridSIFT``; False: ``detectAndCompute``
        on the grayscale image.
    keypoints_thres : int
        Value passed to the SIFT constructor (its first argument).
    tfidf_opt : bool
        True: re-weight the histograms with tf-idf before returning.
    is_print : bool
        Forwarded to ``gridSIFT``.

    Returns
    -------
    list of list of float, or numpy.ndarray
        One normalized histogram per image (a list of lists), or the tf-idf
        weighted matrix as an array when ``tfidf_opt`` is true. An image
        without any descriptor gets an all-zero histogram.

    Raises
    ------
    IOError
        If an image cannot be read.
    """
    # Newer OpenCV builds expose SIFT in the main module; older ones only in
    # the contrib ``xfeatures2d`` module.
    create_sift = getattr(cv2, 'SIFT_create', None) or cv2.xfeatures2d.SIFT_create
    sift = create_sift(keypoints_thres)

    # SECURITY: unpickling executes arbitrary code - only load dictionary files
    # produced by this notebook or another trusted source.
    with open(dictionary_filename, 'rb') as f:
        dictionary = pickle.load(f)
    print('Used dictionary: {}'.format(dictionary_filename))
    n_words = dictionary.shape[0]

    def calc_histogram(assignments):
        # Normalized histogram of visual-word counts for one image.
        hist = [0] * n_words
        for word in assignments:
            hist[word] += 1
        # Every assignment increments exactly one bin, so the total equals the
        # number of assignments; compute it once instead of once per bin.
        total = len(assignments)
        return [count / total for count in hist]

    def tfidf(tf_hists):
        # Term frequency - inverse document frequency: weight each visual word
        # by the log of (number of images / number of images containing it).
        epsilon = 1e-6
        tf = np.array(tf_hists, dtype=float).reshape(-1, n_words)
        T = tf.shape[0]
        if T == 0:
            return tf
        df = (tf > 0).sum(axis=0)
        # epsilon keeps the ratio finite for words that occur in no image;
        # their term frequency is zero, so the weighted value stays zero.
        idf = np.log([T / max(d, epsilon) for d in df])
        return tf * idf

    qunatized_features = []
    for path in image_paths:
        img = cv2.imread(path)
        if img is None:
            raise IOError('Could not read image: {}'.format(path))

        if grid_sift:
            # Patch-based (dense) SIFT descriptors
            _, descriptors = gridSIFT(img,is_print = is_print)
        else:
            # Keypoint-based descriptors
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            _, descriptors = sift.detectAndCompute(gray, None)

        if descriptors is None or len(descriptors) == 0:
            # Nothing to quantize (vq would raise on None): keep the image in
            # the output, with an empty histogram, so rows still match labels.
            qunatized_features.append([0.0] * n_words)
            continue

        # Nearest-neighbour assignment of every descriptor to a dictionary word
        (assignments, _) = vq(descriptors, dictionary)
        qunatized_features.append(calc_histogram(assignments))

    if tfidf_opt:
        print('Optimization is done with tf–idf')
        return tfidf(qunatized_features)
    return qunatized_features



def svm_classify(train_image_feats, train_labels, test_image_feats):
    """Train a linear SVM on the training features and label the test features.

    Parameters
    ----------
    train_image_feats : array-like
        One feature vector per training image.
    train_labels : array-like
        Label of each training image.
    test_image_feats : array-like
        One feature vector per image to classify.

    Returns
    -------
    Predicted label for every row of ``test_image_feats``.
    """
    # Linear SVM; C is the regularization parameter.
    classifier = LinearSVC(C=7, tol=1e-4, random_state=0)
    return classifier.fit(train_image_feats, train_labels).predict(test_image_feats)

# Manual confusion-matrix implementation, kept as an alternative to sklearn's confusion_matrix.
def find_confusion_matrix(test_labels,predicted_labels):
    """Count how often each true label was predicted as each label.

    Parameters
    ----------
    test_labels : sequence
        True labels; each value is converted to ``int``.
    predicted_labels : sequence
        Predicted labels, same length as ``test_labels``.

    Returns
    -------
    numpy.ndarray
        Square float matrix whose rows are true labels and whose columns are
        predicted labels, both ordered by the sorted set of labels that occur
        in either input.

    Raises
    ------
    ValueError
        If the two sequences differ in length.
    """
    # Do not show +0e expressions when importing labels from a CSV file
    np.set_printoptions(suppress=True)

    true_ids = np.asarray(test_labels).astype(int).ravel()
    pred_ids = np.asarray(predicted_labels).astype(int).ravel()
    if len(true_ids) != len(pred_ids):
        raise ValueError('test_labels and predicted_labels must have the same length')

    # Size the matrix from the labels seen in BOTH inputs. Using only the
    # predicted labels (as before) raised IndexError whenever a true class was
    # never predicted or the labels were not exactly 0..N-1.
    labels = np.unique(np.concatenate((true_ids, pred_ids)))
    position = {label: idx for idx, label in enumerate(labels.tolist())}

    matrix = np.zeros((len(labels), len(labels)))
    for actual, predicted in zip(true_ids.tolist(), pred_ids.tolist()):
        matrix[position[actual], position[predicted]] += 1

    return matrix


def classification_results(train_image_paths, test_image_paths, train_labels, test_labels,
    categories,predicted_categories):
    """Plot and print the percentage confusion matrix of a classification run.

    Parameters
    ----------
    train_image_paths, test_image_paths, train_labels : unused
        Kept so existing callers keep working.
    test_labels : sequence
        True category name of every evaluated image.
    categories : sequence
        All category names; their order defines the matrix rows/columns.
    predicted_categories : sequence
        Predicted category name of every evaluated image.

    Returns
    -------
    numpy.ndarray
        ``len(categories)`` x ``len(categories)`` matrix in which row ``i``
        holds the percentage of images of category ``i`` assigned to each
        category. Rows of categories without any true sample are all zero.

    Notes
    -----
    The accuracy in the figure title is the mean of the per-class diagonal
    entries (averaged over the classes that have at least one true sample).
    """
    cat2idx = {cat: idx for idx, cat in enumerate(categories)}
    out_true = [cat2idx[cat] for cat in test_labels]
    out_predicted = [cat2idx[cat] for cat in predicted_categories]

    # Pass the full label list so the matrix always matches the tick labels,
    # even if a category occurs in neither the true nor the predicted labels.
    class_ids = list(range(len(categories)))
    counts = confusion_matrix(out_true, out_predicted, labels=class_ids).astype(float)

    # Row-normalize; guard against categories without true samples (0/0 -> NaN).
    support = counts.sum(axis=1, keepdims=True)
    conf_mat = counts / np.where(support > 0, support, 1)

    has_samples = support[:, 0] > 0
    valid_acc = np.mean(np.diag(conf_mat)[has_samples]) if has_samples.any() else 0.0

    # Percentage confusion matrix
    conf_mat = 100 * conf_mat

    fig, ax = plt.subplots()
    im = ax.imshow(conf_mat, interpolation='nearest', cmap=plt.cm.Reds)
    fig.colorbar(im, ax=ax)
    ax.set_title('Validation Accuracy = {:4.2f}%'.format(valid_acc*100))
    tick_marks = np.arange(len(categories))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(categories, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(categories)
    fig.tight_layout()
    plt.show()
    plt.close(fig)

    print('Numeric Representation of The Found Confusion Matrix ..')
    print(conf_mat)
    # Error totals come from the raw counts. The percentage matrix only equals
    # the counts when every class has exactly 100 evaluated images.
    FP = counts.sum(axis=0) - np.diag(counts)
    FN = counts.sum(axis=1) - np.diag(counts)
    print("Total False Positive Values = {}".format(np.sum(FP)))
    print("Total Flase Negative Values = {}".format(np.sum(FN)))
    return conf_mat

def show_predicted_image(test_label,predicted_label,image_path):
    """Draw one image on a new figure, titled with its actual and predicted category.

    Parameters
    ----------
    test_label : actual category of the image.
    predicted_label : category predicted for the image.
    image_path : str
        File read with ``cv2.imread``.
    """
    plt.figure(figsize=(8,8))
    bgr_pixels = cv2.imread(image_path)
    # OpenCV loads BGR; matplotlib expects RGB.
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_pixels)
    plt.xticks([])
    plt.yticks([])
    caption = 'Actual Category : {} --> Predicted Category: {}'.format(test_label,predicted_label)
    plt.title(caption)
 


In [47]:
def read_dataset(data_path, train_categories, validation_categories, img_format='jpg'):
    """Collect image paths and labels for the training and validation folders.

    Parameters
    ----------
    data_path : str
        Directory that contains one sub-folder per entry of the category lists.
    train_categories, validation_categories : sequence of str
        Sub-folder names; the label of a folder is the part of its name before
        the first underscore.
    img_format : str
        File extension (without dot) of the images to pick up.

    Returns
    -------
    (train_image_paths, test_image_paths, train_labels, test_labels)
        Four lists. Paths are shuffled within each folder; folders keep the
        order in which they were given.
    """
    def collect(category_folders):
        # Gather the (shuffled) image paths and matching labels folder by folder.
        paths, labels = [], []
        for folder in category_folders:
            matches = glob(osp.join(data_path, folder, '*.{:s}'.format(img_format)))
            shuffle(matches)
            paths += matches
            labels += [folder.partition('_')[0]] * len(matches)
        return paths, labels

    train_image_paths, train_labels = collect(train_categories)
    test_image_paths, test_labels = collect(validation_categories)
    return train_image_paths, test_image_paths, train_labels, test_labels

In [48]:
# Reading validation and train images
# All train/validation category folders should be under dataset_folder_name (as given in LMS).

train_categories = ['airplanes_train','cars_train','faces_train','motorbikes_train']

validation_categories = ['airplanes_validation','cars_validation','faces_validation','motorbikes_validation']

categories = ['airplanes','cars','faces','motorbikes']
# Raw string: the Windows path contains backslashes that would otherwise be
# parsed as (invalid) escape sequences such as "\M" or "\S".
# This location is machine specific - point it at your own copy of the data.
working_path = r'E:\My Google Drive\OZU\Spring2020\Computer Vision\Projects\Final Project\Final-Assignment_data\Final-Assignment data'
dataset_folder_name = 'dataset'
data_path = osp.join(working_path, dataset_folder_name)

train_image_paths, test_image_paths, train_labels, test_labels = read_dataset(data_path,train_categories,validation_categories)
print('There are {} train and {} validation images ready to be process ..'.format(len(train_image_paths),len(test_image_paths)))
print('There are {} different classes in this project '.format(len(np.unique(train_labels))))

There are 160 train and 400 validation images ready to be process ..
There are 4 different classes in this project 


In [49]:
print('Computing Bag- of - Features Dictionary')

dictionary_filename = 'test.pkl'
if not osp.isfile(dictionary_filename):
    # No cached vocabulary on disk: build it from the training images.
    print('No existing visual word dictionary found. Computing one from training images')
    # Number of KMeans clusters (= dictionary size); ignored when Mean-Shift estimates K.
    dictionary_size = 5
    # Clustering modes of compute_dictionary:
    #   means_shift=True,  means_shift_clustering=True  -> everything is done with Mean-Shift
    #   means_shift=True,  means_shift_clustering=False -> K found by Mean-Shift, clustering by KMeans
    #   means_shift=False                               -> KMeans with dictionary_size clusters
    dictionary = compute_dictionary(
        train_image_paths,
        dictionary_size,
        grid_sift=True,
        means_shift=False,
        means_shift_clustering=False,
    )
    with open(dictionary_filename, 'wb') as f:
        pickle.dump(dictionary, f)
        print('{:s} saved'.format(dictionary_filename))

Computing Bag- of - Features Dictionary
No existing visual word dictionary found. Computing one from training images
SIFTs Extracted of 30 train images, 130 many images left
SIFTs Extracted of 60 train images, 100 many images left
SIFTs Extracted of 90 train images, 70 many images left
SIFTs Extracted of 120 train images, 40 many images left
SIFTs Extracted of 150 train images, 10 many images left
Total number of SIFT descriptors = 19288, 120.55 per image as average
Starting clustering operations ..
Used Number of clusters by user input = 5
test.pkl saved


In [50]:
dictionary_filename = 'test.pkl'

# Encode every image as a histogram of visual words from the saved dictionary.
# NOTE(review): the dictionary cell builds the vocabulary with grid_sift = True,
# while the histograms here use grid_sift = False - confirm this mix is intended.
train_image_feats = hist_calc_and_feature_quan(
    train_image_paths,
    dictionary_filename,
    grid_sift=False,
    tfidf_opt=False,
)
test_image_feats = hist_calc_and_feature_quan(
    test_image_paths,
    dictionary_filename,
    grid_sift=False,
    tfidf_opt=False,
)

Used dictionary: test.pkl
Used dictionary: test.pkl


In [None]:
print('SVM Classification and Confusion Matrix Results')
# Train on the training histograms and label the validation histograms.
predicted_categories = svm_classify(train_image_feats, train_labels, test_image_feats)
# %matplotlib inline

# Plot/print the confusion matrix and keep it for later inspection.
cm = classification_results(
    train_image_paths,
    test_image_paths,
    train_labels,
    test_labels,
    categories,
    predicted_categories,
)