In [None]:
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
import joblib

# Number of classes; also the side length of the confusion matrix
class_n = 12

# Dataset root and the three splits beneath it
data_folder = '../dataset'
training_set_path = f'{data_folder}/train'
validation_set_path = f'{data_folder}/valid'
test_set_path = f'{data_folder}/test'
#test_set_path = 'detection_output'

# True  -> rebuild the vocabulary and refit the SVM from the training set.
# False -> reuse the saved vocabulary / model and only compute performance stats.
# Retraining is expensive, so only retrain when the meta parameters change.
retrain = True

# Prefix prepended to the saved vocabulary and model file names
training_save_folder = ''

# Meta parameters
K = 500                  # number of KMeans clusters (visual words)
sift_Nfeatures = 500     # nfeatures passed to cv2.SIFT_create
svm_regularisation = 10  # C passed to SVC
svm_gamma = 0.001        # gamma passed to SVC

The pipeline:
1. Extract SIFT feature vectors from the images' bounding boxes and package them into a bag-of-visual-words (BoVW) histogram.
2. Classify the histograms with the scikit-learn SVC classifier (kernel: TBD).

Note that this program does not know whether the input files are the training set, the validation set, or output from the HOG detector. 

In [44]:
# Load patches
def loadPatchesLabels(path: str):
    """Crop every labelled bounding box out of the images under ``path``.

    ``path/images`` is scanned for ``.jpg`` files (extension matched
    case-insensitively). Each image needs a label file with the same stem in
    ``path/labels``; every non-blank line of that file is
    ``class_id x_center y_center width height`` where the four geometry values
    are fractions of the image width / height.

    Args:
        path: dataset split directory containing ``images`` and ``labels``.

    Returns:
        ``(patches, class_ids)``: two parallel lists. ``patches`` holds the
        grayscale crops, ``class_ids`` the matching class ids as parsed
        (floats).

    Raises:
        NameError: a label file or image is missing, or the image cannot be
            decoded.
        ImportError: a label line does not have exactly five fields.
    """
    image_dir = path + '/images'
    label_dir = path + '/labels'

    patches = []
    class_ids = []

    # os.listdir returns entries in arbitrary order; sort so the patch order
    # (and anything fitted on it) is reproducible between runs and machines.
    for img_name in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(img_name)
        if ext.lower() != '.jpg':
            continue

        img_path = os.path.join(image_dir, img_name)
        # Build the label name from the stem so '.JPG' images (accepted by the
        # case-insensitive filter above) still map to '<stem>.txt'.
        label_path = os.path.join(label_dir, stem + '.txt')

        if not os.path.exists(label_path):
            raise NameError('cannot find label at '+ label_path)

        if not os.path.exists(img_path):
            raise NameError('cannot find image at '+ img_path)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            raise NameError('cannot read image at '+ img_path)
        h, w = img.shape[:2]

        with open(label_path, 'r') as l:
            for line in l:
                details = line.strip().split()
                if not details:
                    # blank lines carry no annotation
                    continue
                if len(details) != 5:
                    raise ImportError(
                        'expected 5 fields per label line in ' + label_path
                        + ', got: ' + line.strip()
                    )
                class_id, x_center, y_center, width, height = map(float, details)

                # Transform the fractional label values into pixel units
                x_center_abs = x_center * w
                y_center_abs = y_center * h
                width_abs = width * w
                height_abs = height * h

                # Box corners, clamped to the image bounds
                x1 = int(x_center_abs - width_abs / 2)
                x2 = int(x_center_abs + width_abs / 2)
                y1 = int(y_center_abs - height_abs / 2)
                y2 = int(y_center_abs + height_abs / 2)
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(w, x2), min(h, y2)
                insect_patch = img[y1:y2, x1:x2]

                patches.append(insect_patch)
                class_ids.append(class_id)

    return patches, class_ids

# Crop the labelled patches of the validation and training splits once, up front;
# the cells below read these lists for vocabulary building, training and evaluation.
valid_patches, valid_class_ids = loadPatchesLabels(validation_set_path)
train_patches, train_class_ids = loadPatchesLabels(training_set_path)

In [45]:
# this builds the visual vocabulary for BoVW histogram
def trainVisualWords(patches):
    """Build the visual vocabulary used for BoVW histograms.

    SIFT descriptors are extracted from every patch, stacked, and clustered
    with KMeans into ``K`` groups; the cluster centres are the visual words.
    The vocabulary is also written to ``vocabulary.npy`` (prefixed with
    ``training_save_folder``).

    Args:
        patches: sequence of image arrays to extract descriptors from.

    Returns:
        Array of cluster centres, one visual word per row.

    Raises:
        ValueError: no patch is usable, or SIFT found no descriptors at all.
    """
    # Fail early with a clear message instead of deep inside np.vstack.
    if not any(p is not None and p.size > 0 for p in patches):
        raise ValueError("no non-empty patches to build the visual vocabulary from")

    sift = cv2.SIFT_create(nfeatures = sift_Nfeatures)
    descriptor_dictionary = []
    for patch in patches:
        if patch is None or patch.size == 0:
            # A degenerate bounding box gives a zero-pixel crop; don't pass it to SIFT.
            descriptors = None
        else:
            _, descriptors = sift.detectAndCompute(patch, None)
        if descriptors is None:
            print("Has a non-detection patch")
            continue
        descriptor_dictionary.append(descriptors)

    if not descriptor_dictionary:
        raise ValueError("SIFT found no descriptors in any patch; cannot build a vocabulary")

    descriptor_dictionary = np.vstack(descriptor_dictionary)
    # Fixed seed so the vocabulary is reproducible for a given descriptor set
    kmeans = KMeans(n_clusters=K, random_state=42, verbose=0)
    kmeans.fit(descriptor_dictionary)

    descriptor_vocabulary = kmeans.cluster_centers_
    np.save(training_save_folder + "vocabulary.npy", descriptor_vocabulary)
    return descriptor_vocabulary

In [46]:
def patchToBoVW(patch, vocab):
    """Encode one image patch as an L2-normalised bag-of-visual-words histogram.

    SIFT descriptors are extracted from ``patch``; each descriptor is assigned
    to the nearest row of ``vocab`` (Euclidean distance) and the histogram
    counts how often each word was chosen.

    Args:
        patch: image array to describe.
        vocab: 2-D array with one visual word (cluster centre) per row.

    Returns:
        1-D array with one entry per visual word; all zeros when the patch is
        empty or yields no descriptors.
    """
    # Size the histogram from the vocabulary itself rather than the global K,
    # so a vocabulary loaded from disk stays consistent even if K was edited.
    n_words = len(vocab)

    # A degenerate bounding box gives a zero-pixel crop: nothing to describe.
    if patch is None or patch.size == 0:
        return np.zeros(n_words)

    sift = cv2.SIFT_create(nfeatures = sift_Nfeatures)
    # Compute SIFT features for the new patch
    _, descriptors = sift.detectAndCompute(patch, None)
    if descriptors is None or len(descriptors) == 0:
        # featureless patch -> zero histogram
        return np.zeros(n_words)

    # Pairwise distances, shape (n_descriptors, n_words)
    distances = np.linalg.norm(descriptors[:, np.newaxis] - vocab, axis=2)
    nearest_words = np.argmin(distances, axis=1)

    # Build histogram (word frequency count), then L2-normalise it
    bovw_vector = np.bincount(nearest_words, minlength=n_words)
    bovw_vector = normalize(bovw_vector.reshape(1, -1), norm='l2').flatten()
    return bovw_vector


In [47]:
# Training process (BoVW)
if not retrain:
    # Reuse the vocabulary written by an earlier training run
    vocab = np.load(training_save_folder + 'vocabulary.npy')
else:
    # Cluster the training descriptors into visual words; this also rewrites
    # the vocabulary file in the save folder
    vocab = trainVisualWords(train_patches)
    # One BoVW histogram per training patch, in patch order
    train_bovw_matrix = [patchToBoVW(patch, vocab) for patch in train_patches]


Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch
Has a non-detection patch


In [48]:
# Training process (SVM)
if retrain:
    # probability=True makes SVC shuffle the data for its internal
    # cross-validated probability calibration; fix random_state so the saved
    # model is reproducible (same seed as the KMeans vocabulary).
    # NOTE: gamma is only used by the 'rbf', 'poly' and 'sigmoid' kernels, so
    # svm_gamma has no effect while kernel='linear'.
    clf = SVC(kernel='linear', C=svm_regularisation, gamma=svm_gamma, probability=True,
              class_weight='balanced', random_state=42)
    clf.fit(train_bovw_matrix, train_class_ids)

    joblib.dump(clf, training_save_folder + 'svm_model.bin')
else:
    # joblib.load unpickles the file, which can execute arbitrary code:
    # only load model files produced by this notebook / a trusted source.
    clf = joblib.load(training_save_folder + 'svm_model.bin')

In [49]:
# Evaluate the trained pipeline on the validation patches.
# Confusion matrix layout: rows = true class, columns = predicted class.
cm = np.zeros((class_n, class_n))
for patch, label in zip(valid_patches, valid_class_ids):
    bovw = patchToBoVW(patch, vocab)
    # predict expects a batch, so wrap the single histogram in a list
    predicted_class_id = int(clf.predict([bovw])[0])
    true_class_id = int(label)
    cm[true_class_id, predicted_class_id] += 1

cm

array([[76., 22., 22., 17., 10., 11.,  5.,  3.,  4.,  2.,  6.,  0.],
       [14., 57.,  8.,  9.,  1.,  3.,  5.,  1.,  4.,  2.,  5.,  1.],
       [19., 12., 16.,  5.,  4., 14.,  9.,  3.,  5.,  4.,  5.,  4.],
       [18., 15.,  6., 54., 13.,  4.,  5.,  5.,  8.,  4.,  4.,  3.],
       [11.,  7.,  1.,  4., 21.,  3.,  3.,  3.,  9.,  6.,  1.,  3.],
       [11.,  5., 19.,  7.,  6., 30.,  4.,  8.,  3.,  2.,  8.,  1.],
       [ 7.,  4.,  9.,  7.,  8.,  5., 36.,  7.,  7.,  4.,  6.,  2.],
       [ 1.,  4.,  2.,  4.,  6.,  3., 12., 59.,  5.,  2.,  2.,  1.],
       [ 4.,  5.,  5., 13., 12.,  4.,  3.,  5., 19.,  9.,  5.,  7.],
       [ 2.,  4., 12.,  5.,  5.,  5.,  4.,  4., 16., 39.,  2.,  9.],
       [ 6., 14., 10.,  7.,  1., 12., 16.,  4.,  2.,  3., 57.,  0.],
       [ 3.,  2.,  9.,  5.,  4.,  1.,  3.,  4.,  4.,  5.,  0., 65.]])

In [51]:
# Statistics on validation set
# Overall accuracy = correctly classified patches (the diagonal) / all patches.
# Derived from the matrix itself instead of a hard-coded 12, so it stays
# correct if class_n changes.
total = cm.sum()
correct = np.trace(cm)
accuracy = correct/total
accuracy


np.float64(0.39448173005219983)