1. sift to find descriptors for every image, and stack all the descriptors
2. use the stacked descriptor to train a kmean for clustering
3. create a histogram for every image
4. train a linear model to predict the number count of treelogs

In [112]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
class ObjectCounting():
    def __init__(self,n_cluster):
        self.n_cluster = n_cluster
        self.mega_histogram=None
        self.kmeans_model = KMeans(n_clusters = n_cluster,verbose=1,n_init=1,max_iter=5)
        self.kmean_trained = None
        self.reg = LinearRegression()
    def _get_img(self,folder):   
        img_list=[]
        img_file_list=[]
        for root,_,files in os.walk(folder):
            for file in files:
                file_path = os.path.join(root,file)
                img_list.append(cv2.imread(file_path))
                img_file_list.append(file)
        return img_list, len(img_list), img_file_list
#     def _img2gray(self,img):
#         return cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    def _get_desc(self,img):
        sift = cv2.xfeatures2d.SIFT_create()
        keypoint, descriptor = sift.detectAndCompute(img, None)
        return keypoint, descriptor
    
    def _get_desc_list(self,img_list):
        for img in img_list:
#             gray = self._img2gray(img)
            keypoint, descriptor = self._get_desc(img)
            desc_list.append(descriptor)
        return desc_list
    def _get_stacked_Desc(self,desc_list):
        desc_vstack = np.array(desc_list[0])
        for desc in desc_list[1:]:
            desc_vstack = np.vstack((desc_vstack, desc))
        return desc_vstack
    def _desc_clustering(self,desc_vstack):
        self.kmean_trained = self.kmeans_model.fit_predict(desc_vstack)
    def _get_desc_histogram(self,n_images,desc_list):
        self.mega_histogram = np.array([np.zeros(self.n_cluster) for i in range(n_images)])
        jobs = 0
        for img in range(n_images):
            descs = len(desc_list[img])
            for desc in range(descs):
                cluster = self.kmean_trained[jobs+desc]
                self.mega_histogram[img][cluster] += 1
            jobs += descs
    def ModelTraining(self,train,answer):
        print("Loading Training Dataset")
        train,n_images,img_file_list = self._get_img(train)
        print("Preprocessing Training Dataset")
        desc_list = self._get_desc_list(train)
        desc_vstack = self._get_stacked_Desc(desc_list)
        print("Clustering Descriptors")
        self._desc_clustering(desc_vstack)
        print("Creating Bag of Words Vocabulary !")
        self._get_desc_histogram(n_images,desc_list)
        
        X = self.mega_histogram
        Y = np.zeros(n_images)
        for idx,img_file in enumerate(img_file_list):
            Y[idx] = answer.set_index('Image Name').loc[img_file,"Counts"]
        
        self.reg.fit(X, Y)
        
    def predict(self,test):
        test,n_images = self._get_img(test)
        desc_list = self._get_desc_list(test)       
        desc_vstack = self._get_stacked_Desc(desc_list)
        test_clusters = self.kmean_trained.predict(desc_vstack)
        
        mega_histogram = np.array([np.zeros(self.n_cluster) for i in range(n_images)])
        jobs = 0
        for img in range(n_images):
            descs = len(desc_list[img])
            for desc in range(descs):
                cluster = test_clusters[jobs+desc]
                mega_histogram[img][cluster] += 1
            jobs += descs
            
        X = mega_histogram
        predicted_y = self.reg.predict(X)  
        return predicted_y

In [113]:
answer=pd.read_excel('C://Users//gvtc4//OneDrive//Desktop//TreeLogs//Image Count.xlsx')
train='C://Users//gvtc4//OneDrive//Desktop//TreeLogs//train'
test='C://Users//gvtc4//OneDrive//Desktop//TreeLogs//test'

In [114]:
ObjectCounting = ObjectCounting(n_cluster=60)
ObjectCounting.ModelTraining(train,answer)

Loading Training Dataset !
Preprocessing Training Dataset !
Clustering Descriptors !
Initialization complete
start iteration
done sorting
end inner loop
Iteration 0, inertia 452235430000.0
start iteration
done sorting
end inner loop
Iteration 1, inertia 433271700000.0
start iteration
done sorting
end inner loop
Iteration 2, inertia 428276840000.0
start iteration
done sorting
end inner loop
Iteration 3, inertia 426090920000.0
start iteration
done sorting
end inner loop
Iteration 4, inertia 424730800000.0
Creating Bag of Words Vocabulary !


KeyError: 'the label [Img1.jpg] is not in the [index]'

In [None]:
predictions = ObjectCounting.predict(test)