# In [None]:  (notebook cell prompt left over from export; kept as a comment so the file parses as Python)
import copy
import math
import os

import cv2
import numpy as np
import pandas as pd
import pydotplus
from IPython.display import Image
from IPython.display import display
from matplotlib import pyplot as plt
from skimage import data
from skimage.color import label2rgb
from skimage.draw import ellipse_perimeter
from skimage.feature import canny
from skimage.feature import local_binary_pattern
from skimage.transform import hough_ellipse
from skimage.transform import rotate
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): sklearn.externals.six is, as far as I know, gone from recent
# scikit-learn releases -- confirm against the installed version.
from sklearn.externals.six import StringIO
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz


class CVModel():
    """
    Feature-extraction and classification pipeline for a folder of images.

    For every image that is not skipped, a fixed-length feature row is built
    from colour statistics, Hough-circle statistics and a uniform Local Binary
    Pattern histogram. The rows are then used to train and evaluate a
    RandomForestClassifier.
    """

    # Parameters passed to skimage's local_binary_pattern.
    LBP_POINTS = 24
    LBP_RADIUS = 3

    # Length of a complete feature row: 1 mean-HSV value, 4 Hough-circle
    # statistics and LBP_POINTS + 2 histogram bins (the number of distinct
    # codes the "uniform" LBP method can produce).
    N_FEATURES = 1 + 4 + (LBP_POINTS + 2)

    def __init__(self, image_path, label_path, vis = False, conf = False, cv = False):
        """
        Initializes an instance of the CVModel object type.

        Parameters:
            image_path: folder containing the image files.
            label_path: CSV file with a 'Label' column.
            vis:  if truthy, classify() also renders one tree of the forest.
            conf: if truthy, classify() also prints a classification report.
            cv:   stored on the instance; not currently used by any method.

        Raises:
            ValueError: if the number of files and the number of labels differ.
        """

        self.labels = list(pd.read_csv(label_path)['Label'])
        self.folder = image_path

        # os.listdir returns names in arbitrary order; sorting makes the
        # positional pairing of files and labels deterministic.
        # NOTE(review): labels are matched to files purely by position --
        # confirm the CSV rows are in sorted-filename order.
        self.files = sorted(os.listdir(self.folder))

        # Fail fast: a mismatch would otherwise only surface in
        # train_test_split, after every image has been processed.
        if len(self.files) != len(self.labels):
            raise ValueError(
                "Found {} files in {!r} but {} labels in {!r}".format(
                    len(self.files), image_path, len(self.labels), label_path))

        self.data = [[] for _ in self.files]
        self.vis = vis
        self.conf = conf
        self.cv = cv

        # Indices of images labelled 8; run() does not extract features for
        # them, so their rows stay empty and are dropped before training.
        self.skip = [i for i, label in enumerate(self.labels) if label == 8]

    def preProcess(self, image, show = False):

        """
        This method will take in the path of a raw image and returns a filtered
        image of just the leaves that has been smoothed via median smoothing

        Parameters:
            image: path of the image file to read.
            show:  if truthy, display the smoothed image in a window and block
                   until a key is pressed. Off by default so that batch runs
                   are not interrupted once per image.

        Returns:
            (rgb_cs, hsv_cs, colored, smoothed, med): the image as loaded (in
            OpenCV's BGR channel order despite the name), its HSV conversion,
            the green-masked image, the box-filtered image and the
            median-blurred image.

        Raises:
            ValueError: if the file cannot be read as an image.
        """

        rgb_cs = cv2.imread(image, cv2.IMREAD_COLOR)
        if rgb_cs is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: {}".format(image))
        hsv_cs = cv2.cvtColor(rgb_cs, cv2.COLOR_BGR2HSV)

        lower_green = np.array([50,60,50])
        upper_green = np.array([255,200,255])

        #Removes non-green colors and returns color to the images
        filtered = cv2.inRange(hsv_cs, lower_green, upper_green)
        colored = cv2.bitwise_and(rgb_cs, rgb_cs, mask = filtered)

        #Smooths the images with a 3x3 box filter followed by a median blur
        kernel = np.ones((3, 3),np.float32)/9
        smoothed = cv2.filter2D(colored, -1, kernel)
        med = cv2.medianBlur(smoothed,5)

        #Optionally shows the filtered and smooth image
        if show:
            cv2.imshow("Smooothed", med)
            cv2.waitKey()

        return(rgb_cs, hsv_cs, colored, smoothed, med)

    def globalFeatures(self, rgb_cs, hsv_cs, colored, smoothed, med, i):

        """
        This method will take a processed image and append the following
        features to self.data[i]:
        Mean of all HSV values (a single scalar over all channels)
        Mean area of detected circles
        Total area of detected circles
        Image area divided by total circle area
        Number of detected circles

        Detected circles are used as a stand-in for leaves. When no circle is
        found the four circle statistics are all 0.
        """

        self.data[i].append(np.mean(hsv_cs))

        #Uses Hough Transform as a heuristic for leaf identification
        img = cv2.cvtColor(colored, cv2.COLOR_BGR2GRAY)
        circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1, 27, param1=15, param2=8, minRadius=3, maxRadius=15)

        # HoughCircles returns None when nothing is detected; otherwise each
        # circle is a row whose third entry is the radius.
        if circles is None:
            radii = []
        else:
            found = np.asarray(circles)
            radii = found.reshape(-1, found.shape[-1])[:, 2].tolist()

        areas = [math.pi * (r**2) for r in radii]
        total_area = sum(areas)

        # All four statistics are computed before anything is appended so a
        # row can never end up partially filled.
        if total_area > 0:
            stats = [
                total_area/len(areas),
                total_area,
                (img.shape[0] * img.shape[1])/total_area,
                len(areas),
            ]
        else:
            #If no leaves are found
            stats = [0, 0, 0, 0]

        self.data[i].extend(stats)

    def localFeatures(self, rgb_cs, hsv_cs, colored, smoothed, med, i):

        """
        This method will take a processed image and append its Local Binary
        Pattern histogram (LBP_POINTS + 2 density values) to self.data[i]
        """

        #Analying the binary of the image to identify regions of similarity, boundaries and background
        img = cv2.cvtColor(rgb_cs, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(img, self.LBP_POINTS, self.LBP_RADIUS, "uniform")

        #Parses the binary patterns into a histogram. The bin count is fixed
        #rather than derived from lbp.max(), so every image yields the same
        #number of features even if its highest code never occurs.
        n_bins = self.LBP_POINTS + 2
        hist, _ = np.histogram(lbp, density = True, bins = n_bins, range = (0, n_bins))

        #Uses the histogram bins as a feature
        self.data[i].extend(hist)

    def visualizeTree(self):

        """
        This method renders the first decision tree of the trained Random
        Forest Classifier as a PNG, displays it and returns the IPython Image.

        export_graphviz works on a single tree, so one member of the forest
        is exported rather than the forest object itself.
        """

        # out_file=None makes export_graphviz return the DOT source as a string.
        dot_data = export_graphviz(self.clf.estimators_[0], out_file=None, filled=True, rounded=True, special_characters=True)
        graph = pydotplus.graph_from_dot_data(dot_data)
        image = Image(graph.create_png())

        # A bare Image(...) expression inside a method is not rendered by the
        # notebook, so it is displayed explicitly.
        display(image)
        return image

    def classify(self):

        """
        This method trains the Random Forest Classifier on an 80/20 split of
        the extracted features and prints the confusion matrix. Depending on
        the constructor flags it also prints a classification report (conf)
        and renders one tree of the forest (vis).
        """

        X_train, X_test, y_train, y_test = train_test_split(self.data, self.labels, test_size=0.2)

        self.clf = RandomForestClassifier()
        self.clf.fit(X_train, y_train)

        preds = self.clf.predict(X_test)

        print(confusion_matrix(y_test, preds))

        #Optional fuctionality can be used via arguments
        if self.conf:
            print(classification_report(y_test, preds))

        if self.vis:
            self.visualizeTree()

    def _dropIncompleteRows(self):

        """
        Removes every feature row whose length is not N_FEATURES, together
        with the label at the same position.
        """

        keep = [idx for idx, row in enumerate(self.data) if len(row) == self.N_FEATURES]
        self.data = [self.data[idx] for idx in keep]
        self.labels = [self.labels[idx] for idx in keep]

    def run(self):

        """
        This is a wrapper method that calls the other methods of the class:
        it extracts features for every non-skipped image, drops incomplete
        rows, saves the features to "less_data.csv" and trains the classifier.
        """

        skipped = set(self.skip)

        #Loops through images and extracts the features
        for i, fname in enumerate(self.files):
            if i in skipped:
                continue

            name = os.path.join(self.folder, fname)

            try:
                rgb_cs, hsv_cs, colored, smoothed, med = self.preProcess(name)
            except ValueError as err:
                # Unreadable file: leave the row empty so it is dropped below.
                print(err)
                continue

            self.globalFeatures(rgb_cs, hsv_cs, colored, smoothed, med, i)
            self.localFeatures(rgb_cs, hsv_cs, colored, smoothed, med, i)

        #Removes skipped images and any instances of errors with feature extraction
        self._dropIncompleteRows()

        #Saves features in a CSV file and calls the classification method on saved features
        np.savetxt("less_data.csv", self.data)
        self.data = np.asarray(self.data)
        self.classify()


# Driver: build the model from the local image folder and label CSV, then run
# the full extract/train/evaluate pipeline. Guarded so that importing this
# file does not start a run as a side effect.
if __name__ == "__main__":
    x = CVModel(r"C:\Users\elire\Downloads\images", r"C:\Users\elire\Downloads\DeepWeeds-master\DeepWeeds-master\labels\labels.csv", cv = 5)
    x.run()

