### pylegoclassifier workbook
### magnus wood, december 2020, bsyse 530 semester project
The code block below will be used in the 'pylegoclassifier.py' module. It will be used in the MATLAB integration, where images obtained by Eric are passed to functions from this code to do LEGO color classification.

This jupyter notebook exists solely for developing it. I should probably share it too.

### pylegoclassifier.py functionality
### The code needs to do this:

1. Take an image file in and ensure it is in the right format.
2. Perform background segmentation using ImageSegmentation.
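3. Data extraction:
    a. Find the contours of the segmented objects and discard the ones that are too small or too irregular.
    b. Compute the mean color values (RGB and HSV) of each remaining object and store them in a pandas dataframe.
4. Pass the dataframe to the classifier (NaiveBayes) to predict the color of each object.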

In [14]:
#%%writefile pylegoclassifier.py

# import the needed packages
import numpy as np
from matplotlib import pyplot as plt
import cv2 as cv
from scipy import ndimage
from skimage import morphology
from skimage import exposure
import os
from math import pi
from math import isnan
import pandas as pd
from sklearn.model_selection  import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from skimage.filters import sobel

# set random seed
np.random.seed(26)

# the NaiveBayes classifier I wrote for assignment 6 in BSYSE_530, modified a little for this purpose
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Bayes' rule: P(c|x) = P(x|c) * P(c) / P(x)
        P(c|x) is the posterior probability of class c given the observation x
        P(x|c) is the likelihood of x under class c
        P(c) is the class prior probability, or the prob of c occurring independently
        P(x) is the predictor prior probability; it is identical for every class,
             so it is left out when the classes are compared

    Every feature is modelled, per class, as an independent normal distribution.
    The per-feature likelihoods are therefore multiplied together; this is done
    as a sum of logarithms so that many small densities do not underflow to 0.

    Attributes set by fit():
        classes      sorted unique class labels
        count        number of classes
        feature_nums number of feature columns
        rows         number of training rows
        mean, var    arrays of shape (count, feature_nums)
        prior        array of shape (count,)
    """

    # Variances are floored at this value so that a feature which is constant
    # within a class does not cause a division by zero.
    min_variance = 1e-9

    def fit(self, features, target):
        """Learn the class priors and the per-class mean/variance of each feature.

        features: 2-D table of numeric features (DataFrame or array-like), one row per sample
        target:   1-D sequence of class labels, positionally matched to the rows of features
        Raises ValueError if the two do not have the same number of rows.
        """
        features = pd.DataFrame(features)
        target = np.asarray(target)
        if len(features) != len(target):
            raise ValueError("features and target must have the same number of rows")

        # define class variables
        self.classes = np.unique(target)
        self.count = len(self.classes)
        self.feature_nums = features.shape[1]
        self.rows = features.shape[0]

        # calculate statistics for all those features
        self.calc_statistics(features, target)

        # prior is the random chance of drawing a particular class based on its proportion in the dataset
        self.prior = self.calc_prior(features, target)

    def get_predictions(self, input_vector):
        """Return a list with the predicted class for every row of input_vector."""
        observations = np.atleast_2d(np.asarray(input_vector, dtype=float))
        return [self.calc_posterior(row) for row in observations]

    def predict(self, observation):
        """Return the predicted class for a single feature vector."""
        return self.calc_posterior(observation)

    def calc_statistics(self, features, target):
        """Compute and store the per-class mean and (population) variance of each column.

        Rows of the result follow the sorted class order, which matches self.classes.
        """
        # group positionally so a mismatched pandas index cannot misalign the labels
        grouped = pd.DataFrame(features).groupby(np.asarray(target))
        self.mean = grouped.mean().to_numpy()
        # ddof=0 keeps the population variance that np.var computes
        self.var = np.maximum(grouped.var(ddof=0).to_numpy(), self.min_variance)
        return self.mean, self.var

    def calc_prior(self, features, target):
        """Compute and store the fraction of training rows that belongs to each class."""
        # this is the probability of picking one of a class at random from the dataset
        features = pd.DataFrame(features)
        class_sizes = features.groupby(np.asarray(target)).size().to_numpy()
        self.prior = class_sizes / float(len(features))
        return self.prior

    def calc_posterior(self, x):
        """Return the class with the highest posterior probability for observation x.

        x is the feature vector of one observation (array-like of length feature_nums).
        """
        x = np.asarray(x, dtype=float)

        # log P(c) + sum_i log P(x_i|c): the log of prior * product of likelihoods
        log_posteriors = [
            np.log(self.prior[i]) + np.sum(self._log_gaussian_density(i, x))
            for i in range(self.count)
        ]
        return self.classes[np.argmax(log_posteriors)]

    def _log_gaussian_density(self, class_idx, x):
        """Return the log of the normal density of every feature of x under one class."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)

    def gaussian_density(self, class_idx, x):
        """Return the normal density of every feature of x under class class_idx."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2 / (2 * var)))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def pdf(self, x, mean, stdev):
        """Return the normal probability density of x for the given mean and standard deviation."""
        exponent = np.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return exponent * (1 / (np.sqrt(2 * np.pi) * stdev))

    def get_accuracy(self, test, predictions):
        """Return the fraction of predictions that equal the true labels in test.

        Raises ValueError when the two sequences differ in length and
        ZeroDivisionError when they are empty.
        """
        actual = np.asarray(test)
        predicted = np.asarray(predictions)
        if actual.shape != predicted.shape:
            raise ValueError("test and predictions must have the same length")
        correct = int(np.sum(actual == predicted))
        return correct / float(len(actual))


# TODO: read these and see how it works        
# https://www.mathworks.com/help/matlab/matlab_external/matlab-arrays-as-python-variables.html        
# https://www.mathworks.com/help/matlab/matlab_external/passing-data-to-python.html        
            
# this exists only for my testing purposes
class MatlabSurrogate():
    """Stand-in for the MATLAB side of the project: loads and displays images with OpenCV."""

    def __init__(self):
        self.state_of_mind = "Badass."

    def acquire_kinect_image(self, filename):
        """Load the image stored at filename and return it as a BGR array.

        give this function a filename, and it will load that image with opencv
        this will be a BGR format, because that is how opencv rolls

        Raises OSError when OpenCV cannot read an image from filename
        (cv.imread signals that by returning None instead of raising).
        """
        kinect_image = cv.imread(filename)
        if kinect_image is None:
            raise OSError(f"cv.imread could not load an image from {filename!r}")
        print(f"kinect has acquired the image with shape = {kinect_image.shape}")
        return kinect_image

    def imshow(self, image):
        """Show image in a window a quarter of its size and block until a key is pressed."""
        w, h = image.shape[1] // 4, image.shape[0] // 4
        cv.namedWindow("output", cv.WINDOW_NORMAL)
        # width and height are passed separately: this form works on every OpenCV version
        cv.resizeWindow("output", w, h)
        cv.imshow("output", image)
        cv.waitKey(0)
        cv.destroyAllWindows()
    
    
# I should probably have one image processing class that takes in a single image and then spits out a dataframe that could be used for prediction
# replaces ImageSegmenter
class ImageProcess():
    def __init__(self):
        print("image processor activated! use 'process_image_to_df()' to get back a pandas df")
    
    def dummy_method(self, a):
        if type(a) is np.ndarray:
            result = "object is a numpy.ndarray, this is perfect. Is the image RGB order or BGR?"
            return result
        else:
            result = "object is a " + str(type(a)) + "and I'm gonna have a hard time with that"
            return result
      
    
        
    def bg_segmentation(self, image, mode="hsv"):
        
        if mode=="sobel":
            from skimage.filters import sobel
            
            gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
            
            # find the edges
            elev_map = sobel(gray_image)
            
            # threshold it
            foreground = np.zeros_like(image)
            foreground[gray_image < 30] = 1
            foreground[gray_image > 150] = 2
          
            #TODO add this
        
        else:
            
#             # gaussian blur
#             blur_image = ndimage.gaussian_filter(image, sigma=4)
            
            
            # create an hsv mask for red colors
            color_mask = cv.inRange(cv.cvtColor(image, cv.COLOR_BGR2HSV), 
                                 (0, 0, 100),
                                 (180, 255, 255)).astype(np.uint8)
            
            black_mask = cv.inRange(cv.cvtColor(image, cv.COLOR_BGR2HSV), 
                                 (0, 0, 0),
                                 (179, 255, 30)).astype(np.uint8)
            
#             hsv_mask = black_mask + color_mask
            hsv_mask = black_mask + color_mask
            
            hsv_mask = np.where(hsv_mask > 0, 1, 0).astype(np.uint8)

            
#             # erode the mask
#             hsv_mask = morphology.erosion(hsv_mask, morphology.disk(5))
            
#             # gaussian blur
            hsv_mask = ndimage.gaussian_filter(hsv_mask, sigma=1)

            # erode the mask
            hsv_mask = morphology.erosion(hsv_mask, morphology.disk(5))

            # median filter to despeckle
            hsv_mask = ndimage.median_filter(hsv_mask, size=(3, 3)).astype(np.uint8)

            # binary dilation 
            hsv_mask = morphology.binary_dilation(hsv_mask, np.ones((20, 20))).astype(np.uint8)

            # fill the holes
            hsv_mask = ndimage.binary_fill_holes(hsv_mask).astype(np.uint8)

            # erode the mask
            hsv_mask = morphology.erosion(hsv_mask, morphology.disk(5))
            
            # TODO: remove this it is for testing purposes to show the segmentation
            m = MatlabSurrogate()
            m.imshow(cv.bitwise_and(image, image, mask=hsv_mask).astype(np.uint8))
            
            # apply the mask and return the result        
            return hsv_mask

        
    def bg_segmentation_eucdist(self, img_cube, roi_origin=(50, 50)):
        
        def euc_dist(roi_channels, sample_channels):
            dist = [(roi_channels[i] - sample_channels[i])**2 for i in range(0, len(sample_channels))]
            euc_dist = np.sqrt(np.sum(dist))
            return euc_dist
        
        # variables
        dist_th = 150

        # define the roi using these values and use it to subset my_image and return the subset image
        roi = np.array(img_cube[roi_origin[0]:roi_origin[0]+20, roi_origin[1]:roi_origin[1]+20,:])

        ################################################################
        # calculate the mean intensity value for the roi at each channel and store in a vector
        roi_mean_vector = np.zeros(shape=(img_cube.shape[2], 1))

        # iterate through all the channels
        for channel in range(0, img_cube.shape[2]):
            # channel of interest, reshaped to a vector
            coi = img_cube[:,:,channel]
            coi_vector = coi.reshape((img_cube.shape[0]* img_cube.shape[1]), 1)

            # mean intensity for the channel added to intensity vector
            roi_mean_vector[channel] = np.mean(coi_vector)
        #################################################################
        # knn
        output_array = np.zeros(shape=(img_cube.shape[0], img_cube.shape[1]))

        # time this process
        import time
        start_time = time.time()

        for i in range(0, output_array.shape[0]):
            for j in range(0, output_array.shape[1]):
                # calculate the euc distance from the pixel[i,j] to roi_mean_vector
                distance = euc_dist(roi_mean_vector, img_cube[i, j])
                if distance < dist_th:
                    output_array[i, j] = 1

        print(time.time() - start_time)

        # TODO: image enhancement on the output array to get rid of holes

        # label the objects
        labels, num_features = ndimage.measurements.label(output_array)

        # retain only the object 1, the apple
        mask = np.where(labels == 1, 1, 0).reshape(output_array.shape)

        # median filter to denoise
        mask = ndimage.median_filter(mask, size=(3, 3)).astype(np.int)

        return mask





        
    
    # this is the parent function of this class, it will call the other classes
    def process_image_to_df(self, image, area_th):
        """Segment a BGR image and return per-object color features.

        image:   BGR image as loaded by OpenCV
        area_th: contours whose area is not larger than this are ignored as noise

        Objects are the contours of the mask returned by bg_segmentation() that are
        larger than area_th and fill more than half of their bounding rectangle.

        Returns (df, output_image):
            df            one row per object, sorted by 'y', with the columns
                          y, color, x, object_num, r, g, b, hue, sat, val.
                          x/y are the top-left corner of the bounding rectangle,
                          'color' is a placeholder label (always 0) and the
                          remaining columns are mean channel values over the
                          pixels inside the contour.
            output_image  copy of image with each object's bounding box and
                          number drawn on it.
        """
        # get a mask by background segmentation using hsv values
        mask = self.bg_segmentation(image)

        # output image with drawn on contours
        output_image = image.copy()

        # find the contours of the detected objects in the image
        # ([-2:] copes with OpenCV versions that return either 2 or 3 values)
        contours, _hierarchy = cv.findContours(mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2:]

        columns = ['y', 'color', 'x', 'object_num', 'r', 'g', 'b', 'hue', 'sat', 'val']
        rows = []

        for cnt in contours:
            # get the x, y, w, h of the bounding rect for the contour
            x, y, w, h = cv.boundingRect(cnt)

            # contour features
            area = cv.contourArea(cnt)
            fullosity = area / (w * h)

            # get rid of tiny objects that are probably noise
            if not (area > area_th and fullosity > .5):
                continue

            # objects are numbered in the order in which they are accepted
            object_num = len(rows)

            # centroid, used to place the label (m00 > 0 because fullosity > .5)
            M = cv.moments(cnt)
            cx = int(M['m10'] / M['m00'])
            cy = int(M['m01'] / M['m00'])

            # filled mask of this object only, in bounding-box coordinates, so that
            # neighbouring objects and drawn annotations cannot leak into the statistics
            object_mask = np.zeros((h, w), dtype=np.uint8)
            cv.drawContours(object_mask, [cnt], 0, color=255, thickness=cv.FILLED, offset=(-x, -y))
            inside = object_mask > 0

            # the pixels are read from the untouched input image, never from output_image
            img_subset = image[y:y+h, x:x+w, :]
            img_subset_hsv = cv.cvtColor(img_subset, cv.COLOR_BGR2HSV)

            hue = img_subset_hsv[:, :, 0][inside]
            sat = img_subset_hsv[:, :, 1][inside]
            val = img_subset_hsv[:, :, 2][inside]
            # OpenCV images are BGR: channel 0 is blue and channel 2 is red
            b = img_subset[:, :, 0][inside]
            g = img_subset[:, :, 1][inside]
            r = img_subset[:, :, 2][inside]

            # draw the bounding box on the output img as a green boundary
            cv.rectangle(output_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

            # add the object label to the output image for identification
            cv.putText(output_image, text=str(object_num),
                       org=(cx - 5, cy - 5),
                       fontFace=cv.FONT_HERSHEY_SIMPLEX,
                       fontScale=3,
                       color=(255, 255, 255),
                       thickness=5,
                       lineType=cv.LINE_AA)

            rows.append({'color': 0,
                         'x': x,
                         'y': y,
                         'object_num': object_num,
                         'r': r.mean(),
                         'g': g.mean(),
                         'b': b.mean(),
                         'hue': hue.mean(),
                         'sat': sat.mean(),
                         'val': val.mean()})

        # rows are collected in a list because DataFrame.append no longer exists in pandas 2
        df = pd.DataFrame(rows, columns=columns)

        # end result should be a pandas dataframe and the contour image with numbers
        return df.sort_values(by='y', axis=0, ascending=True), output_image
    
    
    def hsv_slide_tool(self, image):
        """Open an interactive window for tuning HSV thresholds on a BGR image.

        Six trackbars set the lower and upper bound of hue, saturation and value.
        About once per second the image is re-masked with the current bounds and
        redrawn. Pressing 'q' or Esc closes the windows and returns.

        NOTE(review): the HSV array itself is what gets displayed, so the colors on
        screen are not the true image colors - confirm whether that is intended.
        """

        def empty(a):
            # createTrackbar requires a callback; positions are polled in the loop instead
            pass

        cv.namedWindow('masked_image', cv.WINDOW_NORMAL)
        cv.resizeWindow('masked_image', 800, 600)

        cv.namedWindow("trackbars")
        cv.resizeWindow("trackbars", 800, 300)

        # (trackbar name, start position, maximum position)
        trackbar_specs = (("hue_min", 0, 179),
                          ("hue_max", 179, 179),
                          ("sat_min", 0, 255),
                          ("sat_max", 255, 255),
                          ("val_min", 0, 255),
                          ("val_max", 255, 255))
        for bar_name, start, maximum in trackbar_specs:
            cv.createTrackbar(bar_name, "trackbars", start, maximum, empty)

        # the image does not change while the tool runs, so convert it only once
        img_hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

        while True:
            # get trackbar positions
            h_min = cv.getTrackbarPos("hue_min", "trackbars")
            h_max = cv.getTrackbarPos("hue_max", "trackbars")
            s_min = cv.getTrackbarPos("sat_min", "trackbars")
            s_max = cv.getTrackbarPos("sat_max", "trackbars")
            v_min = cv.getTrackbarPos("val_min", "trackbars")
            v_max = cv.getTrackbarPos("val_max", "trackbars")

            # create mask
            lower_hsv = np.array([h_min, s_min, v_min])
            higher_hsv = np.array([h_max, s_max, v_max])
            mask = cv.inRange(img_hsv, lower_hsv, higher_hsv)
            masked_image = cv.bitwise_and(img_hsv, img_hsv, mask=mask)

            cv.imshow('masked_image', masked_image)
            k = cv.waitKey(1000) & 0xFF # large wait time
            # 113 is 'q', 27 is Esc
            if k == 113 or k == 27:
                break

        cv.destroyAllWindows()
            
        
        

        
        
    

In [15]:
################### testing this out like it's MATLAB ##################
imageproc = ImageProcess() # does the background segmentation and other image processing methods, also data extraction
matlab = MatlabSurrogate() # does the image loading and display, standing in for the MATLAB side

# load the test image; OpenCV returns it in BGR channel order
test_image = matlab.acquire_kinect_image("images/legos_0.png")

# use the segmentation function to segment the image.
# seg_image = imageproc.bg_segmentation(test_image)

# matlab.imshow(seg_image)



# process the data fully and receive a df back, together with a copy of the image that has the detected objects drawn on it
image_df, cimg = imageproc.process_image_to_df(test_image, area_th = 1000)

# show the annotated image; this blocks until a key is pressed
matlab.imshow(cimg)

    

image processor activated! use 'process_image_to_df()' to get back a pandas df
kinect has acquired the image with shape = (2559, 1440, 3)


In [16]:
test_image = matlab.acquire_kinect_image("images/legos_0.png")

# use the segmentation function to segment the image.
seg_image = imageproc.bg_segmentation(test_image)

# the mask only holds 0 and 1, which both look black on screen; scale it to 0/255 for display
matlab.imshow(seg_image * 255)

kinect has acquired the image with shape = (2559, 1440, 3)


In [13]:
# load the test image (cv.imread returns BGR; hsv_slide_tool does the HSV conversion itself)
hsv_image = cv.imread("images/legos_0.png")
imageproc = ImageProcess()
# open the interactive HSV threshold tuner; press 'q' or Esc to close it
imageproc.hsv_slide_tool(hsv_image)

image processor activated! use 'process_image_to_df()' to get back a pandas df


In [None]:
# # data and labels
# X = df.iloc[:,1:]
# y = df.iloc[:,0]

# # split into train test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75)

# for c in np.unique(y).astype(np.int):
#     print(c)
#     X_c = X_train.iloc[:, c]
#     print(X_c)

# #         self._mean = X_c.groupby('')


# P(A|B) = P(B|A) * P(A) / P(B)
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Bayes' rule: P(c|x) = P(x|c) * P(c) / P(x)
        P(c|x) is the posterior probability of class c given the observation x
        P(x|c) is the likelihood of x under class c
        P(c) is the class prior probability, or the prob of c occurring independently
        P(x) is the predictor prior probability; it is identical for every class,
             so it is left out when the classes are compared

    Every feature is modelled, per class, as an independent normal distribution.
    The per-feature likelihoods are therefore multiplied together; this is done
    as a sum of logarithms so that many small densities do not underflow to 0.

    Attributes set by fit():
        classes      sorted unique class labels
        count        number of classes
        feature_nums number of feature columns
        rows         number of training rows
        mean, var    arrays of shape (count, feature_nums)
        prior        array of shape (count,)
    """

    # Variances are floored at this value so that a feature which is constant
    # within a class does not cause a division by zero.
    min_variance = 1e-9

    def fit(self, features, target):
        """Learn the class priors and the per-class mean/variance of each feature.

        features: 2-D table of numeric features (DataFrame or array-like), one row per sample
        target:   1-D sequence of class labels, positionally matched to the rows of features
        Raises ValueError if the two do not have the same number of rows.
        """
        features = pd.DataFrame(features)
        target = np.asarray(target)
        if len(features) != len(target):
            raise ValueError("features and target must have the same number of rows")

        # define class variables
        self.classes = np.unique(target)
        self.count = len(self.classes)
        self.feature_nums = features.shape[1]
        self.rows = features.shape[0]

        # calculate statistics for all those features
        self.calc_statistics(features, target)

        # prior is the random chance of drawing a particular class based on its proportion in the dataset
        self.prior = self.calc_prior(features, target)

    def get_predictions(self, input_vector):
        """Return a list with the predicted class for every row of input_vector."""
        observations = np.atleast_2d(np.asarray(input_vector, dtype=float))
        return [self.calc_posterior(row) for row in observations]

    def predict(self, observation):
        """Return the predicted class for a single feature vector."""
        return self.calc_posterior(observation)

    def calc_statistics(self, features, target):
        """Compute and store the per-class mean and (population) variance of each column.

        Rows of the result follow the sorted class order, which matches self.classes.
        """
        # group positionally so a mismatched pandas index cannot misalign the labels
        grouped = pd.DataFrame(features).groupby(np.asarray(target))
        self.mean = grouped.mean().to_numpy()
        # ddof=0 keeps the population variance that np.var computes
        self.var = np.maximum(grouped.var(ddof=0).to_numpy(), self.min_variance)
        return self.mean, self.var

    def calc_prior(self, features, target):
        """Compute and store the fraction of training rows that belongs to each class."""
        # this is the probability of picking one of a class at random from the dataset
        features = pd.DataFrame(features)
        class_sizes = features.groupby(np.asarray(target)).size().to_numpy()
        self.prior = class_sizes / float(len(features))
        return self.prior

    def calc_posterior(self, x):
        """Return the class with the highest posterior probability for observation x.

        x is the feature vector of one observation (array-like of length feature_nums).
        """
        x = np.asarray(x, dtype=float)

        # log P(c) + sum_i log P(x_i|c): the log of prior * product of likelihoods
        log_posteriors = [
            np.log(self.prior[i]) + np.sum(self._log_gaussian_density(i, x))
            for i in range(self.count)
        ]
        return self.classes[np.argmax(log_posteriors)]

    def _log_gaussian_density(self, class_idx, x):
        """Return the log of the normal density of every feature of x under one class."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)

    def gaussian_density(self, class_idx, x):
        """Return the normal density of every feature of x under class class_idx."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2 / (2 * var)))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def pdf(self, x, mean, stdev):
        """Return the normal probability density of x for the given mean and standard deviation."""
        exponent = np.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return exponent * (1 / (np.sqrt(2 * np.pi) * stdev))

    def get_accuracy(self, test, predictions):
        """Return the fraction of predictions that equal the true labels in test.

        Raises ValueError when the two sequences differ in length and
        ZeroDivisionError when they are empty.
        """
        actual = np.asarray(test)
        predicted = np.asarray(predictions)
        if actual.shape != predicted.shape:
            raise ValueError("test and predictions must have the same length")
        correct = int(np.sum(actual == predicted))
        return correct / float(len(actual))

    # TODO: a train_model method was started here in the notebook but never written


# the first column of df holds the labels, every other column is a feature
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

# hold out a quarter of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75)

# build the classifier and let it summarize the training data
# (class means, variances and priors)
nb = NaiveBayes()
nb.fit(X_train, y_train)

# score the model on the data it was trained on
y_train_predictions = nb.get_predictions(X_train)
rec = recall_score(y_train, y_train_predictions, average="micro")
prec = precision_score(y_train, y_train_predictions, average="micro")
acc = nb.get_accuracy(y_train, y_train_predictions)
print(f"precision is {prec}, recall is {rec}, accuracy = {acc}")

# confusion matrix, drawn as a color-coded grid
labels = list(labels_dict.items())
cm = confusion_matrix(y_train, y_train_predictions)
fig, ax = plt.subplots()
cax = ax.matshow(cm)
ax.set_title('confusion matrix of the classifier')
fig.colorbar(cax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()
print(labels)


In [None]:
# predict the held-out rows with the fitted classifier
y_test_predictions = nb.get_predictions(X_test)

# scores on the test set
rec = recall_score(y_test, y_test_predictions, average="micro")
prec = precision_score(y_test, y_test_predictions, average="micro")
acc = nb.get_accuracy(y_test, y_test_predictions)
print(f"precision is {prec}, recall is {rec}, accuracy = {acc}")

# confusion matrix, drawn as a color-coded grid
labels = list(labels_dict.items())
cm = confusion_matrix(y_test, y_test_predictions)
fig, ax = plt.subplots()
cax = ax.matshow(cm)
ax.set_title('confusion matrix of the classifier')
fig.colorbar(cax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()
print(labels)


In [None]:
# from sklearn.externals import joblib 
  
# # Save the model as a pickle in a file 
# joblib.dump(knn, 'filename.pkl') 
  
# # Load the model from the file 
# knn_from_joblib = joblib.load('filename.pkl')  
  
# # Use the loaded model to make predictions 
# knn_from_joblib.predict(X_test) 


In [None]:
df.head()

# NOTE(review): `image` is not defined in this cell; it is treated as BGR because
# it is converted with COLOR_BGR2HSV - confirm where it comes from
hsv_image = cv.cvtColor(image, cv.COLOR_BGR2HSV)

# create an hsv mask
test_image = cv.inRange(hsv_image, 
                         (50, 20, 0),
                         (160, 255, 255)).astype(np.uint8)

test_image = cv.bitwise_and(image, image, mask =test_image).astype(np.uint8)
print(test_image[0])

# matplotlib expects RGB, so convert from BGR to avoid swapped red and blue
plt.imshow(cv.cvtColor(test_image, cv.COLOR_BGR2RGB))

In [None]:
# # import the cherry images
# # C:\data\BSYSE_530\machine_vision\images\Cherries
# # there are five, with different light conditions
# # DSC_0052, 0054, 0056, 0057, 0058
# # we need to take these images and cut them into little pieces for the process to work

# # convert them to RGB
# images = [cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0052.jpg"), cv.COLOR_BGR2RGB),
#           cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0054.jpg"), cv.COLOR_BGR2RGB),
#           cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0056.jpg"), cv.COLOR_BGR2RGB),
#           cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0057.jpg"), cv.COLOR_BGR2RGB),
#           cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0058.jpg"), cv.COLOR_BGR2RGB)]

# titles = ["DSC_0052", "DSC_0054", "DSC_0056","DSC_0057","DSC_0058"]

# masked_images = []
# masks = []
# adj_images = []

# # # # image adjustment, rescale intensity
# # for i in range(0, 5):
# #     img = images[i]
# #     p2, p98 = np.percentile(img, (2, 98))
# #     adj_img = exposure.rescale_intensity(img, in_range=(p2, p98))
# #     adj_images.append(adj_img)
    
# # create the mask
# # try to screen out all the white regions
# background_mask = cv.inRange(images[0],
#                              (70,70,90),
#                              (120,120,120)).astype(np.int) * -1
# print(background_mask.shape)
# print(type(background_mask))
# # background_mask = morphology.binary_dilation(background_mask, np.ones((3, 3)))
# # closing
# background_mask = morphology.closing(background_mask, morphology.disk(2))

# # print(background_mask.shape)
# # print(background_mask)
# # print(np.mean(images[0][650:700,400:500,0]), np.mean(images[0][600:700,0:100,1]), np.mean(images[0][600:700,0:100,2]))

# # now use BGR2HSV to reverse the red and blue to make it easier for hsv filtering of the red (not around 0/360 break)
# hsv_image = cv.cvtColor(images[0], cv.COLOR_BGR2HSV)

# # create an hsv mask
# cherry_mask = cv.inRange(hsv_image, 
#                          (70, 30, 20),
#                          (255, 255, 255)).astype(np.int)


# cherry_mask = get_tgi_mask(cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0056.jpg"), cv.COLOR_BGR2RGB).astype(np.float64))

# # make that array of truth values 0 or 255 into a 1 0 array
# # cherry_mask = np.where(cherry_mask > 250, 1, 0).astype(np.int)

# # median filter to denoise
# # cherry_mask = ndimage.median_filter(cherry_mask, size=(3, 3)).astype(np.int)


# # do a little dilation to make the mask look nice
# cherry_mask = morphology.binary_dilation(cherry_mask, np.ones((3, 3)))

# # closing
# # cherry_mask = morphology.closing(cherry_mask, morphology.disk(4))

# # erode the mask
# cherry_mask = morphology.erosion(cherry_mask, morphology.disk(2))

# #combine the cherry mask and the background mask
# # cherry_mask = cherry_mask + background_mask

# for image in images:

#     # apply the mask
#     masked_image = np.zeros(image.shape)
#     for channel in range(image.shape[2]):
#         masked_image[:,:,channel] = image[:,:,channel] * cherry_mask
    
#     # the images are going back into "BGR" but thats really RGB
#     masked_images.append(masked_image.astype(np.uint8))

# # # show the images from the last batch just for kicks
# # plot_images(titles=["cherry_mask"], 
# #             images=[cherry_mask],
# #             fsize=30)



# # # show the images from the last batch just for kicks
# plot_images(titles=titles, 
#             images=masked_images,
#             fsize=30)

In [None]:
# df = pd.DataFrame(columns=['y'])

# # produce the individual images we are going to use for our data set in the neural network step
# for light_level, img_rgb in enumerate(masked_images):

#     # create the image subsets and name them as appropriate for location
#     cherry_0_0 = img_rgb[100:200,200:300,:]
#     cherry_0_1 = img_rgb[80:180,300:400,:]
#     cherry_0_2 = img_rgb[90:190,375:475,:]
#     cherry_0_3 = img_rgb[100:200,500:600,:]
#     cherry_0_4 = img_rgb[100:200,600:700,:]
#     cherry_0_5 = img_rgb[100:200,700:800,:]

#     cherry_1_0 = img_rgb[225:325,190:290,:]
#     cherry_1_1 = img_rgb[225:325,275:375,:]
#     cherry_1_2 = img_rgb[225:325,375:475,:]
#     cherry_1_3 = img_rgb[225:325,500:600,:]
#     cherry_1_4 = img_rgb[225:325,600:700,:]
#     cherry_1_5 = img_rgb[225:325,700:800,:]

#     cherry_2_0 = img_rgb[375:475,175:275,:]
#     cherry_2_1 = img_rgb[375:475,275:375,:]
#     cherry_2_2 = img_rgb[375:475,375:475,:]
#     cherry_2_3 = img_rgb[375:475,500:600,:]
#     cherry_2_4 = img_rgb[375:475,600:700,:]
#     cherry_2_5 = img_rgb[375:475,700:800,:]
    
#     rectangle_0 = img_rgb[525:550,350:350 + 25,:]
#     rectangle_1 = img_rgb[525:550,382:382 + 25,:]
#     rectangle_2 = img_rgb[527:552,415:415 + 25,:]
#     rectangle_3 = img_rgb[527:552,450:450 + 25,:]
#     rectangle_4 = img_rgb[528:553,484:484 + 25,:]
#     rectangle_5 = img_rgb[528:553,519:519 + 25,:]
#     rectangle_6 = img_rgb[529:554,554:554 + 25,:]
        
#     sticky_note = img_rgb[250:430,800:1000,:]

#     images = [cherry_0_0, cherry_0_1, cherry_0_2, cherry_0_3, cherry_0_4, cherry_0_5,
#               cherry_1_0, cherry_1_1, cherry_1_2, cherry_1_3, cherry_1_4, cherry_1_5,
#               cherry_2_0, cherry_2_1, cherry_2_2, cherry_2_3, cherry_2_4, cherry_2_5,
#               rectangle_0, rectangle_1, rectangle_2, rectangle_3, rectangle_4, rectangle_5,
#               rectangle_6, sticky_note]

# #     labels = ["light_color_cherry", "light_color_cherry", "light_color_cherry", "light_color_cherry", "light_color_cherry", "light_color_cherry",
# #               "moderate_color_cherry", "moderate_color_cherry", "moderate_color_cherry", "moderate_color_cherry", "moderate_color_cherry", "moderate_color_cherry",
# #               "dark_color_cherry", "dark_color_cherry", "dark_color_cherry", "dark_color_cherry", "dark_color_cherry", "dark_color_cherry",
# #               "light_color_rectangle", "light_color_rectangle", "moderate_color_rectangle", "moderate_color_rectangle", "moderate_color_rectangle", "dark_color_rectangle",
# #               "dark_color_rectangle", "sticky_notes"]

#     labels = [0, 0, 0, 0, 0, 0,
#               1, 1, 1, 1, 1, 1,
#               2, 2, 2, 2, 2, 2,
#               3, 3, 4, 4, 4, 5, 5, 6]
    
#     labels_dict = {0: "light_color_cherries",
#                   1: "moderate_color_cherries",
#                   2: "dark_color_cherries",
#                   3: "light_color_rectangles",
#                   4: "moderate_color_rectangles",
#                   5: "dark_color_rectangles",
#                   6: "sticky_notes"}
    
#     titles = ["cherry_0_0", "cherry_0_1", "cherry_0_2", "cherry_0_3", "cherry_0_4", "cherry_0_5",
#               "cherry_1_0", "cherry_1_1", "cherry_1_2", "cherry_1_3", "cherry_1_4", "cherry_1_5",
#               "cherry_2_0", "cherry_2_1", "cherry_2_2", "cherry_2_3", "cherry_2_4", "cherry_2_5",
#               "rectangle_0", "rectangle_1", "rectangle_2", "rectangle_3", "rectangle_4", "rectangle_5",
#               "rectangle_6", "sticky_note"]

    
#     # iterate through the zone of interest images
#     for i, image in enumerate(images):
                
# #         # set file name with light level and image title                       
# #         filename =  str(labels[i]) + " " + titles[i] + "_" + str(light_level) + ".jpg"
               
# #         # resize all images to same size for later use
# #         bgr_image = cv.resize(image, (100,100), interpolation = cv.INTER_AREA)
# #         bgr_image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
# #         cv.imwrite("cherries/" + filename, bgr_image)    

# #         # do your dataset creation right here. 
# #         hsv_image = cv.cvtColor(bgr_image, cv.COLOR_BGR2HSV)
        
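#         # contrast-stretch each channel to the 2nd-99th percentile range of channel 0
#         # NOTE(review): if image is RGB, index 1 is green and index 2 is blue, so the
#         # blue_channel / green_channel names below look swapped - confirm before reuse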
#         p1, p2 = np.percentile(image[:,:,0], (2, 99))
#         red_channel = exposure.rescale_intensity(image[:,:,0], in_range=(p1, p2))
#         blue_channel = exposure.rescale_intensity(image[:,:,1], in_range=(p1, p2))
#         green_channel = exposure.rescale_intensity(image[:,:,2], in_range=(p1, p2))
            
#         test_image = image.astype(np.float64)
#         r = test_image[:,:,0] / np.max(test_image[:,:,0])
#         g = test_image[:,:,1] / np.max(test_image[:,:,1])
#         b = test_image[:,:,2] / np.max(test_image[:,:,2])
        
#         #  gli, ngrdi, r_bg, rbg, tgi*, br, rg
#         rg_index_labels = ["gli", "ngrdi", "r_bg", "rbg", "tgi", "br", "rg"]
#         rg_index = [calc_index(test_image, idx) for idx in rg_index_labels]

#         # get the binary mask for this image, convert to unsigned 8-bit int
#         bin_image = get_tgi_mask(image)
#         print(type(bin_image), bin_image.dtype)
#         contours, hier = cv.findContours(bin_image, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
#         cnt = contours[0]
#         x, y, w, h = cv.boundingRect(cnt)
        
#         area = np.sum(bin_image)
#         cnt_area = cv.contourArea(cnt)
#         aspect_ratio = float(w)/h
#         rect_area = w * h
#         extent = float(cnt_area)/rect_area
#         hull = cv.convexHull(cnt)
#         hull_area = cv.contourArea(hull)
#         solidity = float(cnt_area)/hull_area
#         eq_diameter = np.sqrt(4*cnt_area/np.pi)
    
    
        
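#         # flatten each rescaled channel, normalized channel, and index image into one
#         # column of a per-pixel data frame, then use pandas filtering to compute the means appended to df
#         # NOTE(review): the "b"/"g" and "b_rs"/"g_rs" columns are filled from the oppositely
#         # named variables below - confirm which channel each column actually holds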
#         df_images = pd.DataFrame()
#         df_images["r_rs"] = np.ndarray.flatten(red_channel)
#         df_images["b_rs"] = np.ndarray.flatten(green_channel)
#         df_images["g_rs"] = np.ndarray.flatten(blue_channel)
#         df_images["r"] = np.ndarray.flatten(r)
#         df_images["b"] = np.ndarray.flatten(g)
#         df_images["g"] = np.ndarray.flatten(b)
#         df_images["gli"] = np.ndarray.flatten(rg_index[0])
#         df_images["ngrdi"] = np.ndarray.flatten(rg_index[1])
#         df_images["r_bg"] = np.ndarray.flatten(rg_index[2])
#         df_images["rbg"] = np.ndarray.flatten(rg_index[3])
#         df_images["tgi"] = np.ndarray.flatten(rg_index[4])
#         df_images["br"] = np.ndarray.flatten(rg_index[5])
#         df_images["rg"] = np.ndarray.flatten(rg_index[6])
               
#         df = df.append({'y' : labels[i],
#                         'mean_r_rs': df_images.r_rs[df_images.r_rs > 0].mean(),
#                         'mean_g_rs': df_images.g_rs[df_images.g_rs > 0].mean(),
#                         'mean_b_rs': df_images.b_rs[df_images.b_rs > 0].mean(),
#                         'area': area,
#                         "cnt_area": cnt_area,
# #                         "aspect_ratio": aspect_ratio,
# #                         "rect_area": rect_area,
# #                         "extent": extent,
# #                         "hull_area": hull_area, 
# #                         "solidity": solidity,
# #                         "eq_diameter": eq_diameter,
#                         'mean_r': df_images.r[df_images.r > 0].mean(),
#                         'mean_g': df_images.g[df_images.g > 0].mean(),
#                         'mean_b': df_images.b[df_images.b > 0].mean(),
#                         'gli': df_images.gli[df_images.gli < 0].mean(),
# #                         'ngrdi': df_images.ngrdi[df_images.ngrdi < 0].mean(),
#                         'r_bg': df_images.r_bg.mean(),
#                         'rbg': df_images.rbg.mean(),
#                         'tgi': df_images.tgi[df_images.tgi < 0].mean(),
#                         'br': df_images.br[df_images.br < 0].mean(),
#                         'rg': df_images.rg.mean()
#                        }, ignore_index=True)
        

#         # show the images from the last batch just for kicks
# plot_images(titles=rg_index_labels, 
#             images=rg_index,
#             fsize=30)

# for image in rg_index:
#     flat_img = np.ndarray.flatten(image)
#     print(flat_img.min(), flat_img.max())
# print(df)

    

In [None]:
# # experiment: histogram-equalize channel 0 of each image in images (plain vs. adaptive)
# # wacky_images = [exposure.equalize_hist(img[:,:,0]) for img in images]
# # wacky_images = [exposure.equalize_adapthist(img[:,:,0]) for img in images]

# test_image = cv.cvtColor(cv.imread("C:/data/BSYSE_530/machine_vision/images/Cherries/DSC_0052.jpg"), cv.COLOR_BGR2RGB).astype(np.float64)
# r = test_image[:,:,0] / np.max(test_image[:,:,0])
# g = test_image[:,:,1] / np.max(test_image[:,:,1])
# b = test_image[:,:,2] / np.max(test_image[:,:,2])


# #  gli, ngrdi, r_bg, rbg, tgi*, br, rg
# rg_index_labels = ["gli", "ngrdi", "r_bg", "rbg", "tgi", "br", "rg"]
# rg_index = [calc_index(test_image, idx) for idx in rg_index_labels]

# # show the images from the last batch just for kicks
# plot_images(titles=rg_index_labels, 
#             images=rg_index,
#             fsize=15)
