In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd 
import numpy as np 
import skimage
import scipy as sp
import time
import cv2
import os


from subprocess import check_output
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils.testing import SkipTest
from sklearn.utils.fixes import sp_version
from skimage import transform
from tensorflow import keras
from scipy import misc
from PIL import Image, ImageOps
from glob import glob

In [None]:
# Class names are the entries of the training folder.
# os.listdir is used instead of shelling out to `ls`: it needs no POSIX shell and
# yields an empty list (not ['']) when the folder has no entries, so NUM_CLASSES
# is 0 rather than 1 in that case.
# Hidden entries are skipped, as `ls` does by default, and the names are sorted so
# the class order (and any id derived from it) is deterministic.
CLASSES = sorted(
    entry for entry in os.listdir("../input/train") if not entry.startswith(".")
)
NUM_CLASSES = len(CLASSES)

In [None]:
# Locations of the input data, all resolved relative to the shared input folder.
data_dir = '../input/'
train_dat, test_dat = (os.path.join(data_dir, split) for split in ('train', 'test'))

# Sample submission table read from the same folder.
sample_dat = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

In [None]:
# File names found in each class folder, in the same order as CLASSES.
# os.listdir replaces the `ls` subprocess: an empty class folder now gives an
# empty list (0 images) instead of [''] (which was counted as 1 image).
dir_list = []
for c in CLASSES:
    class_dir = os.path.join("../input/train", c)
    # Skip hidden entries (as `ls` does by default) and keep a stable order.
    files = sorted(name for name in os.listdir(class_dir) if not name.startswith("."))
    dir_list.append(files)

# Number of images per class, largest class first. mergesort is a stable sort,
# so classes with equal counts keep their CLASSES order.
df = pd.DataFrame(
    {"n_images": [len(x) for x in dir_list]},
    index=CLASSES,
).sort_values(["n_images"], ascending=False, kind="mergesort")

In [None]:
# One row per training file: its 'train/<class>/<file>' path, the numeric class id
# (position of the class in CLASSES) and the class name.
train = pd.DataFrame(
    [
        ['train/{}/{}'.format(category, file), category_id, category]
        for category_id, category in enumerate(CLASSES)
        for file in os.listdir(os.path.join(train_dat, category))
    ],
    columns=['file', 'category_id', 'category'],
)

In [None]:
# Load every PNG of every class folder into memory, keyed by folder name.
#
# cv2.imread returns pixels in BGR channel order. The channels are swapped once
# here, so the arrays stored in `images` are in RGB order (the order plt.imshow
# expects). The original variable names and conversion flag had this backwards;
# the stored pixel data is unchanged, only the naming now matches reality.
images = {}
for class_folder_name in os.listdir(train_dat):
    class_folder_path = os.path.join(train_dat, class_folder_name)
    class_label = class_folder_name
    images[class_label] = []
    for image_path in glob(os.path.join(class_folder_path, "*.png")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image_bgr is None:
            # imread reports an unreadable or corrupt file by returning None;
            # fail with the offending path instead of an opaque cvtColor error.
            raise IOError("Could not read image: {}".format(image_path))
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        images[class_label].append(image_rgb)

In [None]:
# Report how many images were loaded for each class.
for class_name, loaded in images.items():
    print("{0} -> {1}".format(class_name, len(loaded)))

# Class names (in the dict's iteration order) and how many there are.
classes = list(images)
num_cats = len(images)

In [None]:
def plot_for_class(label):
    """Show the first images of one class in a 3x3 grid.

    Parameters
    ----------
    label : hashable
        Key into the module-level ``images`` dict; a ``KeyError`` is raised if
        it is not present.

    Up to nine images are drawn, row by row. If the class holds fewer than nine
    images the remaining grid cells are left blank instead of raising an
    ``IndexError``. Nothing is returned; the figure is drawn via matplotlib.
    """
    nb_rows = 3
    nb_cols = 3
    fig, axs = plt.subplots(nb_rows, nb_cols, figsize=(6,6))

    class_images = images[label]
    # axs.flat walks the grid row by row, matching the original nested loops.
    for n, ax in enumerate(axs.flat):
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        if n < len(class_images):
            ax.imshow(class_images[n])
        else:
            # No image left for this cell: hide the empty axes.
            ax.axis("off")

In [None]:
def create_mask_for_plant(image):
    """Build a binary mask selecting pixels inside a fixed HSV band.

    The image is converted with ``cv2.COLOR_BGR2HSV`` and thresholded to
    hue 60 +/- 35, saturation 100..255 and value 50..255 (hue 60 is green on
    OpenCV's 8-bit hue scale). The mask is then morphologically closed with an
    11x11 elliptical kernel.

    Returns the mask produced by ``cv2.inRange`` after closing.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    hue_mid, hue_span = 60, 35
    in_band = cv2.inRange(
        hsv,
        np.array([hue_mid - hue_span, 100, 50]),
        np.array([hue_mid + hue_span, 255, 255]),
    )

    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(in_band, cv2.MORPH_CLOSE, closing_kernel)

def segment_plant(image):
    """Return ``image`` with every pixel outside the plant mask set to zero.

    The mask comes from ``create_mask_for_plant``; ``cv2.bitwise_and`` of the
    image with itself keeps the original pixel values where the mask is set.
    """
    return cv2.bitwise_and(image, image, mask=create_mask_for_plant(image))

def sharpen_image(image):
    """Sharpen ``image`` by subtracting a blurred copy (unsharp masking).

    The result is ``1.5 * image - 0.5 * blurred``, where ``blurred`` is a
    Gaussian blur with sigma 3 (kernel size derived from sigma by OpenCV).
    """
    blurred = cv2.GaussianBlur(image, (0, 0), 3)
    return cv2.addWeighted(image, 1.5, blurred, -0.5, 0)

def deskew(image):
    """Remove horizontal shear (slant) from ``image`` using its image moments.

    The skew is estimated as ``mu11 / mu02`` from the central moments and undone
    with an affine shear about the vertical centre of the image.

    Parameters
    ----------
    image : numpy.ndarray
        Single-channel image, or a 3-channel image (moments are then measured on
        a grayscale copy, assuming BGR channel order as the other OpenCV helpers
        in this notebook do).

    Returns
    -------
    numpy.ndarray
        The deskewed image, same size as the input. If ``mu02`` is (almost)
        zero the skew is undefined and an unmodified copy is returned.

    Fixes over the previous version: it warped and returned an undefined name
    (``img``) instead of the argument, relied on an undefined global ``SZ`` for
    the output size, and discarded the warp result.
    """
    # cv2.moments only accepts single-channel rasters.
    if image.ndim == 3 and image.shape[2] == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    m = cv2.moments(gray)
    if abs(m['mu02']) < 1e-2:
        return image.copy()

    skew = m['mu11'] / m['mu02']
    height, width = image.shape[:2]
    # Shear in x proportional to y, offset so the middle row stays in place.
    M = np.float32([[1, skew, -0.5 * height * skew], [0, 1, 0]])
    return cv2.warpAffine(
        image, M, (width, height),
        flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
    )

In [None]:
def find_contours(mask_image):
    """Return the outer contours of ``mask_image``.

    Uses ``cv2.RETR_EXTERNAL`` with ``cv2.CHAIN_APPROX_SIMPLE``. The contour list
    is taken as the second-to-last item of the ``cv2.findContours`` result, which
    picks the same element whether the call returns two or three values.
    """
    found = cv2.findContours(mask_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found[-2]

def calculate_largest_contour_area(contours):
    """Return the area of the biggest contour, or 0 when there are none.

    Areas are measured with ``cv2.contourArea``.
    """
    if len(contours) > 0:
        return max(cv2.contourArea(contour) for contour in contours)
    return 0

def calculate_contours_area(contours, min_contour_area = 250):
    """Sum the areas of all contours that are at least ``min_contour_area``.

    Areas are measured with ``cv2.contourArea``; smaller contours are ignored.
    Returns 0 when no contour qualifies.
    """
    contour_areas = (cv2.contourArea(contour) for contour in contours)
    return sum(size for size in contour_areas if size >= min_contour_area)

In [None]:
# Per-image contour features, collected as parallel lists (one entry per image).
areas = []
largest_contour_area = []
labels = []
nb_of_contours = []

# The loop variable is deliberately NOT named `classes`: that name holds the list
# of class names built in an earlier cell, and iterating with it would leave it
# bound to a single class-name string after this cell runs.
for class_label, class_images in images.items():
    for image in class_images:
        mask = create_mask_for_plant(image)
        contours = find_contours(mask)

        # Total area of the sufficiently large contours, and area of the biggest one.
        area = calculate_contours_area(contours)
        largest_area = calculate_largest_contour_area(contours)

        areas.append(area)
        nb_of_contours.append(len(contours))
        largest_contour_area.append(largest_area)
        labels.append(class_label)

In [None]:
# Assemble the per-image feature lists into one table (one row per image).
features_df = pd.DataFrame(
    {
        "label": labels,
        "area": areas,
        "largest_area": largest_contour_area,
        "number_of_components": nb_of_contours,
    }
)

In [None]:
# Descriptive statistics of every numeric feature column, computed per class label.
features_df.groupby("label").describe()