# Capstone Notebook

In [None]:
import warnings
import pandas as pd
import seaborn as sns
import os
import numpy as np
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
%matplotlib inline
warnings.filterwarnings("ignore")


### Note: I renamed the folder 'Capsicum' to 'Pepper' so the class name better represents the vegetable to my audience. The rest of the original data is unaltered.

In [None]:
# the vegetable classes; the loader looks these names up from the dataset folder names
classes = [
    'Bean',
    'Bitter_Gourd',
    'Bottle_Gourd',
    'Brinjal',
    'Broccoli',
    'Cabbage',
    'Carrot',
    'Cauliflower',
    'Cucumber',
    'Papaya',
    'Pepper',
    'Potato',
    'Pumpkin',
    'Radish',
    'Tomato',
]

# integer label for each class name = its position in `classes`
class_labels = dict(zip(classes, range(len(classes))))

num_classes = len(classes)

# target size every image is resized to when loaded
image_size = (180, 180)


In [None]:
# function for loading our data

def load(directory='imgs', categories=('test', 'train', 'val')):
    """Load every image split from disk into NumPy arrays.

    Expects the layout ``<directory>/<category>/<class name>/<image file>``.
    Each image is read with OpenCV, converted from BGR to RGB and resized to
    the notebook-level ``image_size``. Its label is looked up from the class
    folder name in the notebook-level ``class_labels`` mapping.

    Parameters
    ----------
    directory : str
        Root folder that contains one sub-folder per category.
    categories : sequence of str
        Split sub-folders to load. The result follows this order.

    Returns
    -------
    list of (images, labels) tuples
        One tuple per category, in the order of ``categories``. ``images`` is
        a float32 array of the loaded images and ``labels`` is an int32 array
        with one label per image.
    """
    output = []

    for category in categories:
        path = os.path.join(directory, category)
        print(f"Loading images at {path}...")

        images = []
        labels = []

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines
        for class_name in sorted(os.listdir(path)):
            class_dir = os.path.join(path, class_name)

            # stray files (e.g. .DS_Store) are not class folders
            if not os.path.isdir(class_dir):
                continue
            if class_name not in class_labels:
                print(f"Skipping unknown class folder {class_dir}")
                continue
            label = class_labels[class_name]

            # iterating through each image of this class
            for image_name in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, image_name)

                # cv2.imread returns None instead of raising when a file
                # cannot be read or decoded
                image = cv2.imread(image_path)
                if image is None:
                    print(f"Skipping unreadable image {image_path}")
                    continue

                # OpenCV loads BGR; convert to RGB, then resize to image_size
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, image_size)

                images.append(image)
                labels.append(label)

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')

        output.append((images, labels))

    return output

In [None]:
# load() returns the splits in the order test, train, val
(
    (test_images, test_labels),
    (train_images, train_labels),
    (val_images, val_labels),
) = load()

In [None]:
# count NaN entries in each image array (train, test, val)
trainna, testna, valna = (
    np.sum(np.isnan(split))
    for split in (train_images, test_images, val_images)
)

print(trainna, testna, valna)

In [None]:
# creating a base model: a single convolution + pooling stage feeding the classifier
# the layer sizes are arbitrary picks for a first experiment

base_test_1 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (5,5), activation='relu', input_shape=(180, 180, 3)),
    tf.keras.layers.MaxPooling2D(5,5),
    tf.keras.layers.Flatten(),
    # one softmax unit per class, so every output index maps to a real class label
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

In [None]:
# compile the base model; the sparse loss takes the integer class labels directly
base_test_1.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train the base CNN for one epoch, validating on the val split
base_test_1_fit = base_test_1.fit(
    x=train_images,
    y=train_labels,
    batch_size=50,
    epochs=1,
    validation_data=(val_images, val_labels),
    shuffle=True,
)

Around 39% accuracy for our base. Not too great, but it is just that - a base model. Let's add more complexity to our CNN.

In [None]:
# trying some new layers to improve accuracy: a second convolution + pooling stage
model_1 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (5,5), activation='relu', input_shape=(180, 180, 3)),
    tf.keras.layers.MaxPooling2D(3,3),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(3,3),
    tf.keras.layers.Flatten(),
    # one softmax unit per class, so every output index maps to a real class label
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

In [None]:
# same optimizer, loss and metric as the base model
model_1.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model_1 for three epochs, validating on the val split
model_1_fit = model_1.fit(
    x=train_images,
    y=train_labels,
    batch_size=50,
    epochs=3,
    validation_data=(val_images, val_labels),
    shuffle=True,
)

This is a solid improvement! Let's try adding more complexity.

In [None]:
# adding even more layers: three small (2x2) convolutions and a hidden dense layer
model_2 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (2,2), activation='relu', input_shape=(180, 180, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    # one softmax unit per class, so every output index maps to a real class label
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

In [None]:
# same optimizer, loss and metric as the earlier models
model_2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model_2 for three epochs, validating on the val split
model_2_fit = model_2.fit(
    x=train_images,
    y=train_labels,
    batch_size=50,
    epochs=3,
    validation_data=(val_images, val_labels),
    shuffle=True,
)

In [None]:
# deeper still: five 2x2 convolutions with pooling after the first four
model_3 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (2,2), activation='relu', input_shape=(180, 180, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    # one softmax unit per class, so every output index maps to a real class label
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

In [None]:
# same optimizer, loss and metric as the earlier models
model_3.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model_3 for three epochs; note the smaller batch size (30) used here
model_3_fit = model_3.fit(
    x=train_images,
    y=train_labels,
    batch_size=30,
    epochs=3,
    validation_data=(val_images, val_labels),
    shuffle=True,
)

As we can see, more complexity appears to help only up to a certain point. Model 2 has the best performance, so let's keep its architecture, train it for more epochs (10 instead of 3), and see if we can push towards 99% accuracy.

In [None]:
# same layer stack as model_2, built fresh so it can be trained for more epochs
model_4 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (2,2), activation='relu', input_shape=(180, 180, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    # one softmax unit per class, so every output index maps to a real class label
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

In [None]:
# same optimizer, loss and metric as the earlier models
model_4.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train model_4 for ten epochs, validating on the val split
model_4_fit = model_4.fit(
    x=train_images,
    y=train_labels,
    batch_size=50,
    epochs=10,
    validation_data=(val_images, val_labels),
    shuffle=True,
)

98% accuracy for our model! Wonderful!

In [None]:
def plot_acc_loss(model):
    """Plot training vs. validation accuracy and loss in two side-by-side panels.

    Parameters
    ----------
    model : object with a ``history`` mapping
        Despite the name, this notebook passes the value returned by
        ``fit`` rather than the model itself. ``model.history`` must contain
        the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each
        holding one value per epoch.
    """
    # one row, two columns, so both panels use the full height of the figure
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # plotting our accuracy
    acc_ax.plot(model.history['accuracy'], 'bo--', label='accuracy')
    acc_ax.plot(model.history['val_accuracy'], 'ro--', label='validation accuracy')
    acc_ax.set_title('training accuracy vs. validation accuracy')
    acc_ax.set_xlabel('# of epochs')
    acc_ax.set_ylabel('accuracy')
    acc_ax.legend()

    # plotting our loss
    loss_ax.plot(model.history['loss'], 'bo--', label='loss')
    loss_ax.plot(model.history['val_loss'], 'ro--', label='validation loss')
    loss_ax.set_title('training loss vs. validation loss')
    loss_ax.set_xlabel('# of epochs')
    loss_ax.set_ylabel('loss')
    loss_ax.legend()

    # keep titles and axis labels of the two panels from overlapping
    fig.tight_layout()
    plt.show()

In [None]:
# learning curves for the 10-epoch run of model_4
plot_acc_loss(model=model_4_fit)