## Import all the Dependencies

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import numpy as np
import pandas as pd
import keras
import math
import statistics
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score as ac
from sklearn.metrics import precision_score as pr
from sklearn.metrics import recall_score as rs
from sklearn.metrics import multilabel_confusion_matrix as cm
from sklearn.metrics import confusion_matrix as cms
from sklearn.metrics import roc_auc_score as auc

: 

## Set all the Constants

In [None]:
# Number of images per batch for the dataset loader and the Keras generators.
BATCH_SIZE = 64
# Side length in pixels; images are loaded as IMAGE_SIZE x IMAGE_SIZE.
IMAGE_SIZE = 256
# Colour channels per image (not referenced elsewhere in the visible notebook).
CHANNELS=3
# Number of epochs used when fitting the final model and plotting its curves.
EPOCHS=20

: 

## Import data into tensorflow dataset object

In [None]:
# Mount Google Drive so the dataset under /content/drive can be read (Colab only).
from google.colab import drive
drive.mount('/content/drive')

: 

In [None]:
# Load the image folder as a shuffled, batched tf.data.Dataset of
# IMAGE_SIZE x IMAGE_SIZE images; the fixed seed keeps the shuffle reproducible.
_dataset_dir = "/content/drive/MyDrive/Colab Notebooks/Mini Dataset "
_square_size = (IMAGE_SIZE, IMAGE_SIZE)
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    _dataset_dir,
    batch_size=BATCH_SIZE,
    image_size=_square_size,
    shuffle=True,
    seed=123,
)

: 

In [None]:
# Class labels exposed by the dataset loader; the bare name displays them as cell output.
class_names = dataset.class_names
class_names

: 

In [None]:
# Disabled sanity check, kept as a string literal so it does not execute:
# it would print the shape of one image batch and that batch's labels.
"""for image_batch, labels_batch in dataset.take(1):
    print(image_batch.shape)
    print(labels_batch.numpy())"""

: 

## Visualize some of the images from our dataset

In [None]:
# Show the first 12 images of one batch in a 3 x 4 grid, titled with their class name.
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in dataset.take(1):
    for idx in range(12):
        picture = batch_images[idx].numpy().astype("uint8")
        caption = class_names[batch_labels[idx]]
        plt.subplot(3, 4, idx + 1)
        plt.imshow(picture)
        plt.title(caption)
        plt.axis("off")

: 

## Train Test Split

In [None]:
# Write ten train/test copies of the dataset to disk, one per directory "0".."9".
# The source directory must contain one sub-folder per class:
#   dataset/
#       class_one/    <images>
#       class_two/    <images>
#       class_three/  <images>
#
# NOTE(review): Keras appears to choose the "validation"/"training" subsets by
# position in the sorted file list, so `seed` and `shuffle` only change the order
# in which files are read, not which files land in each subset. If that holds,
# all ten directories receive the same split. Confirm, and split the file paths
# explicitly if ten distinct splits are required.
SPLIT_SOURCE_DIR = "/content/drive/MyDrive/Colab Notebooks/Mini Dataset "

# Two plain readers (no augmentation, no rescaling). Both reserve 20% of each
# class: the "validation" subset becomes the test set, "training" the train set.
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
            validation_split=0.2)
datagen_test = tf.keras.preprocessing.image.ImageDataGenerator(
            validation_split=0.2)

# One seed per split directory.
seeds=[1,2,3,4,5,6,7,8,9,10]


def _export_subset(datagen, class_name, subset, out_dir, seed):
    """Re-save one class's images for `subset` into `out_dir`.

    The images are read through `datagen` with batch size 1 and written as
    224x224 JPEGs by the generator's `save_to_dir` mechanism. A directory that
    already holds a complete export is left untouched so the cell can be re-run;
    a partially filled directory raises instead of silently duplicating files.
    """
    os.makedirs(out_dir, exist_ok=True)
    gen = datagen.flow_from_directory(
        SPLIT_SOURCE_DIR,
        target_size=(224, 224),
        batch_size=1,
        classes=[class_name],
        save_to_dir=out_dir,  # every batch drawn from `gen` is also written here
        save_prefix='new_image',
        save_format='jpg',
        seed=seed,
        subset=subset,
        shuffle=True,
    )
    already_saved = len(os.listdir(out_dir))
    if already_saved >= gen.samples:
        return
    if already_saved:
        raise RuntimeError(
            f"{out_dir} holds a partial export ({already_saved}/{gen.samples} files); "
            "delete it and re-run this cell"
        )
    # One pass over the generator saves every image exactly once.
    # next(gen) is used because the iterator's .next() method is not available
    # on every Keras version.
    for _ in range(len(gen)):
        next(gen)


# Only sub-directories are classes; stray files in the dataset root are ignored.
split_class_names = sorted(
    entry for entry in os.listdir(SPLIT_SOURCE_DIR)
    if os.path.isdir(os.path.join(SPLIT_SOURCE_DIR, entry))
)

for j in range(0,10):
    print("Train Test Splitting",j,"Commencing")
    for one_class in split_class_names:
        print("Test Class",one_class)
        # "validation" = the 20% partition reserved by validation_split.
        _export_subset(datagen_test, one_class, "validation", f'{j}/test/{one_class}', seeds[j])
    for one_class in split_class_names:
        print("Train Class",one_class)
        # "training" = the remaining 80% of the class.
        _export_subset(datagen_train, one_class, "training", f'{j}/train/{one_class}', seeds[j])

: 

## Resize and Rescale

In [None]:
# Preprocessing pipeline: resize to IMAGE_SIZE x IMAGE_SIZE, then multiply the
# pixel values by 1/255.
# The layers were promoted out of `layers.experimental.preprocessing`, and the
# experimental namespace is missing from recent TensorFlow releases, so use the
# stable location first and fall back only on older versions.
try:
    _Resizing, _Rescaling = layers.Resizing, layers.Rescaling
except AttributeError:
    _Resizing = layers.experimental.preprocessing.Resizing
    _Rescaling = layers.experimental.preprocessing.Rescaling

resize_and_rescale = tf.keras.Sequential([
  _Resizing(IMAGE_SIZE, IMAGE_SIZE),
  _Rescaling(1./255),
])

: 

## CNN Model Architecture



In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import MobileNet

def build_model(num_classes=3):
    """Build an uncompiled MobileNet-based image classifier.

    An ImageNet-pretrained MobileNet without its top is followed by global
    average pooling, three ReLU dense layers (1024, 1024, 512 units) and a
    softmax output layer. No layers are frozen here, so the whole network is
    trainable unless the caller changes that.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A `tensorflow.keras.Model` mapping the MobileNet input to class
        probabilities. The caller is responsible for compiling it.

    Raises:
        ValueError: If `num_classes` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")

    base_model = MobileNet(weights='imagenet', include_top=False)  # pretrained feature extractor

    x = GlobalAveragePooling2D()(base_model.output)
    # Dense head on top of the pooled features.
    for units in (1024, 1024, 512):
        x = Dense(units, activation='relu')(x)
    preds = Dense(num_classes, activation='softmax')(x)  # one probability per class

    # Tie the base model's input to the new classification head.
    return Model(inputs=base_model.input, outputs=preds)


: 

In [None]:
# Disabled alternative to build_model(): a small CNN trained from scratch.
# It is kept inside a string literal so it does not run and does not replace
# the MobileNet-based build_model defined in the previous cell.
"""
def build_model():
  model = keras.Sequential([
      resize_and_rescale,
      layers.Conv2D(128, (3, 3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
      layers.MaxPooling2D((2, 2)),

      layers.Conv2D(64, (3, 3), activation='relu'),
      layers.MaxPooling2D((2, 2)),

      layers.Conv2D(32, (3, 3), activation='relu'),
      layers.MaxPooling2D((2, 2)),

      layers.Flatten(),
      layers.Dropout(0.5),  # Dropout to reduce overfitting
      layers.Dense(512, activation='relu'),
      layers.Dense(3, activation='softmax')  # 3 classes: rusty_leaf, miner_leaf, healthy_leaf
  ])
  return model

"""


: 

## 10 × 10-fold Cross-Validation

In [None]:
# Training reader: rescales pixels by 1/255, reserves 20% for validation and
# applies random augmentation (rotation, shifts, shear, zoom, flips).
_train_augmentation = {
    "rescale": 1./255,
    "validation_split": 0.2,
    "rotation_range": 20,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "vertical_flip": True,
    "fill_mode": 'nearest',
}
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(**_train_augmentation)

: 

In [None]:

# Validation reader: same 1/255 rescaling and 20% split as the training reader,
# but without any augmentation.
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255,
            validation_split=0.2)

: 

In [None]:
# Test reader: only rescales pixels by 1/255 (no split, no augmentation).
datagen_test = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255)
# Three ImageDataGenerators are now defined: datagen_train for the training data,
# datagen_val for the validation data, and datagen_test for the test data.

# Accumulators filled by the cross-validation loop, one inner list per
# train/test split: [[results of the first split], [...], ...].
total_evaluate_histories=[]
total_models=[]
total_histories=[]# fit histories of every run, grouped by split
# total_models and total_evaluate_histories follow the same layout.


: 

In [None]:
# Create one result directory per train/test split for the fit histories and
# the evaluation results. exist_ok=True keeps the cell re-runnable, which the
# resume check in the cross-validation loop depends on.
for results_root in ("Histories", "Evaluate_Histories"):
    for i in range(0,10):
        os.makedirs(results_root+"/"+str(i), exist_ok=True)


: 

In [None]:
import json

# For each of the 10 train/test directories written earlier ("./0" .. "./9"),
# train 10 models (one per seed) on the train part and evaluate each on the
# directory's test part.
#
# Results are stored cumulatively: Histories/<j>/histories_<i> holds the fit
# histories of runs 0..i of split j, and likewise for
# Evaluate_Histories/<j>/evaluate_histories_<i>. A run whose two files already
# exist is skipped and its results are reloaded, so an interrupted session can
# be resumed by re-running this cell.
#
# NOTE(review): with validation_split + subset, Keras appears to pick the
# validation files by position in the sorted file list, so the seed changes the
# shuffle order and augmentation but not the train/validation partition. If so,
# the 10 inner runs are repeated runs on one partition rather than 10 distinct
# folds. Confirm before reporting these numbers as k-fold results.
seeds=[1,2,3,4,5,6,7,8,9,10]

for j in range(10):
    print("train test split", j)
    models=[]              # models trained in this session (skipped runs are not rebuilt)
    histories=[]           # fit histories of the runs of this split
    evaluate_histories=[]  # test-set [loss, accuracy] of the runs of this split
    train_root="./"+str(j)+"/train"
    test_root="./"+str(j)+"/test"
    test_generator = datagen_test.flow_from_directory(
            test_root,
            target_size=(IMAGE_SIZE, IMAGE_SIZE),
            batch_size=BATCH_SIZE,
            shuffle=False  # order does not affect evaluate(); unshuffled is deterministic
            )
    for i in range(0,10):
        history_path="Histories/"+str(j)+'/histories_'+str(i)
        evaluate_path="Evaluate_Histories/"+str(j)+'/evaluate_histories_'+str(i)

        if os.path.exists(history_path) and os.path.exists(evaluate_path):
            # Already done in a previous session: reload the cumulative results.
            with open(history_path, encoding='utf-8') as json_file:
                histories = json.load(json_file)
            with open(evaluate_path, encoding='utf-8') as json_file:
                evaluate_histories = json.load(json_file)
            continue

        print("train test split", j, i,"th fold")
        seed=seeds[i]  # makes the shuffling/augmentation of this run reproducible
        train_generator = datagen_train.flow_from_directory(
            train_root,
            seed=seed,
            target_size=(IMAGE_SIZE, IMAGE_SIZE),
            batch_size=BATCH_SIZE,
            shuffle=True,
            subset='training')
        val_generator = datagen_val.flow_from_directory(
            train_root,
            seed=seed,
            target_size=(IMAGE_SIZE, IMAGE_SIZE),
            batch_size=BATCH_SIZE,
            shuffle=True,
            subset='validation')

        # A fresh model per run, so nothing has to be reset afterwards.
        model=build_model()
        model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

        # No explicit step counts: Keras takes them from len(generator), which
        # includes the final partial batch and never evaluates to 0 steps the
        # way `samples // batch_size` does for small sets.
        history=model.fit(train_generator, validation_data=val_generator, epochs=EPOCHS, verbose=1)
        evaluate_history=model.evaluate(test_generator)

        # Convert to plain floats so json can serialise the values.
        histories.append({name: [float(v) for v in values] for name, values in history.history.items()})
        evaluate_histories.append([float(v) for v in evaluate_history])
        # NOTE(review): every trained model stays referenced here; with 100 runs
        # this may exhaust memory. Consider saving each model to disk instead.
        models.append(model)

        os.makedirs(os.path.dirname(evaluate_path), exist_ok=True)
        with open(evaluate_path, 'w', encoding='utf-8') as fout:
            json.dump(evaluate_histories, fout)
        os.makedirs(os.path.dirname(history_path), exist_ok=True)
        with open(history_path, 'w', encoding='utf-8') as fout:
            json.dump(histories, fout)

    total_models.append(models)
    total_histories.append(histories)
    total_evaluate_histories.append(evaluate_histories)

: 

## Train the Model

In [None]:
# Reader for the final training run: rescales pixels by 1/255, with no
# validation split and no augmentation.
datagen_all = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255)

: 

In [None]:
# Batches of IMAGE_SIZE x IMAGE_SIZE images drawn from the complete dataset
# directory (no held-out part), used to fit the final model.
model_generator = datagen_all.flow_from_directory(
            "/content/drive/MyDrive/Colab Notebooks/Mini Dataset ",
            target_size=(IMAGE_SIZE, IMAGE_SIZE),
            batch_size=BATCH_SIZE,
            )

: 

In [None]:
# Build a fresh model for the final training run.
model=build_model()

# Compile only; the model is trained in the following cell.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

: 

In [None]:
# Train the final model on the whole dataset.
# batch_size is deliberately not passed: the generator already yields batches of
# BATCH_SIZE, and Keras documents that batch_size must not be specified for
# generator / Sequence input.
history = model.fit(
    model_generator,
    epochs=EPOCHS,
    verbose=1,
)

: 

## Plotting the Accuracy and Loss Curves

In [None]:
# Display the History object returned by model.fit.
history

: 

In [None]:
# Display the parameters recorded for the training run.
history.params

: 

In [None]:
# List which metrics were recorded per epoch.
history.history.keys()

: 

In [None]:
# Inspect the container type that holds the per-epoch loss values.
type(history.history['loss'])

: 

In [None]:
# Number of recorded loss values (one per completed epoch).
len(history.history['loss'])

: 

In [None]:
# Show the loss of the first 5 epochs.
history.history['loss'][:5]

: 

In [None]:
# Pull the per-epoch curves out of the fit history.
acc = history.history['accuracy']
loss = history.history['loss']

# The final fit above is run without validation data, so the 'val_*' keys may be
# absent. Fall back to NaN series of matching length (matplotlib draws nothing
# for NaN) instead of failing with a KeyError.
_no_validation = [float('nan')] * len(acc)
val_acc = history.history.get('val_accuracy', _no_validation)
val_loss = history.history.get('val_loss', _no_validation)

: 

In [None]:
# Plot accuracy (left) and loss (right) per epoch.
# The x-axis follows the number of recorded epochs rather than EPOCHS, so the
# plot still works if EPOCHS was changed after training or training stopped early.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

: 

## Run prediction on a sample image

In [None]:
import numpy as np

# Predict one batch and compare the first image's prediction with its label.
# `dataset` is used because no `test_ds` is defined anywhere in this notebook.
# NOTE(review): the final model was trained on this same data, so this is a
# smoke test, not an evaluation on held-out images.
for images_batch, labels_batch in dataset.take(1):

    first_image = images_batch[0].numpy().astype('uint8')
    first_label = labels_batch[0].numpy()

    print("first image to predict")
    plt.imshow(first_image)
    print("actual label:",class_names[first_label])

    # The model was trained on generators with rescale=1/255, while `dataset`
    # yields raw pixel values, so apply the same scaling before predicting.
    batch_prediction = model.predict(images_batch / 255.0)
    print("predicted label:",class_names[np.argmax(batch_prediction[0])])

: 

In [None]:
def predict(model, img, rescale=1.0 / 255):
    """Classify a single image and report the winning class with its confidence.

    Args:
        model: Object with a Keras-style `predict(batch)` method returning one
            row of class scores per input image.
        img: One image as an array-like of pixel values (height, width, channels).
        rescale: Factor applied to the pixels before prediction. Defaults to
            1/255 to match the `rescale=1./255` used by the training generators;
            pass `None` for input that is already scaled.

    Returns:
        `(predicted_class, confidence)`: the entry of the module-level
        `class_names` with the highest score, and that score as a percentage
        rounded to two decimals.
    """
    # Use the `img` argument itself (not notebook globals) so the function
    # works for any caller.
    img_array = np.asarray(img, dtype="float32")
    if rescale is not None:
        img_array = img_array * rescale
    img_array = np.expand_dims(img_array, 0)  # the model expects a batch axis

    predictions = model.predict(img_array)

    predicted_class = class_names[int(np.argmax(predictions[0]))]
    confidence = round(100 * float(np.max(predictions[0])), 2)
    return predicted_class, confidence

: 

In [None]:
# Show 9 images of one batch with their actual class, predicted class and confidence.
# `dataset` is used because no `test_ds` is defined anywhere in this notebook.
# NOTE(review): the final model was trained on this same data, so these are not
# held-out predictions.
plt.figure(figsize=(15, 15))
for images, labels in dataset.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))

        predicted_class, confidence = predict(model, images[i].numpy())
        actual_class = class_names[labels[i]]

        plt.title(f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")

        plt.axis("off")

: 

## Saved Model

In [None]:
import os

# Save the model under the next free integer version inside MODELS_DIR.
# The version is derived from the directory the model is written to; listing a
# different directory than the save target would yield the same version on
# every run.
MODELS_DIR = "../models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Ignore entries that are not plain version numbers (e.g. ".DS_Store").
existing_versions = [int(name) for name in os.listdir(MODELS_DIR) if name.isdecimal()]
model_version = max(existing_versions + [0]) + 1

# NOTE(review): newer Keras releases may require a ".keras" or ".h5" suffix for
# model.save; confirm against the installed version.
model.save(f"{MODELS_DIR}/{model_version}")

: 