# Modelling and Evaluating for Mildew Detection in Cherry Leaves

## Objectives
* Develop a binary classification model to distinguish between healthy and powdery mildew-infected cherry leaves.
* Evaluate the model's performance and optimize it to meet or exceed the project accuracy goal.

## Inputs
* Preprocessed and possibly augmented image data ready for model training.

## Outputs
* Trained machine learning model.
* Evaluation reports detailing the model's performance on test data.

## Additional Comments
* Focus on achieving a balance between model complexity and performance to avoid overfitting.

---

# Import packages

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

## Set working directory

In [None]:
# Capture the directory the notebook process is currently running in and show it.
current_dir = os.getcwd()
print("Original working directory:", current_dir)

### Change the current working directory to the project root

In [None]:
# Resolve the parent of the recorded starting directory and switch into it.
relative_path_to_root = '..'
project_root = os.path.abspath(os.path.join(current_dir, relative_path_to_root))
os.chdir(project_root)

# Confirm the switch took effect
print("New current working directory:", os.getcwd())

In [None]:

# Keep the current working directory for later reference; the bare name on the
# last line makes the notebook display its value.
work_dir = os.getcwd()
work_dir

## Define Input and Output Directories

In [None]:
# Dataset root, with one sub-directory per data split.
base_path = "inputs/cherry_leaves_dataset/cherry-leaves"
train_path, validation_path, test_path = (
    os.path.join(base_path, split_name)
    for split_name in ('train', 'validation', 'test')
)

### Define where to save the outputs

In [None]:
# Directory that receives the saved model, plots and evaluation results.
# exist_ok=True creates it (and any missing parents) without the separate
# existence check, so the cell is safe to re-run and free of the
# check-then-create race.
output_dir = 'outputs/model_outputs'
os.makedirs(output_dir, exist_ok=True)

### Load Image Shape

In [None]:
# Load the stored image shape. Later cells slice it as image_shape[:2] for the
# generators' target_size and pass it whole as the model's input_shape.
# Presumably written by the data-visualization step (the path suggests so) — verify.
# NOTE(review): joblib.load unpickles the file; only load artifacts produced by this project.
image_shape = joblib.load('outputs/data_visualization/v1/average_image_shape.pkl')

---

# Image Data Augmentation

### ImageDataGenerator

In [None]:
# Every generator multiplies pixel values by the same factor.
pixel_scale = 1./255

# Random geometric transforms applied to training images only.
train_augmentation = {
    'rotation_range': 20,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(rescale=pixel_scale, **train_augmentation)

# Validation and test images are rescaled but otherwise left untouched.
validation_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

* ### Augment training image dataset

In [None]:
# Stream labelled training batches of 32 images, resized to the stored image shape.
train_generator = train_datagen.flow_from_directory(
    directory=train_path,
    class_mode='binary',
    batch_size=32,
    target_size=image_shape[:2],
)

* ### Load validation image dataset (rescaling only, no augmentation)

In [None]:
# Stream labelled validation batches of 32 images, resized to the stored image shape.
validation_generator = validation_datagen.flow_from_directory(
    directory=validation_path,
    class_mode='binary',
    batch_size=32,
    target_size=image_shape[:2],
)

* ### Load test image dataset (rescaling only, no augmentation)

In [None]:
# Stream labelled test batches of 32 images in a fixed (unshuffled) order.
test_generator = test_datagen.flow_from_directory(
    directory=test_path,
    shuffle=False,
    class_mode='binary',
    batch_size=32,
    target_size=image_shape[:2],
)

---

## Plot augmented images

In [None]:
def plot_augmented_images(datagen, directory_path, image_shape, batch_size=10, rows=2, cols=5):
    """
    Plot one batch of images produced by a data generator.

    Args:
        datagen: Object exposing ``flow_from_directory`` (the notebook passes
            its ``ImageDataGenerator`` instances).
        directory_path: Directory handed to ``flow_from_directory``.
        image_shape: Sequence whose first two entries are used as ``target_size``.
        batch_size: Number of images requested from the generator.
        rows: Number of rows in the plot grid.
        cols: Number of columns in the plot grid.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    generator = datagen.flow_from_directory(
        directory=directory_path,
        target_size=image_shape[:2],
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True
    )

    # Get a batch of images
    images, labels = next(generator)

    # The generator yields fewer than batch_size images when the directory is
    # small, so bound the plotting loop by what was actually returned.
    n_images = len(images)

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so `.flat`
    # is always available.
    fig, axes = plt.subplots(rows, cols, figsize=(2 * cols, 2 * rows), squeeze=False)
    for i, ax in enumerate(axes.flat):
        # Hide the frame on every cell, including ones left without an image.
        ax.axis('off')
        if i < n_images:
            ax.imshow(images[i])
            # NOTE(review): assumes class index 0 is the healthy class — confirm
            # against generator.class_indices.
            ax.set_title(f"Label: {'Healthy' if labels[i] < 0.5 else 'Powdery Mildew'}")
    plt.tight_layout()
    plt.show()


* ### Plot augmented training images

In [None]:
# Show a sample batch of 10 augmented training images.
plot_augmented_images(train_datagen, train_path, image_shape, batch_size=10)

* ### Plot validation images (rescaled only, no augmentation)

In [None]:
# Show a sample batch of 10 validation images.
plot_augmented_images(validation_datagen, validation_path, image_shape, batch_size=10)

* ### Plot test images (rescaled only, no augmentation)

In [None]:
# Show a sample batch of 10 test images.
plot_augmented_images(test_datagen, test_path, image_shape, batch_size=10)

---

# Model creation

## ML model

In [None]:
model = Sequential()

# Feature extractor: two convolution + max-pooling stages.
model.add(Conv2D(32, (3,3), activation='relu', input_shape=image_shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head: dense layer with dropout, then a single sigmoid output.
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


* ### Model Summary 

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

* ### Early Stopping 

In [None]:
# Callback that watches val_loss with a patience of 3 epochs.
# NOTE(review): the fit cell in this notebook trains for only 4 epochs, which
# leaves this patience little room to act, and restore_best_weights is left at
# its default — confirm both are intended.
early_stop = EarlyStopping(monitor='val_loss', patience=3)

## Fit Model For Training

In [None]:
# Train on the training generator, validating against the validation generator.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=4,
    callbacks=[early_stop],
)

## Save Model

In [None]:
# Write the trained model into the output directory.
model_filename = 'mildew_detector_model.h5'
model_path = os.path.join(output_dir, model_filename)
model.save(model_path)

---

# Model Performance

## Model Performance Visualization

In [None]:
# Draw the training and validation accuracy curves on one set of axes.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name], label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')

# Save before showing so the written file contains the rendered figure.
accuracy_plot_path = os.path.join(output_dir, 'model_accuracy.png')
plt.savefig(accuracy_plot_path)
plt.show()

## Evaluate model on test set

In [None]:
# Evaluate on the test generator; the first two entries are reported as loss and accuracy.
results = model.evaluate(test_generator)
test_loss, test_accuracy = results[0], results[1]
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

## Save evaluation data

In [None]:
# Persist the evaluation results next to the saved model.
results_filename = 'test_results.pkl'
results_path = os.path.join(output_dir, results_filename)
joblib.dump(results, results_path)

---

## Predict on new data

In [None]:
import os
import random
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

def predict_random_image_from_final_test():
    """
    Classify one randomly chosen image from the final_test folder and display it.

    Uses the notebook-level ``model`` and ``image_shape``. The image is resized
    to ``image_shape[:2]`` and scaled by 1/255 before being passed to
    ``model.predict``.

    Returns:
        None. The image is shown with the predicted class and file name as title.

    Raises:
        FileNotFoundError: If the final_test directory contains no candidate files.
    """
    # Define the path to the final_test directory
    final_test_path = 'inputs/cherry_leaves_dataset/cherry-leaves/final_test'

    # List the files in the final_test directory, skipping sub-directories and
    # hidden entries (for example Jupyter checkpoint folders) that cannot be
    # loaded as images
    test_images = [
        entry for entry in os.listdir(final_test_path)
        if not entry.startswith('.')
        and os.path.isfile(os.path.join(final_test_path, entry))
    ]
    if not test_images:
        raise FileNotFoundError(f"No image files found in {final_test_path}")

    # Randomly select an image
    random_image_name = random.choice(test_images)

    # Construct the full path to the image
    image_path = os.path.join(final_test_path, random_image_name)

    # Load and preprocess the image
    img = image.load_img(image_path, target_size=image_shape[:2])
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Make a prediction
    prediction = model.predict(img_array)

    # Determine the predicted class based on the prediction probability.
    # The model has a single sigmoid output, so take the one scalar explicitly.
    # NOTE(review): assumes class index 1 is powdery mildew — confirm against
    # train_generator.class_indices.
    probability = float(prediction[0][0])
    predicted_class = "Powdery Mildew" if probability > 0.5 else "Healthy"

    # Display the image and the prediction
    plt.imshow(img)
    plt.title(f"Predicted: {predicted_class}\n{random_image_name}")
    plt.axis('off')
    plt.show()

Run the function to predict and display a random image from final_test

In [None]:
# Predict and display one randomly selected image from the final_test folder.
predict_random_image_from_final_test()

---