# Modeling and Evaluation
This notebook contains model training and evaluation. All generated models, logs, and results will be saved in the '../out/modeling/' directory.

In [1]:
# Change working directory to the notebook's parent directory
import os
os.chdir(os.path.dirname(os.path.abspath("../jupyter_notebooks/ModelingandEvaluation.ipynb")))
print(f"Current working directory: {os.getcwd()}")

Current working directory: /workspaces/mildew-detector/jupyter_notebooks


In [2]:
import os
# Ensure output directory exists
output_dir = '../out/modeling'
os.makedirs(output_dir, exist_ok=True)
print(f'Modeling output directory: {output_dir}')

Modeling output directory: ../out/modeling


## Load Split Data and Prepare Data Generators
Load the split images from the out/split/train, out/split/val, and out/split/test directories and create Keras ImageDataGenerators for training, validation, and testing.

In [3]:
# Set up split data directories
split_base = '../data/split'
train_dir = os.path.join(split_base, 'healthy/train'), os.path.join(split_base, 'powdery_mildew/train')
val_dir = os.path.join(split_base, 'healthy/val'), os.path.join(split_base, 'powdery_mildew/val')
test_dir = os.path.join(split_base, 'healthy/test'), os.path.join(split_base, 'powdery_mildew/test')
print('Train dirs:', train_dir)
print('Val dirs:', val_dir)
print('Test dirs:', test_dir)

# For Keras generators, we use the parent split_base directory


Train dirs: ('../data/split/healthy/train', '../data/split/powdery_mildew/train')
Val dirs: ('../data/split/healthy/val', '../data/split/powdery_mildew/val')
Test dirs: ('../data/split/healthy/test', '../data/split/powdery_mildew/test')


### Data Generators for Model Training, Validation, and Testing
The following code creates Keras `ImageDataGenerator` objects for the train, validation, and test splits. These generators will efficiently load and preprocess images from the split dataset folders, applying normalization and batching for model training and evaluation.

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 18

datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
    directory=split_base,
    target_size=img_size,
    batch_size=batch_size,
    classes=['healthy', 'powdery_mildew'],
    class_mode='binary',
    subset=None,
    shuffle=True,
    seed=42,
    color_mode='rgb',
)

val_generator = datagen.flow_from_directory(
    directory=split_base,
    target_size=img_size,
    batch_size=batch_size,
    classes=['healthy', 'powdery_mildew'],
    class_mode='binary',
    subset=None,
    shuffle=False,
    seed=42,
    color_mode='rgb',
)

test_generator = datagen.flow_from_directory(
    directory=split_base,
    target_size=img_size,
    batch_size=batch_size,
    classes=['healthy', 'powdery_mildew'],
    class_mode='binary',
    subset=None,
    shuffle=False,
    seed=42,
    color_mode='rgb',
)


2025-07-20 16:10:52.092707: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-20 16:10:53.059200: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 6144 images belonging to 2 classes.
Found 6144 images belonging to 2 classes.
Found 6144 images belonging to 2 classes.
Found 6144 images belonging to 2 classes.
Found 6144 images belonging to 2 classes.


## Define and Compile the Model
Set up a simple convolutional neural network (CNN) for binary classification of healthy vs. powdery mildew leaves.

In [5]:
from tensorflow.keras import layers, models, optimizers

model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

## Train the Model
Train the CNN model using the training and validation data generators. Model checkpoints and early stopping are used to save the best model and prevent overfitting.

In [7]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

checkpoint_path = os.path.join(output_dir, 'best_model.keras')
checkpoint = ModelCheckpoint(
    checkpoint_path, monitor='val_loss', save_best_only=True, verbose=1
)
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

history = model.fit(
    train_generator,
    epochs=30,
    validation_data=val_generator,
    callbacks=[checkpoint, early_stop]
)

Epoch 1/30


  self._warn_if_super_not_called()


[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 541ms/step - accuracy: 0.8197 - loss: 0.3714
Epoch 1: val_loss improved from inf to 0.02808, saving model to ../out/modeling/best_model.keras

Epoch 1: val_loss improved from inf to 0.02808, saving model to ../out/modeling/best_model.keras
[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 784ms/step - accuracy: 0.8200 - loss: 0.3709 - val_accuracy: 0.9930 - val_loss: 0.0281
Epoch 2/30
[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 784ms/step - accuracy: 0.8200 - loss: 0.3709 - val_accuracy: 0.9930 - val_loss: 0.0281
Epoch 2/30
[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534ms/step - accuracy: 0.9924 - loss: 0.0326
Epoch 2: val_loss improved from 0.02808 to 0.00822, saving model to ../out/modeling/best_model.keras

Epoch 2: val_loss improved from 0.02808 to 0.00822, saving model to ../out/modeling/best_model.keras
[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3