Varroa Mite Detection 1.3

In [1]:
# Cell Block 1: Importing Libraries
import os
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Cell Block 2: Load Data and Preprocess

df = pd.read_csv('labels.csv')

df['filename'] = df['filename'].apply(lambda fn: os.path.join('images', fn.strip()))

filepaths = df['filename'].values
labels = df['has_mite'].values

In [6]:
# Cell Block 3: K-Fold Cross-Validation

# Early stopping callback
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    min_delta=1e-4,
    restore_best_weights=True
)

k = 5  # Number of folds
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

fold = 1
all_test_accuracies = []
best_epochs = []

# Model creation function
def create_model():
    model = models.Sequential([
        layers.Input(shape=(224, 224, 3)),
        layers.Conv2D(32, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Preprocessing function
def preprocess(paths, labels):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))

    def load_img(path, label):
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [224, 224])
        img = img / 255.0
        return img, label

    return ds.map(load_img).batch(32).prefetch(tf.data.AUTOTUNE)

for trainval_index, test_index in skf.split(filepaths, labels):
    print(f"\n🧪 Fold {fold} -----------------------------")

    # Split into trainval and test
    X_trainval, X_test = filepaths[trainval_index], filepaths[test_index]
    y_trainval, y_test = labels[trainval_index], labels[test_index]

    # Further split trainval into train and val (e.g. 80/20)
    val_split = int(0.8 * len(X_trainval))
    X_train, X_val = X_trainval[:val_split], X_trainval[val_split:]
    y_train, y_val = y_trainval[:val_split], y_trainval[val_split:]


    # Preprocess each split
    train_ds = preprocess(X_train, y_train)
    val_ds = preprocess(X_val, y_val)
    test_ds = preprocess(X_test, y_test)



    # Train model with early stopping
    model = create_model()
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=100,
        callbacks=[earlystop],
        verbose=1
    )

    # Record best epoch (smallest val_loss)
    best_epoch = np.argmin(history.history['val_loss']) + 1
    best_epochs.append(best_epoch)
    print(f"🏁 Best epoch for Fold {fold}: {best_epoch}")

    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_ds, verbose=0)
    all_test_accuracies.append(test_acc)
    print(f"✅ Fold {fold} test accuracy: {test_acc:.4f}")

    fold += 1

# Calculate mean best epoch
ep_mean = int(np.round(np.mean(best_epochs)))
print(f"\n📌 Mean optimal epoch across folds: {ep_mean}")


🧪 Fold 1 -----------------------------
Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 441ms/step - accuracy: 0.5865 - loss: 1.0372 - val_accuracy: 0.6061 - val_loss: 0.6788
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 409ms/step - accuracy: 0.6189 - loss: 0.6779 - val_accuracy: 0.6061 - val_loss: 0.6834
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 422ms/step - accuracy: 0.6189 - loss: 0.6788 - val_accuracy: 0.6061 - val_loss: 0.6721
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 410ms/step - accuracy: 0.6189 - loss: 0.6712 - val_accuracy: 0.6061 - val_loss: 0.6747
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 418ms/step - accuracy: 0.6189 - loss: 0.6692 - val_accuracy: 0.6061 - val_loss: 0.6720
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 431ms/step - accuracy: 0.6189 - loss: 0.6696 - val_accuracy: 0.6061 -

In [7]:
# Cell Block 4: Final Model Training on data again, but for ep_mean Epochs (no validation needed)

## NOTE: THIS IS NOT TESTING ON HIDDEN DATA - IT WAS TECHNICALLY SEEN IN THE k-FOLD.
## CAN TALK ABOUT THIS TO AYUSHMAN

# Split all data into 80% train and 20% test
X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(
    filepaths, labels, test_size=0.2, stratify=labels, random_state=42
)

# Use your same preprocess function
train_final_ds = preprocess(X_train_final, y_train_final)
test_final_ds = preprocess(X_test_final, y_test_final)

# Create and train model with ep_mean
final_model = create_model()
final_model.fit(
    train_final_ds,
    epochs=ep_mean,
    verbose=1
)


Epoch 1/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 402ms/step - accuracy: 0.5324 - loss: 0.8620
Epoch 2/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 394ms/step - accuracy: 0.6309 - loss: 0.6829
Epoch 3/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 383ms/step - accuracy: 0.6125 - loss: 0.6782
Epoch 4/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 381ms/step - accuracy: 0.6309 - loss: 0.6915
Epoch 5/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 387ms/step - accuracy: 0.6309 - loss: 0.6899
Epoch 6/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 386ms/step - accuracy: 0.6309 - loss: 0.6882
Epoch 7/7
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 384ms/step - accuracy: 0.6309 - loss: 0.6866


<keras.src.callbacks.history.History at 0x12d09784bc0>

In [8]:
# Cell Block 5: Final Evaluation

# Evaluate final model on the 20% test set
final_test_loss, final_test_acc = final_model.evaluate(test_final_ds, verbose=0)

# Report both results
print(f"\n📊 {k}-Fold Test Accuracy: {np.mean(all_test_accuracies):.4f} ± {np.std(all_test_accuracies):.4f}")
print(f"🏁 Final model test accuracy (trained on 80% for {ep_mean} epochs): {final_test_acc:.4f}")


📊 5-Fold Test Accuracy: 0.6364 ± 0.0360
🏁 Final model test accuracy (trained on 80% for 7 epochs): 0.6000
