Varroa Mite Detection 1.2

In [2]:
# Cell Block 1: Importing Libraries
import os
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
import numpy as np
from tensorflow.keras import layers, models

In [3]:
# Cell Block 2: Load Data and Preprocess

df = pd.read_csv('labels.csv')

df['filename'] = df['filename'].apply(lambda fn: os.path.join('images', fn.strip()))

filepaths = df['filename'].values
labels = df['has_mite'].values

In [4]:
# Cell Block 3: K-Fold Cross-Validation
from tensorflow.keras.callbacks import EarlyStopping

# define it once (you can tweak patience, monitor, etc. as you like)
earlystop = EarlyStopping(
    monitor='val_loss',        # watch validation loss
    patience=3,               # decreased patience from 5 to 3
    min_delta=1e-4,            # add a minimum delta to reduce sensitivity to small changes and floating point error
    restore_best_weights=True  # roll back to the best weights seen
)


k = 10
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

fold = 1
all_test_accuracies = []

for trainval_index, test_index in skf.split(filepaths, labels):
    print(f"\n🧪 Fold {fold} -----------------------------")

    # Split into trainval and test
    X_trainval, X_test = filepaths[trainval_index], filepaths[test_index]
    y_trainval, y_test = labels[trainval_index], labels[test_index]

    # Further split trainval into train and val (e.g. 80/20)
    val_split = int(0.8 * len(X_trainval))
    X_train, X_val = X_trainval[:val_split], X_trainval[val_split:]
    y_train, y_val = y_trainval[:val_split], y_trainval[val_split:]


    def preprocess(paths, labels):
        ds = tf.data.Dataset.from_tensor_slices((paths, labels))

        def load_img(path, label):
            img = tf.io.read_file(path)
            img = tf.image.decode_jpeg(img, channels=3)
            img = tf.image.resize(img, [224, 224])
            img = img / 255.0
            return img, label

        return ds.map(load_img).batch(32).prefetch(tf.data.AUTOTUNE)

    train_ds = preprocess(X_train, y_train)
    val_ds = preprocess(X_val, y_val)
    test_ds = preprocess(X_test, y_test)


    # Build a fresh model for each fold
    def create_model():
        model = models.Sequential([
            layers.Input(shape=(224, 224, 3)),
            layers.Conv2D(32, 3, activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(128, 3, activation='relu'),
            layers.MaxPooling2D(),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    model = create_model()
    # # Train
    # model.fit(train_ds, validation_data=val_ds, epochs=100, verbose=1)

    # Train with early stopping
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=100,            # generous upper bound
        callbacks=[earlystop], # ← here!
        verbose=1
    )


    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_ds)
    all_test_accuracies.append(test_acc)
    print(f"✅ Fold {fold} test accuracy: {test_acc:.4f}")


    fold += 1


🧪 Fold 1 -----------------------------
Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 438ms/step - accuracy: 0.5788 - loss: 1.0416 - val_accuracy: 0.5839 - val_loss: 0.6897
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 421ms/step - accuracy: 0.6290 - loss: 0.6774 - val_accuracy: 0.5839 - val_loss: 0.6818
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 444ms/step - accuracy: 0.6290 - loss: 0.6804 - val_accuracy: 0.5839 - val_loss: 0.6798
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 405ms/step - accuracy: 0.6306 - loss: 0.6723 - val_accuracy: 0.5839 - val_loss: 0.6800
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 393ms/step - accuracy: 0.6290 - loss: 0.6643 - val_accuracy: 0.5839 - val_loss: 0.6816
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 392ms/step - accuracy: 0.6290 - loss: 0.6694 - val_accuracy: 0.5839 -

In [5]:
# Cell Block 4: Final Evaluation
mean_acc = np.mean(all_test_accuracies)
std_acc = np.std(all_test_accuracies)
print(f"\n📊 {k}-Fold Test Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")


📊 10-Fold Test Accuracy: 0.6280 ± 0.0522


Add Precision, Make a Confusion Matrix
