Varroa Mite Detection

In [None]:
# Cell 0: Installation
%pip uninstall -y keras
%pip install --upgrade tensorflow

Found existing installation: keras 3.9.2
Uninstalling keras-3.9.2:
  Successfully uninstalled keras-3.9.2
Note: you may need to restart the kernel to use updated packages.
^C
Note: you may need to restart the kernel to use updated packages.


Collecting keras>=3.5.0 (from tensorflow)
  Using cached keras-3.9.2-py3-none-any.whl.metadata (6.1 kB)
Using cached keras-3.9.2-py3-none-any.whl (1.3 MB)
Installing collected packages: keras
Successfully installed keras-3.9.2


In [2]:
# Cell 1: Imports & global parameters

import os
import numpy as np
import pandas as pd
import tensorflow as tf
# import keras

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Speed up tf.data pipelines
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Adjustable parameters
img_height = 224
img_width  = 224
batch_size = 32
epochs     = 10
num_folds  = 5

# Your data locations
data_dir   = 'images'       # folder containing all bee jpgs


# Cell 2: Check TensorFlow and Keras versions

print("TensorFlow version:", tf.__version__)
print("Keras version: ", tf.keras.__version__) 







TensorFlow version: 2.19.0
Keras version:  3.9.2


In [3]:
# Cell 2: Load labels & preprocess

df = pd.read_csv('labels.csv')

filepaths = df['filename'].values
has_mite = df['has_mite'].values

# Convert boolean to int if needed
df['has_mite'] = df['has_mite'].astype(int)

df.head()

Unnamed: 0,filename,has_mite
0,2017-08-28_16-32-55_30_sec-mp4-bee_id_6686-900...,1
1,2017-08-30_15-42-59-mp4-bee_id_9232-4830-1_png...,0
2,2017-10-17_16-41-10-mp4-bee_id_6940-2115-1_png...,0
3,2017-10-17_1-39-36-mp4-bee_id_3468-105-1_png.r...,0
4,2017-08-30_15-42-59-mp4-bee_id_9226-4545-1_png...,0


In [4]:
# Cell 3: Model‐building function

def create_model(input_shape=(img_height, img_width, 3)):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Rescaling(1./255, input_shape=input_shape),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(64, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(128, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [5]:
# Cell 4: K‑Fold Cross‑Validation loop

kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
fold_no = 1
results = []

for train_val_idx, test_idx in kf.split(df):
    # Split into train+val and test
    train_val_df = df.iloc[train_val_idx]
    test_df      = df.iloc[test_idx]
    
    # Further split train_val into train & validation (20% val)
    train_df, val_df = train_test_split(
        train_val_df,
        test_size=0.2,
        stratify=train_val_df['has_mite'],
        random_state=42
    )
    
    # Create tf.data Datasets from each DataFrame
    train_ds = tf.keras.utils.image_dataset_from_dataframe(
        train_df,
        directory=data_dir,
        x_col='filename',
        y_col='has_mite',
        label_mode='binary',
        image_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=True
    ).prefetch(AUTOTUNE)
    
    val_ds = tf.keras.utils.image_dataset_from_dataframe(
        val_df,
        directory=data_dir,
        x_col='filename',
        y_col='has_mite',
        label_mode='binary',
        image_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=False
    ).prefetch(AUTOTUNE)
    
    test_ds = tf.keras.utils.image_dataset_from_dataframe(
        test_df,
        directory=data_dir,
        x_col='filename',
        y_col='has_mite',
        label_mode='binary',
        image_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=False
    ).prefetch(AUTOTUNE)
    
    # Build and train
    model = create_model()
    print(f"\n=== Training fold {fold_no}/{num_folds} ===")
    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        verbose=1
    )
    
    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_ds, verbose=0)
    
    # Gather predictions for sklearn metrics
    y_true = np.concatenate([y.numpy() for x, y in test_ds], axis=0)
    y_prob = model.predict(test_ds).flatten()
    y_pred = (y_prob > 0.5).astype(int)
    
    # Compute metrics
    acc  = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec  = recall_score(y_true, y_pred, zero_division=0)
    f1   = f1_score(y_true, y_pred, zero_division=0)
    
    results.append({
        'fold': fold_no,
        'test_loss': test_loss,
        'test_accuracy': test_acc,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1_score': f1
    })
    
    fold_no += 1


AttributeError: module 'keras.api.utils' has no attribute 'image_dataset_from_dataframe'

In [None]:
# Cell 5: Summarize & display metrics

results_df = pd.DataFrame(results)
print("Per‑fold results:")
display(results_df)

print("\nAverage across folds:")
display(results_df.mean().to_frame().T.round(4))

Per‑fold results:



Average across folds:


0
