
# YOLOv8 Binary Object Detection and Cropping Pipeline



## Instructions on How to use this notebook:
1. Click on Runtime -> Change runtime type -> Click t4 GPU (this is the free option if you have paid subscription feel free to use other GPU)

2. Next Use the side to upload your image file

3. Replace path below with your image path. To do this use sidebar and right click on image file name ans select copy path. Paste this into `org_image` variable in the cell below and run the cell

In [22]:
org_img = "/content/W5_20220509_2_ST220182_A_JPG.rf.e306f0dc23897e2094b74be4951e684c.jpg"

Run the cell below to download the requirements and  model weights and install all required modules

In [2]:
!pip install gdown
!wget --no-check-certificate "https://drive.google.com/uc?export=download&id=1VAd8ZZJOVgIyhvx2uWxMaXBxJveJkddS" -O requirements.txt
!gdown 1eul2LTjjFX4ye3QXn2l4fNP6Cz1ltUIx -O cls_model.keras
!gdown 1IdQXwGsizccY9TSPiL2dMmVFUAZ58NRr -O detect_model.pt
!pip install -r requirements.txt

--2025-07-21 23:44:24--  https://drive.google.com/uc?export=download&id=1VAd8ZZJOVgIyhvx2uWxMaXBxJveJkddS
Resolving drive.google.com (drive.google.com)... 173.194.212.139, 173.194.212.101, 173.194.212.138, ...
Connecting to drive.google.com (drive.google.com)|173.194.212.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://drive.usercontent.google.com/download?id=1VAd8ZZJOVgIyhvx2uWxMaXBxJveJkddS&export=download [following]
--2025-07-21 23:44:24--  https://drive.usercontent.google.com/download?id=1VAd8ZZJOVgIyhvx2uWxMaXBxJveJkddS&export=download
Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.251.107.132, 2607:f8b0:400c:c32::84
Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.251.107.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 768 [application/octet-stream]
Saving to: ‘requirements.txt’


2025-07-21 23:44:25 (34.0 MB/s) - ‘requirements.txt’ saved [768/76

## Run the script below runs the inference on the image.
A summary of results is saved in `class_summary.csv` and the each image path, predicted class and confidence is saved in `detailed_predictions.csv`. Individual cropped images in their corresponding prdiction folders are save in `results`

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from collections import defaultdict
import csv
import pandas as pd
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# =======================
# 1. Load Classification Model
# =======================
if tf.config.list_physical_devices('GPU'):
    with tf.device('/GPU:0'):
        model_cls = load_model('/content/cls_model.keras')
else:
    model_cls = load_model('/content/cls_model.keras')

# =======================
# 2. Load Detection Model
# =======================
detection_model = AutoDetectionModel.from_pretrained(
    model_type="ultralytics",
    model_path='/content/detect_model.pt',
    confidence_threshold=0.2
)

# =======================
# 3. Get Sliced Prediction
# =======================
result = get_sliced_prediction(
    org_img,
    detection_model,
    slice_height=752,
    slice_width=752,
    overlap_height_ratio=0.3,
    overlap_width_ratio=0.3,
    postprocess_type="GREEDYNMM"
)

# =======================
# 4. Crop Prediction Boxes
# =======================
def crop_predictions_from_sahi(image_path, sahi_result, conf_thresh=0.05):
    img = cv2.imread(image_path)
    crops = []
    for obj in sahi_result.object_prediction_list:
        if obj.score.value < conf_thresh:
            continue
        x1, y1, x2, y2 = map(int, obj.bbox.to_xyxy())
        crop = img[y1:y2, x1:x2]
        crops.append(crop)
    return crops

def save_crops(crops, save_dir, base_filename="crop"):
    os.makedirs(save_dir, exist_ok=True)
    for idx, crop in enumerate(crops):
        save_path = os.path.join(save_dir, f"{base_filename}_{idx + 1}.jpg")
        cv2.imwrite(save_path, crop)
    print(f"{len(crops)} cropped images saved to {save_dir}")

crops = crop_predictions_from_sahi(org_img, result)
crop_dir = 'cropped_results'
save_crops(crops, save_dir=crop_dir, base_filename="insect")

# =======================
# 5. Classify with Thresholding and Save Results
# =======================
class_names = ['Caddisfly', 'Dipteran', 'Mayfly', 'Other', 'Stonefly', 'Terrestrial']
class_thresholds = {
    0: 0.4,   # Caddisfly
    1: 0.4,   # Dipteran
    2: 0.4,   # Mayfly
    3: 0.0,   # Other
    4: 0.4,   # Stonefly
    5: 0.4    # Terrestrial
}
OTHER_IDX = 3  # Index for "Other" class

img_size = (224, 224)
output_dir = "results"
os.makedirs(output_dir, exist_ok=True)
for class_name in class_names:
    os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

detailed_csv_path = "detailed_predictions.csv"
summary_csv_path = "class_summary.csv"
class_counts = defaultdict(int)

with open(detailed_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Image Name', 'Raw Prediction', 'Confidence', 'Threshold', 'Final Prediction'])

    for file_name in os.listdir(crop_dir):
        if not file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img_path = os.path.join(crop_dir, file_name)
        img = image.load_img(img_path, target_size=img_size)
        img_array = image.img_to_array(img) / 255.0
        img_batch = np.expand_dims(img_array, axis=0)

        preds = model_cls.predict(img_batch)
        probs = preds[0]
        pred_idx = np.argmax(probs)
        raw_class = class_names[pred_idx]
        confidence = float(probs[pred_idx])
        threshold = class_thresholds.get(pred_idx, 0.5)

        # Apply threshold
        if confidence >= threshold:
            final_idx = pred_idx
            rerouted = False
        else:
            final_idx = OTHER_IDX
            rerouted = True

        final_class = class_names[final_idx]
        class_counts[final_class] += 1

        # Write to CSV
        writer.writerow([
            file_name,
            raw_class,
            round(confidence, 4),
            threshold,
            final_class
        ])

        # Save result image
        plt.figure()
        plt.imshow(img)
        plt.title(f"{final_class} ({confidence:.2f})")
        plt.axis('off')
        save_path = os.path.join(output_dir, final_class, f"{os.path.splitext(file_name)[0]}_pred.png")
        plt.savefig(save_path)
        plt.close()

        if rerouted:
            print(f"{file_name} → {raw_class} ({confidence:.2f}) ➡ rerouted to 'Other'")
        else:
            print(f"{file_name} → {final_class} ({confidence:.2f}) ✅")

# =======================
# 6. Save Summary CSV
# =======================
with open(summary_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Class', 'Count'])
    for class_name in class_names:
        writer.writerow([class_name, class_counts[class_name]])

print(f"\nSummary CSV saved to: {summary_csv_path}")
print("Class Distribution:")
for cls, count in class_counts.items():
    print(f"{cls}: {count} times")


Performing prediction on 9 slices.
157 cropped images saved to cropped_results
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
insect_128.jpg → Dipteran (0.85) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
insect_74.jpg → Dipteran (0.89) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
insect_113.jpg → Dipteran (0.81) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
insect_40.jpg → Other (0.69) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
insect_66.jpg → Dipteran (0.68) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
insect_122.jpg → Dipteran (0.85) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
insect_103.jpg → Dipteran (0.71) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
insect_57.jpg → Dipteran (0.90) ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
in

## Run cell below to download csv files and image prediction folder

In [4]:
from google.colab import files

# Download csv files
files.download('/content/detailed_predictions.csv')
files.download('/content/class_summary.csv')

#Download image folder
import shutil

# Zip the folder
shutil.make_archive('/content/results', 'zip', '/content/results')

# Download the zip file
files.download('/content/results.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Want to run inference with another image? Run cell below to clear out current results.

In [24]:
!rm -rf /content/cropped_results
!rm -rf /content/results/
!rm -rf /content/detailed_predictions.csv
!rm -rf /content/class_summary.csv
!rm -rf /content/results.zip