# Nest Detection Training

Train a model that detects nests. The training data is taken from a tutorial. Although it is only a tutorial, the method is not very different from the one used in the bird detection paper.

Then evaluate the model on the leaked data.

In [None]:
!git clone https://github.com/weecology/DeepForest.git

In [None]:
! pip uninstall -y deepforest

In [None]:
%cd DeepForest
!pip install -e .
%cd ..

In [None]:
import os
import sys

# Absolute path of the "DeepForest" folder, resolved against the current working directory
deepforest_path = os.path.abspath("DeepForest")
# Bare expression so the notebook shows the resolved path as the cell output
deepforest_path



In [None]:
# Make the DeepForest checkout importable: register the repository root and
# the package folder inside it on sys.path, skipping entries already present.
deepforest_path_d = "/home/jovyan/DeepForest/deepforest"
for extra_path in (deepforest_path, deepforest_path_d):
    if extra_path in sys.path:
        continue
    sys.path.append(extra_path)
    

In [None]:
!ls /home/jovyan/DeepForest

In [None]:
# load the modules
# import comet_ml
import wandb
import os
import time
import numpy as np
import pandas as pd
import torch
from deepforest import main
from deepforest import get_data
from deepforest import utilities
from deepforest import preprocess
from tqdm import tqdm
from pytorch_lightning.loggers import CometLogger, WandbLogger
import zipfile
import matplotlib.pyplot as plt
import subprocess

In [None]:
# Weights & Biases logger; it is handed to m.create_trainer(logger=...) further down
wandb_logger = WandbLogger()

In [None]:
# 

In [None]:
# Folder (below the current working directory) that holds the training data
crop_dir = os.path.join(os.getcwd(), "train_data_folder")

# Annotation CSVs for the train / validation / test splits, all inside crop_dir
annotations_file, validation_file, test_file = (
    os.path.join(crop_dir, csv_name)
    for csv_name in ("train.csv", "valid.csv", "test.csv")
)

In [None]:
annotations_file

In [None]:
# initialize the model and change the corresponding config file
# single-class setup: the only label is "Nest", mapped to index 0
m = main.deepforest(label_dict={"Nest": 0})

m.config["batch_size"] = 30

# move to GPU and use all the GPU resources
# NOTE(review): "-1" presumably selects every available GPU — confirm against the installed DeepForest config schema
m.config["gpus"] = "-1"
# training annotations; root_dir is set to the folder that contains the CSV
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)
m.config["train"]["epochs"] = 100

# Define the learning scheduler type
m.config["train"]["scheduler"]["type"] = "cosine"
# NOTE(review): presumably the minimum confidence for a detection to be kept — confirm
m.config["score_thresh"] = 0.4

# validation annotations; root_dir is again the folder that contains the CSV
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)

In [None]:
# create a pytorch lightning trainer used for training, logging through wandb_logger
# m.create_trainer()
m.create_trainer(logger=wandb_logger)
# load the latest release model (RetinaNet)
# NOTE(review): the model was constructed with label_dict={"Nest": 0}; check that
# use_bird_release() does not replace that label mapping — TODO confirm
m.use_bird_release()

In [None]:
# Start the training
# wall-clock timer around the fit call
start_time = time.time()
m.trainer.fit(m)
# NOTE(review): the message says "GPU" unconditionally; nothing in this cell checks the device actually used
print(f"--- Training on GPU: {(time.time() - start_time):.2f} seconds ---")

In [None]:
from pathlib import Path
root_folder = Path("models")
# torch.save does not create missing parent folders, so make sure the
# target folder exists before anything is written into it
root_folder.mkdir(parents=True, exist_ok=True)

# Save the model checkpoint
m.trainer.save_checkpoint(
    os.path.join(root_folder, "checkpoint_cosine_lr.pl")
)
# TODO do I need this even?
# Additionally store only the raw network weights (state dict)
torch.save(m.model.state_dict(), os.path.join(root_folder, "weights_cosine_lr"))

In [None]:
!rm -rf pred_result_test

In [None]:
# Evaluate the freshly trained model on the test split
save_dir = os.path.join(os.getcwd(), "pred_result_test")
# The folder is deleted by the preceding `rm -rf` cell, so recreate it
# before evaluate() is asked to write its output there
os.makedirs(save_dir, exist_ok=True)
results = m.evaluate(
    test_file, os.path.dirname(test_file), iou_threshold=0.4, savedir=save_dir
)


In [None]:
results["box_precision"]

In [None]:
results["box_recall"]

In [None]:
root_folder

In [None]:
# Load the model from the saved checkpoint and evaluate it to check that persistence works
model = main.deepforest.load_from_checkpoint(
    os.path.join(root_folder, "checkpoint_cosine_lr.pl")
)

In [None]:
# Evaluate the model restored from the checkpoint on the same test split
save_dir = os.path.join(os.getcwd(), "pred_result_test")
# Make sure the output folder exists before evaluate() writes into it
os.makedirs(save_dir, exist_ok=True)
results_loaded = model.evaluate(
    test_file, os.path.dirname(test_file), iou_threshold=0.4, savedir=save_dir
)


In [None]:
## Inspecting the results on the leaked data

In [None]:
results_loaded["box_precision"]

In [None]:
results_loaded["box_recall"]

In [None]:
## with the score threshold the loaded model currently has
print(f"score threshold: {model.model.score_thresh}")

# Add a path to an image to test the model on
raster_path = "./deep_forest_root/dataset/Horus_04_27_2022_DJI_0245.JPG"
# return_plot=True: the result is passed straight to plt.imshow, i.e. it is used as an image
predicted_raster = model.predict_tile(
    raster_path, return_plot=True, patch_size=400, patch_overlap=0.25, thickness=10, color=(255, 0, 0)
)
plt.imshow(predicted_raster)
plt.show()

In [None]:
# Raise the score threshold to 0.8 and predict the same image again
# (note: patch_size is 300 here, while the cell before uses 400)
model.model.score_thresh = 0.8
# Add a path to an image to test the model on
raster_path = "./deep_forest_root/dataset/Horus_04_27_2022_DJI_0245.JPG"
predicted_raster = model.predict_tile(
    raster_path, return_plot=True, patch_size=300, patch_overlap=0.25, thickness=10, color=(255, 0, 0)
)
plt.imshow(predicted_raster)
plt.show()

In [None]:
# Same image once more, this time with return_plot=False; the result is
# filtered by its `score` column further down, so it is used as a table
raster_path = "./deep_forest_root/dataset/Horus_04_27_2022_DJI_0245.JPG"
predicted_detections = model.predict_tile(
    raster_path, return_plot=False, patch_size=300, patch_overlap=0.25
)

In [None]:
predicted_detections

In [None]:
predicted_detections[predicted_detections.score > 0.8]

In [None]:
import pandas as pd
# Read the annotation table and show the rows belonging to the image predicted on above
df_nest_data = pd.read_csv("deep_forest_root/dataset/nest_data.csv")
df_nest_data[df_nest_data.image_path == "Horus_04_27_2022_DJI_0245.JPG"]

In [None]:
# TODO evaluate on full size images

In [None]:
## predict on an orthomosaic

raster_path = "datasets/JetportNew_C.tif"
# return_plot=False: the result is converted with visualize.convert_to_sv_format
# afterwards, which needs the table of predicted boxes rather than a drawn image
predicted_detections = model.predict_tile(
    raster_path, return_plot=False, patch_size=300, patch_overlap=0.25
)

## Have a look at the predictions on an orthomosaic

In [None]:
from deepforest import visualize
import supervision as sv
import cv2
import matplotlib.pyplot as plt
from PIL import Image

# Convert the nest detections to Supervision format for visualization
sv_detections = visualize.convert_to_sv_format(predicted_detections)

# Create a bounding box annotator
bounding_box_annotator = sv.BoxAnnotator()

image_path = "datasets/JetportNew_C.tif"
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising
if image is None:
    raise FileNotFoundError(f"cv2 could not read the image: {image_path}")

# Annotate the image with bounding boxes
annotated_frame = bounding_box_annotator.annotate(
    scene=image,
    detections=sv_detections
)

# OpenCV loads images in BGR channel order, while PIL and matplotlib expect
# RGB; convert once so the saved file and the plot show the true colours
annotated_rgb = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)

im = Image.fromarray(annotated_rgb)
im.save("datasets/JetportNew_C_predicted.jpeg")

# Display the annotated image using Matplotlib
plt.imshow(annotated_rgb)
plt.axis('off')  # Hide axes for a cleaner look
plt.show()

### Evaluate manually

In [None]:
# !unzip "/home/jovyan/deep_forest_root/JetportNew A.zip"
!mv "/home/jovyan/datasets/JetportNew A" "/home/jovyan/datasets/JetportNew_A"

In [None]:
!pwd

In [None]:

# load the modules
import os

from deepforest import main
from pathlib import Path

# Folder with the full-size JPG images the model is run on
extract_folder = Path("/home/jovyan/datasets/JetportNew_A")

model.model.score_thresh = 0.8
predictions = []

# glob() already yields complete paths below extract_folder, so they are used
# directly; sorting keeps the order of the collected results reproducible
for image_file in sorted(extract_folder.glob("*.JPG")):

    df_pred = model.predict_tile(
        image_file, return_plot=False, patch_size=400, patch_overlap=0.25
    )

    # NOTE(review): predict_tile presumably returns None when nothing is
    # detected; keep such entries out of the list that is concatenated later
    if df_pred is None:
        continue

    predictions.append(df_pred)




In [None]:
# Keep only the box columns of all per-image predictions and store them on disk
prediction_columns = ["image_path", "xmin", "ymin", "xmax", "ymax", "label"]
df_prediction_short = pd.concat(predictions, axis=0)[prediction_columns]
df_prediction_short.to_csv("predictions.csv")

# Restrict the ground truth to the images found in extract_folder and store it as well
df_gt = pd.read_csv("/home/jovyan/deep_forest_root/dataset/nest_data.csv")
image_names = [image_file.name for image_file in extract_folder.glob("*.JPG")]
df_filtered = df_gt[df_gt.image_path.isin(image_names)]
df_filtered.to_csv("ground_truth.csv")

In [None]:
def calculate_iou(box1, box2):
    """
    Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Both boxes are indexable as [xmin, ymin, xmax, ymax]. The result is the
    overlap area divided by the area of the union, or 0 when the union area
    is zero.
    """
    # Corners of the overlap rectangle (it may be empty)
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # A negative extent means the boxes do not overlap along that axis
    overlap = max(0, right - left) * max(0, bottom - top)

    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    union = _area(box1) + _area(box2) - overlap

    return overlap / union if union != 0 else 0

In [None]:
def calculate_precision_recall(ground_truth_file, predictions_file, iou_threshold=0.5):
    """
    Calculate precision and recall given ground truth and predictions CSV files.

    Both files need the columns ``image_path``, ``xmin``, ``ymin``, ``xmax``
    and ``ymax``. Boxes are matched image by image: predictions are visited in
    file order and each one is greedily paired with the first ground truth box
    that is still unmatched and overlaps it with an IoU of at least
    ``iou_threshold``.

    Images that occur only in the predictions file are evaluated as well, so
    detections on images without any ground truth box count as false positives.

    Args:
        ground_truth_file: Path of the CSV file with the annotated boxes.
        predictions_file: Path of the CSV file with the predicted boxes.
        iou_threshold: Minimum IoU for a prediction to match a ground truth box.

    Returns:
        Tuple ``(precision, recall)``. A value is 0 when its denominator is 0.
    """
    # Read both CSV files
    ground_truth_df = pd.read_csv(ground_truth_file)
    predictions_df = pd.read_csv(predictions_file)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    # Evaluate every image seen in either file; looping over the ground truth
    # images alone would silently drop predictions made on other images
    image_paths = pd.concat(
        [ground_truth_df['image_path'], predictions_df['image_path']]
    ).unique()

    for image_path in image_paths:
        # Get ground truth and predictions for the current image
        ground_truth_boxes = ground_truth_df[ground_truth_df['image_path'] == image_path]
        predicted_boxes = predictions_df[predictions_df['image_path'] == image_path]

        # Each ground truth box may be claimed by at most one prediction
        matched_ground_truth = set()

        # Compare each prediction to the ground truth boxes
        for _, pred_row in predicted_boxes.iterrows():
            pred_box = [pred_row['xmin'], pred_row['ymin'], pred_row['xmax'], pred_row['ymax']]
            for idx_gt, gt_row in ground_truth_boxes.iterrows():
                if idx_gt in matched_ground_truth:
                    continue

                gt_box = [gt_row['xmin'], gt_row['ymin'], gt_row['xmax'], gt_row['ymax']]
                if calculate_iou(pred_box, gt_box) >= iou_threshold:
                    true_positives += 1
                    matched_ground_truth.add(idx_gt)
                    break
            else:
                # The inner loop ended without a match for this prediction
                false_positives += 1

        # Any ground truth boxes not matched are false negatives
        false_negatives += len(ground_truth_boxes) - len(matched_ground_truth)

    # Calculate precision and recall
    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0

    return precision, recall

In [None]:
# Score the stored predictions against the stored ground truth with a lenient IoU threshold
precision, recall = calculate_precision_recall(
    ground_truth_file="ground_truth.csv",
    predictions_file="predictions.csv",
    iou_threshold=0.2,
)

print(f"precision: {precision}, recall: {recall}")
print("With a dataset which was trained on leaked data the precision isn't that good")