In [1]:
"""
planning:
- load the detectron faster rcnn model and perform detections on a single image
- chosen model faster rcnn R50 C4 x1
- after i have the detection i should use them for the classification
- after i have images to use for the classification i should use the alexnet model to classify them.
- after that save the results
"""

'\nplanning:\n- load the detectron faster rcnn model and perform detections on a single image\n- chosen model faster rcnn R50 C4 x1\n- after i have the detection i should use them for the classification\n- after i have images to use for the classification i should use the alexnet model to classify them.\n- after that save the results\n'

In [1]:
# load the detectron model
import numpy as np
import os

# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()


# checking the version and if we have cuda available
from src.segmentation.framework_handlers.detectron2_handler import print_version_info

!nvcc --version
print_version_info()

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Wed_Apr_17_19:19:55_PDT_2024
Cuda compilation tools, release 12.5, V12.5.40
Build cuda_12.5.r12.5/compiler.34177558_0
torch:  2.3 ; cuda:  12.1
detectron2: 0.6


In [22]:
import time
# import required functions, classes
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# Settings for the Faster R-CNN R50-C4 1x detector: a display name plus the
# checkpoint weights and the Detectron2 YAML config, both stored in the same
# run directory.
detection_model_config = dict(
    model_name="faster_rcnn_R_50_C4_1x",
    checkpoint="/home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/model_final.pth",
    yaml_file="/home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/config.yaml",
)

In [23]:
# Wrap the Detectron2 checkpoint described by detection_model_config in a
# SAHI detection model so it can be used for sliced inference.
detection_model = AutoDetectionModel.from_pretrained(
    model_type="detectron2",
    config_path=detection_model_config["yaml_file"],
    model_path=detection_model_config["checkpoint"],
    device="cuda:0",  # or 'cpu'
    image_size=512,
    confidence_threshold=0.5,
)



/home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/config.yaml not available in Model Zoo!
[32m[07/16 17:39:24 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/model_final.pth ...


07/16/2024 17:39:24 - INFO - fvcore.common.checkpoint -   [Checkpointer] Loading from /home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/model_final.pth ...


In [None]:
"""
Colors used in the export_visuals output:
- blue - clear trichomes
- orange - cloudy trichomes
- purple - amber trichomes
"""

In [24]:
raw_image_path = "/home/etaylor/images/assessing_cannabis_experiment_images/day_5_2024_06_13/greenhouse/138/IMG_6652.JPG"

# Time one sliced-inference pass over the image.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
result = get_sliced_prediction(
    raw_image_path,
    detection_model,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0,  # slices do not overlap
    overlap_width_ratio=0,
    verbose=True,
)
# NOTE: despite its name, end_time holds the elapsed duration in seconds.
end_time = time.perf_counter() - start
print(f"Time taken to process image: {end_time}")

Performing prediction on 48 slices.
Time taken to process image: 14.385375738143921


## Classification with AlexNet  

In [8]:
from fastai.vision.all import *
from fastai.vision import *

In [31]:
# Translate the classifier's class indices into the detector's category ids
# so both models can be compared and visualised with one id scheme.
classification_to_detection_mapping = {
    0: 3,  # Amber (classification) -> Amber (object detection)
    1: 1,  # Clear (classification) -> Clear (object detection)
    2: 2,  # Cloudy (classification) -> Cloudy (object detection)
}

# Locations of the train / test splits of the classification dataset.
classification_dataset_config = dict(
    train='/home/etaylor/code_projects/thesis/segments/etaylor_cannabis_patches_train_26-04-2024_15-44-44/trichome_dataset_01',
    test='/home/etaylor/code_projects/thesis/segments/etaylor_cannabis_patches_test_26-04-2024_15-44-44/ground_truth_trichomes_datasets/trichome_dataset_01',
)

# Directory that holds the exported classification learners.
classification_models_path = "/home/etaylor/code_projects/thesis/checkpoints/image_classification_models"

# Classifier description: display name, architecture and exported checkpoint.
classification_model_config = dict(
    model_name='alexnet',
    model=models.alexnet,
    checkpoint=f'{classification_models_path}/alexnet_model_12_7_24.pkl',
)

# transformation and image space conversion
def custom_transform(size):
    """Build the item transform that resizes to ``size`` using zero padding.

    Args:
        size: Target size handed to ``Resize``.

    Returns:
        A ``Resize`` transform configured with ``method='pad'`` and
        ``pad_mode='zeros'``.
    """
    pad_resize = Resize(size, method='pad', pad_mode='zeros')
    return pad_resize

class RGB2HSV(Transform):
    """Transform that converts a ``PILImage`` from RGB to HSV."""

    def encodes(self, img: PILImage):
        # Convert to a NumPy array first so rgb2hsv always receives array
        # data; this matches the later definition of this class in the
        # notebook.
        # NOTE(review): rgb2hsv is not imported in any visible cell —
        # presumably skimage.color.rgb2hsv; confirm and import it explicitly.
        return rgb2hsv(np.array(img))
    
    
# Item-level transforms. This must be an iterable of transforms because
# apply_transformations loops over it, so it is written as an explicit list
# (a bare `x = transform,` only works through an accidental 1-tuple).
# Only the padded resize lives here; the HSV conversion is a batch transform.
global_item_tfms = [custom_transform(size=128)]

# Batch-level transforms: HSV conversion, the standard augmentations, then
# brightness / contrast jitter.
global_batch_tfms = [
    RGB2HSV(),
    *aug_transforms(size=128, flip_vert=True, max_rotate=10),
    Brightness(max_lighting=0.2, p=0.75),
    Contrast(max_lighting=0.2, p=0.75),
]



In [25]:
import cv2
import logging

# Configure logging
# basicConfig requests INFO level on the root logger; the notebook's own
# messages are emitted through the module-level `logger` defined below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load models functions
def load_detection_model(model_config, patch_size=512, confidence_threshold=0.5, device="cuda:0"):
    """Load the Detectron2 detector described by ``model_config`` through SAHI.

    Args:
        model_config: Mapping with a ``'checkpoint'`` entry (weights path)
            and a ``'yaml_file'`` entry (Detectron2 config path).
        patch_size: Forwarded to SAHI as ``image_size``.
        confidence_threshold: Forwarded to SAHI as ``confidence_threshold``.
            Defaults to the previously hard-coded 0.5.
        device: Device string, e.g. ``"cuda:0"`` or ``"cpu"``. Defaults to
            the previously hard-coded ``"cuda:0"``.

    Returns:
        The object returned by ``AutoDetectionModel.from_pretrained``.
    """
    logger.info("Loading the model.")
    detection_model = AutoDetectionModel.from_pretrained(
        model_type='detectron2',
        model_path=model_config['checkpoint'],
        config_path=model_config['yaml_file'],
        confidence_threshold=confidence_threshold,
        image_size=patch_size,
        device=device,
    )
    return detection_model

# Load classification model
def load_classification_model(classification_model_config):
    """Load the exported fastai learner named in the config.

    Args:
        classification_model_config: Mapping whose ``'checkpoint'`` entry is
            the path of the exported learner file.

    Returns:
        The learner returned by ``load_learner``.
    """
    # Use the module logger (not print) so the message is formatted like the
    # one emitted by load_detection_model.
    logger.info("Loading the classification model.")
    # SECURITY NOTE(review): the checkpoint is a pickle file (.pkl) and
    # loading pickles can execute arbitrary code — only load checkpoints
    # from a trusted source.
    learn = load_learner(classification_model_config['checkpoint'])
    return learn

In [44]:
import os
import time
from PIL import Image
import matplotlib.pyplot as plt


# process image functions
def perform_object_detection(image_path, detection_model, patch_size=512,
                             overlap_height_ratio=0, overlap_width_ratio=0):
    """Run SAHI sliced inference on one image and log how long it took.

    Args:
        image_path: Path of the image to process.
        detection_model: Detection model passed to ``get_sliced_prediction``.
        patch_size: Height and width of each square slice.
        overlap_height_ratio: Vertical overlap between neighbouring slices,
            forwarded to ``get_sliced_prediction``. Defaults to 0 (the
            previously hard-coded value).
        overlap_width_ratio: Horizontal overlap between neighbouring slices,
            forwarded to ``get_sliced_prediction``. Defaults to 0.

    Returns:
        The result object returned by ``get_sliced_prediction``.
    """
    image_name = os.path.basename(image_path)
    logger.info(f"Performing object detection on image: {image_name}")
    # perf_counter() is monotonic and therefore the right clock for
    # measuring elapsed time.
    start_time = time.perf_counter()
    result = get_sliced_prediction(
        image_path,
        detection_model,
        slice_height=patch_size,
        slice_width=patch_size,
        overlap_height_ratio=overlap_height_ratio,
        overlap_width_ratio=overlap_width_ratio,
        verbose=True,
    )
    detection_time = time.perf_counter() - start_time
    logger.info(f"Time taken for object detection on image {image_name}: {detection_time:.2f} seconds")
    return result


def filter_large_objects(predictions, size_threshold_ratio=10):
    """Drop predictions whose box area is far above the median box area.

    Args:
        predictions: Sequence of objects exposing ``bbox.minx``, ``bbox.miny``,
            ``bbox.maxx`` and ``bbox.maxy``.
        size_threshold_ratio: A prediction is kept when its box area is at
            most ``median_area * size_threshold_ratio``.

    Returns:
        A new list with the kept predictions, or ``predictions`` itself when
        it is empty.
    """
    areas = []
    for pred in predictions:
        box = pred.bbox
        areas.append((box.maxx - box.minx) * (box.maxy - box.miny))

    # No boxes means no median to compare against: return the input as-is.
    if not areas:
        return predictions

    area_limit = np.median(areas) * size_threshold_ratio
    kept = []
    for pred, area in zip(predictions, areas):
        if area <= area_limit:
            kept.append(pred)
    return kept


def export_visuals(result, output_dir, base_file_name, stage):
    """Write the visualisation of ``result`` into a per-stage sub-directory.

    Args:
        result: Object exposing an ``export_visuals`` method.
        output_dir: Parent directory for the exported visuals.
        base_file_name: Used both as the exported file name and as the
            prefix of the stage directory name.
        stage: Suffix of the stage directory name
            (``<base_file_name>_<stage>``); also included in the log message.
    """
    target_dir = os.path.join(output_dir, f"{base_file_name}_{stage}")
    os.makedirs(target_dir, exist_ok=True)

    # Drawing options: thin text and boxes, labels and confidences hidden.
    render_options = dict(text_size=1, rect_th=2, hide_labels=True, hide_conf=True)
    result.export_visuals(export_dir=target_dir, file_name=base_file_name, **render_options)

    logger.info(f"Exported {stage} visuals for image {base_file_name}")
    
    
def extend_bounding_box(x_min, y_min, x_max, y_max, image_width, image_height, margin=0.1):
    """Grow a box on every side by ``margin`` of its own size, clamped to the image.

    Args:
        x_min, y_min, x_max, y_max: Corners of the original box.
        image_width, image_height: Upper bounds for the extended corners.
        margin: Fraction of the box width / height added on each side; the
            padding is truncated to an integer.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` of the extended box.
    """
    pad_x = int(margin * (x_max - x_min))
    pad_y = int(margin * (y_max - y_min))

    left = max(0, x_min - pad_x)
    top = max(0, y_min - pad_y)
    right = min(image_width, x_max + pad_x)
    bottom = min(image_height, y_max + pad_y)

    return left, top, right, bottom


def crop_image(image, x_min, y_min, x_max, y_max):
    """Return the ``[y_min:y_max, x_min:x_max]`` window of ``image``.

    The first index selects rows (y), the second selects columns (x).
    """
    row_span = slice(y_min, y_max)
    col_span = slice(x_min, x_max)
    return image[row_span, col_span]


def apply_transformations(cropped_image):
    """Run a crop through every transform in ``global_item_tfms``.

    Args:
        cropped_image: Image data accepted by ``PILImage.create``.

    Returns:
        A PIL ``Image`` rebuilt from the transformed image's array data.
    """
    transformed = PILImage.create(cropped_image)

    # Apply the item transforms one after another, in their listed order.
    for transform in global_item_tfms:
        transformed = transform(transformed)

    as_array = np.array(transformed)
    return Image.fromarray(as_array)


def classify_cropped_image(cropped_fastai_image, classification_model):
    """Classify one crop and return the predicted class index as an ``int``.

    Args:
        cropped_fastai_image: Image handed to ``classification_model.predict``.
        classification_model: Object whose ``predict`` returns a 3-tuple; the
            second element is the predicted class index.

    Returns:
        The second element of the prediction tuple converted with ``int``.
    """
    _label, class_index, _scores = classification_model.predict(cropped_fastai_image)
    return int(class_index)


def plot_classified_object(cropped_image, detection_class_name, classification_class_name):
    """Show a crop with both models' class names in the title.

    Args:
        cropped_image: Image data passed to ``plt.imshow``.
        detection_class_name: Class name shown for the Faster R-CNN detector.
        classification_class_name: Class name shown for the AlexNet classifier.
    """
    heading = (
        f"Detected Class (Faster R-CNN): {detection_class_name}\n"
        f"Predicted Class (AlexNet): {classification_class_name}"
    )
    plt.title(heading)
    plt.imshow(cropped_image)
    plt.axis('off')
    plt.show()

def classify_objects(image_path, result, classification_model, show_plots=True):
    """Re-label every detection in ``result`` with the classification model.

    Each detection's bounding box is extended by the default margin of
    ``extend_bounding_box``, cropped from the image, passed through
    ``apply_transformations`` and classified. The detection's
    ``category.id`` is then overwritten in place with the detector-space id
    looked up in ``classification_to_detection_mapping``.

    Args:
        image_path: Path of the image the detections were computed on.
        result: Object exposing ``object_prediction_list``; its predictions
            are mutated in place.
        classification_model: Model passed to ``classify_cropped_image``.
        show_plots: When true (default, the original behaviour) every crop is
            plotted together with both class names.

    Raises:
        OSError: If the image at ``image_path`` cannot be read.
    """
    logger.info("Classifying detected objects.")

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")
    # OpenCV loads pixels in BGR order, while PIL / fastai / matplotlib treat
    # arrays as RGB. Convert once so the classifier and the plots see the
    # true colours.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width = image.shape[:2]

    # Class names indexed by (detector category id - 1).
    labels = ["Clear", "Cloudy", "Amber"]

    start_classification = time.perf_counter()

    for prediction in result.object_prediction_list:

        # Save the original class id for the detection model
        detection_model_pred_class_id = prediction.category.id
        logger.debug(f"pred id before: {detection_model_pred_class_id}")

        # Get the original bounding box coordinates
        x_min = int(prediction.bbox.minx)
        y_min = int(prediction.bbox.miny)
        x_max = int(prediction.bbox.maxx)
        y_max = int(prediction.bbox.maxy)

        # Extend bounding box by 10%
        x_min_extended, y_min_extended, x_max_extended, y_max_extended = extend_bounding_box(
            x_min, y_min, x_max, y_max, image_width, image_height
        )

        # Crop the extended bounding box from the original image
        cropped_image = crop_image(image, x_min_extended, y_min_extended, x_max_extended, y_max_extended)

        # A box that collapses to zero width or height after integer
        # truncation yields an empty crop that cannot be classified; keep the
        # detector's label for it and move on.
        if cropped_image.size == 0:
            logger.warning(f"Skipping empty crop for bounding box ({x_min}, {y_min}, {x_max}, {y_max})")
            continue

        # Apply transformations
        cropped_fastai_image = apply_transformations(cropped_image)

        # Classify the cropped image
        classification_model_pred_class_id = classify_cropped_image(cropped_fastai_image, classification_model)

        # Get the corresponding classification model class id for the detection model (visuals purposes)
        prediction.category.id = classification_to_detection_mapping[classification_model_pred_class_id]
        logger.debug(f"pred id after {prediction.category.id}")

        faster_rcnn_class_name = labels[detection_model_pred_class_id - 1]
        alexnet_class_name = labels[prediction.category.id - 1]

        # Plot the classified object
        if show_plots:
            plot_classified_object(cropped_image, faster_rcnn_class_name, alexnet_class_name)

        logger.info(f"Detected Class (Faster R-CNN): {detection_model_pred_class_id} {faster_rcnn_class_name}")
        logger.info(f"Predicted Class (AlexNet): {prediction.category.id} {alexnet_class_name}")

    end_classification = time.perf_counter() - start_classification
    logger.info(f"Time taken for classification: {end_classification:.2f} seconds")





def process_and_classify_image(image_path, detection_model, classification_model, patch_size, output_dir, base_file_name):
    """Detect, size-filter and re-classify the objects in one image.

    Visuals are exported twice: once right after detection
    (``pre_classification``) and once after the detections were filtered and
    re-labelled (``post_classification``).

    Args:
        image_path: Path of the image to process.
        detection_model: Model passed to ``perform_object_detection``.
        classification_model: Model passed to ``classify_objects``.
        patch_size: Slice size passed to ``perform_object_detection``.
        output_dir: Parent directory for the exported visuals.
        base_file_name: Base name used for the exported visuals.

    Returns:
        The detection result, with its prediction list replaced by the
        filtered list.
    """
    detections = perform_object_detection(image_path, detection_model, patch_size)
    export_visuals(detections, output_dir, base_file_name, "pre_classification")

    # Replace the prediction list with the size-filtered one; the steps
    # below operate on the filtered detections only.
    detections.object_prediction_list = filter_large_objects(detections.object_prediction_list)

    classify_objects(image_path, detections, classification_model)
    export_visuals(detections, output_dir, base_file_name, "post_classification")

    return detections

In [45]:
raw_image_path = "/home/etaylor/images/assessing_cannabis_experiment_images/day_5_2024_06_13/greenhouse/138/IMG_6652.JPG"

# Run the full detect -> filter -> classify pipeline on a single image.
# Both models are loaded fresh for this call.
res = process_and_classify_image(
    raw_image_path,
    load_detection_model(detection_model_config),
    load_classification_model(classification_model_config),
    patch_size=512,
    output_dir="/home/etaylor/code_projects/thesis/src/pipelines/end_to_end/testing_end_to_end_pipe",
    base_file_name="IMG_6652",
)

07/16/2024 19:01:29 - INFO - __main__ -   Loading the model.


/home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/config.yaml not available in Model Zoo!
[32m[07/16 19:01:29 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/model_final.pth ...


07/16/2024 19:01:29 - INFO - fvcore.common.checkpoint -   [Checkpointer] Loading from /home/etaylor/code_projects/thesis/checkpoints/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/29-04-2024_16-09-41/model_final.pth ...
07/16/2024 19:01:30 - INFO - __main__ -   Performing object detection on image: IMG_6652.JPG


Loading the classification model.
Performing prediction on 48 slices.


In [30]:
# Display the first prediction of the pipeline result for a quick sanity check.
res.object_prediction_list[0]

ObjectPrediction<
    bbox: BoundingBox: <(2241.2849731445312, 1967.7375793457031, 2278.209197998047, 2008.1548156738281), w: 36.924224853515625, h: 40.417236328125>,
    mask: None,
    score: PredictionScore: <value: 0.964286744594574>,
    category: Category: <id: 1, name: 1>>

In [None]:
# classification dataset transformations
# Define the custom transformations
def custom_transform(size):
    """Return a ``Resize`` transform that pads with zeros up to ``size``.

    NOTE(review): this re-defines ``custom_transform`` from an earlier cell
    of this notebook; keep a single definition.
    """
    resize_options = {'method': 'pad', 'pad_mode': 'zeros'}
    return Resize(size, **resize_options)

class RGB2HSV(Transform):
    """Transform that converts a ``PILImage`` to HSV via ``rgb2hsv``.

    NOTE(review): this re-defines ``RGB2HSV`` from an earlier cell of this
    notebook; keep a single definition.
    """

    def encodes(self, img: PILImage):
        # rgb2hsv is given array data rather than the PIL object itself.
        pixels = np.array(img)
        return rgb2hsv(pixels)

# Transform lists used at inference time.
# Item transforms: only the zero-padded resize to 128.
global_item_tfms = [custom_transform(size=128)]

# Batch transforms, assembled step by step: HSV conversion first, then the
# standard augmentations, then brightness and contrast jitter.
global_batch_tfms = [RGB2HSV()]
global_batch_tfms.extend(aug_transforms(size=128, flip_vert=True, max_rotate=10))
global_batch_tfms.append(Brightness(max_lighting=0.2, p=0.75))
global_batch_tfms.append(Contrast(max_lighting=0.2, p=0.75))

In [None]:
print("Performing object detection...")
# perf_counter() is monotonic, which makes it the right clock for measuring
# an elapsed duration.
start = time.perf_counter()
result = get_sliced_prediction(
    raw_image_path,
    detection_model,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0,  # slices do not overlap
    overlap_width_ratio=0,
    verbose=True,
)
# NOTE: despite its name, end_time holds the elapsed duration in seconds.
end_time = time.perf_counter() - start
print(f"Time taken for object detection model: {end_time}")

In [None]:
# Load the classifier in this cell so it does not depend on a variable left
# over from an earlier kernel session (no other cell defines
# `classification_model` at notebook scope).
classification_model = load_classification_model(classification_model_config)

# Read the original image
image = cv2.imread(raw_image_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError(f"Could not read image: {raw_image_path}")
# OpenCV loads pixels in BGR order, while PIL / fastai treat arrays as RGB.
# Convert once so the classifier sees the true colours.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Class names: the detector's are indexed by (category id - 1), the
# classifier's by its own class index.
faster_rcnn_labels = ["Clear", "Cloudy", "Amber"]
alexnet_labels = ["Amber", "Clear", "Cloudy"]

# initialize a timer for the classification
start_classification = time.perf_counter()
# Iterate over detected objects and classify them
for prediction in result.object_prediction_list:
    # Extract bounding box coordinates
    x_min = int(prediction.bbox.minx)
    y_min = int(prediction.bbox.miny)
    x_max = int(prediction.bbox.maxx)
    y_max = int(prediction.bbox.maxy)

    # Crop the detected object from the original image
    cropped_image = image[y_min:y_max, x_min:x_max]

    # Convert the cropped image to PIL format for Fastai
    cropped_pil_image = PILImage.create(cropped_image)

    # Apply the item transforms defined in global_item_tfms
    for item_tfms in global_item_tfms:
        cropped_pil_image = item_tfms(cropped_pil_image)

    # Rebuild a plain PIL image from the transformed image's array data
    cropped_fastai_image = Image.fromarray(np.array(cropped_pil_image))

    # Perform classification on the cropped image; only the class index
    # (second element) is needed.
    _, classification_model_pred_class_id, _ = classification_model.predict(cropped_fastai_image)
    classification_model_pred_class_id = int(classification_model_pred_class_id)  # Convert tensor to int

    # Map the classification result to the detection model class.
    # `detection_class` is kept at notebook scope: the next cell displays it.
    detection_class = classification_to_detection_mapping[classification_model_pred_class_id]
    faster_rcnn_class_name = faster_rcnn_labels[prediction.category.id - 1]
    alexnet_class_name = alexnet_labels[classification_model_pred_class_id]

    print(f"Detected Class (Faster R-CNN): {faster_rcnn_class_name}")
    print(f"Predicted Class (AlexNet): {alexnet_class_name}")

# NOTE: `end` holds the elapsed duration in seconds.
end = time.perf_counter() - start_classification
print(f"Time taken for classification: {end}")


In [47]:
# Detector-space class id assigned during the last iteration of the classification loop.
detection_class

1