In [1]:
import os
import re

def get_image_list(folder_path):
    # Use a set to ensure distinct image names
    image_set = set()
    
    # Define the pattern to extract the base image name
    pattern = re.compile(r'(IMG_\d+)_p\d+_label_ground-truth\.png')

    # Walk through the folder
    for root, _, files in os.walk(folder_path):
        for file in files:
            match = pattern.match(file)
            if match:
                base_image_name = match.group(1)
                image_set.add(base_image_name)
    
    # Convert the set to a list and return it
    return list(image_set)

In [5]:
# Example usage
train_folder_path = '/home/etaylor/code_projects/thesis/segments/etaylor_cannabis_patches_train_26-04-2024_15-44-44/v0.1/'
test_folder_path = '/home/etaylor/code_projects/thesis/segments/etaylor_cannabis_patches_test_26-04-2024_15-44-44/v0.1'
train_image_list = get_image_list(train_folder_path)
test_image_list = get_image_list(test_folder_path)
print(f"Train number of images: {len(train_image_list)}")
print(f"Train image list: {train_image_list}")

print(f"Test number of images: {len(test_image_list)}")
print(f"Test image list: {test_image_list}")

Train number of images: 40
Train image list: ['IMG_0048', 'IMG_1186', 'IMG_0016', 'IMG_2153', 'IMG_0003', 'IMG_0014', 'IMG_1182', 'IMG_0562', 'IMG_0017', 'IMG_1827', 'IMG_2163', 'IMG_2167', 'IMG_1787', 'IMG_9998', 'IMG_0543', 'IMG_1784', 'IMG_0045', 'IMG_2129', 'IMG_1085', 'IMG_2145', 'IMG_0612', 'IMG_0581', 'IMG_0546', 'IMG_1111', 'IMG_1857', 'IMG_2276', 'IMG_0001', 'IMG_0540', 'IMG_0547', 'IMG_2134', 'IMG_1875', 'IMG_2157', 'IMG_2242', 'IMG_1144', 'IMG_0545', 'IMG_0597', 'IMG_2235', 'IMG_1082', 'IMG_0572', 'IMG_1818']
Test number of images: 10
Test image list: ['IMG_0542', 'IMG_2285', 'IMG_1079', 'IMG_2198', 'IMG_1753', 'IMG_2271', 'IMG_0058', 'IMG_0019', 'IMG_1096', 'IMG_1093']


In [6]:
from src.annotation_handling.segmentsai_handler import SegmentsAIHandler
segments_handler = SegmentsAIHandler()

test_gt_trichome_distribution = {}
# get the trichome distribution for each annotated image dataset
for image_number in test_image_list:
    print(f"get the distribution for image {image_number}")
    trichome_distribution = segments_handler.get_trichome_distribution(image_number)
    test_gt_trichome_distribution[image_number] = trichome_distribution
    print(f"Distribution for image {image_number}: \n{trichome_distribution}")

get the distribution for image IMG_0542
Distribution for image IMG_0542: 
{'clear': 36, 'cloudy': 49, 'amber': 0}
get the distribution for image IMG_2285
Distribution for image IMG_2285: 
{'clear': 30, 'cloudy': 128, 'amber': 12}
get the distribution for image IMG_1079
Distribution for image IMG_1079: 
{'clear': 68, 'cloudy': 79, 'amber': 6}
get the distribution for image IMG_2198
Distribution for image IMG_2198: 
{'clear': 48, 'cloudy': 148, 'amber': 10}
get the distribution for image IMG_1753
Distribution for image IMG_1753: 
{'clear': 74, 'cloudy': 21, 'amber': 0}
get the distribution for image IMG_2271
Distribution for image IMG_2271: 
{'clear': 23, 'cloudy': 58, 'amber': 12}
get the distribution for image IMG_0058
Distribution for image IMG_0058: 
{'clear': 78, 'cloudy': 31, 'amber': 0}
get the distribution for image IMG_0019
Distribution for image IMG_0019: 
{'clear': 144, 'cloudy': 100, 'amber': 4}
get the distribution for image IMG_1096
Distribution for image IMG_1096: 
{'clear

In [8]:
import pandas as pd


# Organize the ground truth to a DataFrame
images_annotations = []
for image_number, image_dist in test_gt_trichome_distribution.items():
    clear_count = image_dist.get("clear", 0)
    cloudy_count = image_dist.get("cloudy", 0)
    amber_count = image_dist.get("amber", 0)
    total_count = clear_count + cloudy_count + amber_count

    # Calculate normalized distribution
    if total_count > 0:
        clear_normalized = clear_count / total_count
        cloudy_normalized = cloudy_count / total_count
        amber_normalized = amber_count / total_count
    else:
        clear_normalized = cloudy_normalized = amber_normalized = 0

    images_annotations.append({
        "image_number": image_number,
        "clear": clear_count,
        "cloudy": cloudy_count,
        "amber": amber_count,
        "clear_normalized": clear_normalized,
        "cloudy_normalized": cloudy_normalized,
        "amber_normalized": amber_normalized
    })

ground_truth_df = pd.DataFrame(images_annotations)
ground_truth_df

Unnamed: 0,image_number,clear,cloudy,amber,clear_normalized,cloudy_normalized,amber_normalized
0,IMG_0542,36,49,0,0.423529,0.576471,0.0
1,IMG_2285,30,128,12,0.176471,0.752941,0.070588
2,IMG_1079,68,79,6,0.444444,0.51634,0.039216
3,IMG_2198,48,148,10,0.23301,0.718447,0.048544
4,IMG_1753,74,21,0,0.778947,0.221053,0.0
5,IMG_2271,23,58,12,0.247312,0.623656,0.129032
6,IMG_0058,78,31,0,0.715596,0.284404,0.0
7,IMG_0019,144,100,4,0.580645,0.403226,0.016129
8,IMG_1096,30,121,39,0.157895,0.636842,0.205263
9,IMG_1093,33,143,36,0.15566,0.674528,0.169811


In [9]:
# laod the pred boxes of each model
import json
import os
import numpy as np

# load the ultralytics models pred boxes
ultralytics_models_pred_boxes_saveing_path = os.path.join("/home/etaylor/code_projects/thesis/data/models_scores", "ultralytics_models_pred_boxes_result_22_05_2024.json")
detectron2_models_pred_boxes_saveing_path = os.path.join("/home/etaylor/code_projects/thesis/data/models_scores", "detectron2_models_pred_boxes_result_22_05_2024.json")

with open(ultralytics_models_pred_boxes_saveing_path, "r") as f:
    ultralytics_models_pred_boxes = json.load(f)
    
with open(detectron2_models_pred_boxes_saveing_path, "r") as f:
    detectron2_models_pred_boxes = json.load(f)

In [None]:
# get the images of the predicted boxes and then classify them using the best models
# then get the distribution of the trichomes for each image
# calculate the normalized distribution of the trichomes for each image
# then compare the ground truth and the predicted distribution of the trichomes
# calc RMSE and MPE for each model