In [54]:

# There are two released versions of Grounding DINO: GroundingDINO-T (tiny) and GroundingDINO-B (base), with SwinT and SwinB backbones respectively. The paper also reports results for a SwinL backbone, but I could not find those weights published anywhere.

from groundingdino.util.inference import load_model, load_image, predict, annotate
import cv2
import supervision as sv
import torch
import csv
import os

def clean_labels(boxes, max_area):
    """Filter out detections that cover too much of the image.

    Args:
        boxes: tensor of boxes; columns 2 and 3 are read as width and height
            (callers pass normalised centre-x, centre-y, width, height rows).
        max_area: a box is kept only if width * height is strictly below this.

    Returns:
        A new ``torch.FloatTensor`` holding the surviving boxes when at least
        two survive; otherwise the original ``boxes`` object, unfiltered.
    """
    kept = [candidate for candidate in boxes.tolist()
            if candidate[2] * candidate[3] < max_area]
    # Fewer than two survivors: fall back to the unfiltered detections.
    if len(kept) >= 2:
        return torch.FloatTensor(kept)
    return boxes


def run_dino(img_path, prompt, box_threshold, text_threshold, model_size, maxarea=0.7, save_dir="DINO-labels"):
    """Load a GroundingDINO model and run it on a single image.

    Convenience wrapper around ``load_dino_model`` + ``run_dino_from_model``.
    The model is loaded on every call, so when many images are processed it is
    much cheaper to call ``load_dino_model`` once and then
    ``run_dino_from_model`` per image.

    Args:
        img_path: path of the image to annotate.
        prompt: text caption handed to GroundingDINO.
        box_threshold: box confidence threshold passed to ``predict``.
        text_threshold: text confidence threshold passed to ``predict``.
        model_size: ``'swint'`` or ``'swinb'``.
        maxarea: boxes whose normalised width * height reaches this value are
            filtered out (see ``clean_labels``).
        save_dir: directory that receives the label ``.txt`` file.

    Returns:
        List of ``[x1, y1, x2, y2]`` boxes in pixel coordinates.

    Raises:
        ValueError: if ``model_size`` is not a supported backbone name.
    """
    # Fail early with a clear message instead of an UnboundLocalError on the
    # config/checkpoint paths.
    if model_size not in ('swint', 'swinb'):
        raise ValueError(f"Unknown model_size {model_size!r}; expected 'swint' or 'swinb'")

    model = load_dino_model(model_size)
    return run_dino_from_model(model, img_path, prompt, box_threshold, text_threshold,
                               maxarea=maxarea, save_dir=save_dir)

def load_dino_model(model_size):
    """Load a GroundingDINO model for the requested backbone.

    Args:
        model_size: ``'swint'`` (GroundingDINO-T) or ``'swinb'`` (GroundingDINO-B).

    Returns:
        The object returned by ``load_model`` for the matching config and
        checkpoint files.

    Raises:
        ValueError: if ``model_size`` is not one of the supported names.
    """
    # (config file, checkpoint file) per backbone.
    model_files = {
        'swint': (
            r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinT_OGC.py",
            r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swint_ogc.pth",
        ),
        'swinb': (
            r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinB_cfg.py",
            r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swinb_cogcoor.pth",
        ),
    }
    if model_size not in model_files:
        raise ValueError(
            f"Unknown model_size {model_size!r}; expected one of {sorted(model_files)}"
        )

    config_path, checkpoint_path = model_files[model_size]
    return load_model(config_path, checkpoint_path)
def run_dino_from_model(model, img_path, prompt, box_threshold, text_threshold, maxarea=0.7, save_dir="DINO-labels"):
    """Run an already-loaded GroundingDINO model on one image and save its labels.

    The detections are filtered with ``clean_labels`` and written to
    ``<save_dir>/<image stem>.txt``, one row per box in the form
    ``0 cx cy w h`` (class id is always 0, values exactly as returned by the
    model). ``save_dir`` is created when it does not exist yet.

    Args:
        model: model returned by ``load_dino_model`` / ``load_model``.
        img_path: path of the image to annotate.
        prompt: text caption handed to GroundingDINO.
        box_threshold: box confidence threshold passed to ``predict``.
        text_threshold: text confidence threshold passed to ``predict``.
        maxarea: upper bound on normalised box area (see ``clean_labels``).
        save_dir: directory that receives the label file.

    Returns:
        List of ``[x1, y1, x2, y2]`` boxes scaled to the pixel size of the
        image as read by ``cv2.imread``.
    """
    _, image = load_image(img_path)
    boxes, _, _ = predict(model=model, image=image, caption=prompt,
                          box_threshold=box_threshold, text_threshold=text_threshold)

    # Convert centre/size boxes (YOLO style, normalised) to absolute xyxy.
    img_height, img_width = cv2.imread(img_path).shape[:2]
    clean_boxes = clean_labels(boxes, maxarea).tolist()
    absolute_boxes = [[(cx - (w / 2)) * img_width,
                       (cy - (h / 2)) * img_height,
                       (cx + (w / 2)) * img_width,
                       (cy + (h / 2)) * img_height] for cx, cy, w, h in clean_boxes]

    # Prefix every row with class id 0 without mutating the box list itself.
    label_rows = [[0] + box for box in clean_boxes]

    # Opening the label file fails if the directory is missing, so create it.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(f'{save_dir}/{os.path.splitext(os.path.basename(img_path))[0]}.txt', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=' ')
        writer.writerows(label_rows)

    return absolute_boxes


In [11]:
from PIL import Image, ImageDraw
import numpy as np

def calculate_metrics(TP, FP, FN, TN):
    """Compute classification metrics from confusion-matrix counts.

    Args:
        TP, FP, FN, TN: true-positive, false-positive, false-negative and
            true-negative counts (Python or NumPy numbers).

    Returns:
        Tuple ``(precision, recall, f1, mcc, specificity)``. Any metric whose
        denominator is zero is reported as 0.
    """
    # Work in floating point: with NumPy integer counts the four-way product in
    # the MCC denominator can exceed the int64 range for megapixel masks.
    TP, FP, FN, TN = float(TP), float(FP), float(FN), float(TN)

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

    # Matthews correlation coefficient; the denominator is computed once.
    mcc_denominator = np.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
    mcc = ((TP * TN) - (FP * FN)) / mcc_denominator if mcc_denominator > 0 else 0

    specificity = TN / (TN + FP) if TN + FP > 0 else 0
    return precision, recall, f1, mcc, specificity

def pixel_accuracy(predicted, ground_truth):
    """Return the fraction of positions where the two masks hold equal values.

    The pixel count is taken from the first two dimensions of ``predicted``.
    """
    matching = np.sum(predicted == ground_truth)
    n_rows, n_cols = predicted.shape[0], predicted.shape[1]
    return matching / (n_rows * n_cols)

def read_and_draw_boxes(file_path, image_dim=(1280, 720)):
    """Rasterise a YOLO-style box label file into a binary mask.

    Each non-blank line must hold five numbers: ``class_id x y width height``
    with the box given as normalised centre and size. Boxes are drawn filled
    with 255 on a black canvas.

    Args:
        file_path: path of the label file.
        image_dim: ``(width, height)`` of the canvas in pixels.

    Returns:
        ``uint8`` NumPy array of shape ``(height, width)``.
    """
    canvas_width, canvas_height = image_dim[0], image_dim[1]
    boxes = []
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # failing on the five-way unpacking below.
            if not fields:
                continue
            class_id, x, y, width, height = map(float, fields)
            x1 = (x-(width/2))*canvas_width
            x2 = (x+(width/2))*canvas_width
            y1 = (y-(height/2))*canvas_height
            y2 = (y+(height/2))*canvas_height
            boxes.append([x1, y1, x2, y2])

    image = Image.new('L', image_dim, 0)
    draw = ImageDraw.Draw(image)
    for box in boxes:
        draw.rectangle(box, fill=255)
    # The former debug dump to "test.jpg" was removed: it rewrote a file in the
    # working directory on every call and was never read back.
    return np.array(image, dtype=np.uint8)

def calculate_pixel_metrics(mask1, mask2):
    """
    Intersection-over-union of two masks, treating non-zero pixels as set.

    Returns 0 when neither mask has any set pixel.
    """
    overlap = np.logical_and(mask1, mask2).sum()
    combined = np.logical_or(mask1, mask2).sum()
    return overlap / combined if combined != 0 else 0

def process_files(predicted_mask_dir, ground_truth_mask_dir):
    """Score predicted box labels against ground-truth box labels, pixel-wise.

    Every file name found in ``ground_truth_mask_dir`` is looked up under the
    same name in ``predicted_mask_dir``. Both label files are rasterised with
    ``read_and_draw_boxes`` and compared pixel by pixel.

    Args:
        predicted_mask_dir: directory with predicted label files.
        ground_truth_mask_dir: directory with ground-truth label files.

    Returns:
        Dict mapping each metric name to a list with one score per
        ground-truth file. A file with no matching prediction scores 0 on
        every metric.
    """
    metrics = {
        'iou_scores': [],
        'pixel_accuracies': [],
        'precision_scores': [],
        'recall_scores': [],
        'f1_scores': [],
        'mcc_scores': [],
        'specificity_scores': []
    }

    # The ground-truth listing drives the loop so that an image without any
    # prediction is counted as a miss rather than skipped.
    for fname in os.listdir(ground_truth_mask_dir):
        predicted_mask_path = os.path.join(predicted_mask_dir, fname)
        ground_truth_mask_path = os.path.join(ground_truth_mask_dir, os.path.splitext(fname)[0] + '.txt')

        # The prediction is the file that may be missing (the ground-truth
        # name comes from the listing itself); score such images as all-zero.
        if not (os.path.exists(predicted_mask_path) and os.path.exists(ground_truth_mask_path)):
            for scores in metrics.values():
                scores.append(0)
            continue

        predicted_mask = read_and_draw_boxes(predicted_mask_path)
        ground_truth_mask = read_and_draw_boxes(ground_truth_mask_path)

        # NOTE(review): cv2.resize takes dsize as (width, height), so the
        # 1280x720 masks are resampled to 720 wide by 1280 tall. Both masks get
        # the same transform; kept unchanged so scores stay comparable.
        COMMON_HEIGHT, COMMON_WIDTH = 1280, 720  # or any other desired size
        predicted_mask = cv2.resize(predicted_mask, (COMMON_WIDTH, COMMON_HEIGHT))
        ground_truth_mask = cv2.resize(ground_truth_mask, (COMMON_WIDTH, COMMON_HEIGHT))

        _, predicted_mask_bin = cv2.threshold(predicted_mask, 127, 255, cv2.THRESH_BINARY)
        _, ground_truth_mask_bin = cv2.threshold(ground_truth_mask, 127, 255, cv2.THRESH_BINARY)

        # Scale the 0/255 masks to 0/1.
        predicted_mask_bin = predicted_mask_bin / 255
        ground_truth_mask_bin = ground_truth_mask_bin / 255

        # Counts are converted to float64 before they reach calculate_metrics.
        TP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 1)))
        TN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 0)))
        FP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 0)))
        FN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 1)))

        intersection_px = np.sum(np.logical_and(predicted_mask_bin, ground_truth_mask_bin))
        union_px = np.sum(np.logical_or(predicted_mask_bin, ground_truth_mask_bin))
        # Two empty masks would give 0/0 = NaN and poison the mean; report 0,
        # matching calculate_pixel_metrics.
        iou = intersection_px / union_px if union_px > 0 else 0

        metrics['iou_scores'].append(iou)
        metrics['pixel_accuracies'].append(pixel_accuracy(predicted_mask_bin, ground_truth_mask_bin))
        precision, recall, f1, mcc, specificity = calculate_metrics(TP, FP, FN, TN)
        metrics['precision_scores'].append(precision)
        metrics['recall_scores'].append(recall)
        metrics['f1_scores'].append(f1)
        metrics['mcc_scores'].append(mcc)
        metrics['specificity_scores'].append(specificity)

    return metrics

In [12]:
def optimize_prompts(prompts_file, gt_path, img_dir, save_file, model_size='swint',
                     box_threshold=0.3, text_threshold=0.1, inf_path=None):
    """Rank candidate text prompts by the mean IoU of their detections.

    For every prompt in ``prompts_file`` (one per line, blank lines ignored)
    GroundingDINO is run on every file in ``img_dir``; the resulting labels
    are scored against ``gt_path`` with ``process_files``.

    Args:
        prompts_file: text file with one candidate prompt per line.
        gt_path: directory with ground-truth label files.
        img_dir: directory with the images to annotate.
        save_file: file that receives one ``(prompt, stats)`` tuple per line,
            best first.
        model_size: GroundingDINO backbone, ``'swint'`` or ``'swinb'``.
        box_threshold: box confidence threshold used for every prompt.
        text_threshold: text confidence threshold used for every prompt.
        inf_path: directory where predictions are written and then scored;
            defaults to the project's ``DINO-labels`` folder.

    Returns:
        List of ``(prompt, {'iou_scores': mean_iou})`` tuples sorted by
        decreasing mean IoU.
    """
    if inf_path is None:
        inf_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\DINO-labels"

    with open(prompts_file, 'r') as file:
        # A blank line would otherwise be tried as an empty prompt.
        result_dict = {line.strip(): {} for line in file if line.strip()}

    # Load the model once; reloading it for every image dominates the runtime.
    model = load_dino_model(model_size)

    for prompt, stats in result_dict.items():
        print(f'Trying prompt: "{prompt}"')
        for fname in os.listdir(img_dir):
            # Write the labels to the same directory that is scored below.
            run_dino_from_model(model, os.path.join(img_dir, fname), prompt,
                                box_threshold, text_threshold, save_dir=inf_path)

        metrics = process_files(inf_path, gt_path)
        stats['iou_scores'] = np.mean(metrics['iou_scores'])

    results = sorted(result_dict.items(), key=lambda a: a[1]['iou_scores'], reverse=True)
    print(results)

    with open(save_file, 'w') as output:
        for prompt_stats in results:
            output.write(str(prompt_stats)+'\n')

    return results

In [13]:
def optimize_confidence(prompt, model_size, gt_path, img_dir, text_threshold=0.01, inf_path=None):
    """Search for the box-confidence threshold that maximises mean IoU.

    The search is coarse-to-fine: first in steps of 0.1 over [0.0, 0.9), then
    in steps of 0.01 within +/-0.1 of the best coarse value (clamped to
    [0, 0.9]).

    Args:
        prompt: text caption handed to GroundingDINO.
        model_size: GroundingDINO backbone, ``'swint'`` or ``'swinb'``.
        gt_path: directory with ground-truth label files.
        img_dir: directory with the images to annotate.
        text_threshold: text confidence threshold used for every run.
        inf_path: directory where predictions are written and then scored;
            defaults to the project's ``DINO-labels`` folder.

    Returns:
        Tuple ``(best_iou, best_conf)``. When no threshold yields an IoU above
        0, ``best_iou`` is 0 and ``best_conf`` is 0.0.
    """
    if inf_path is None:
        inf_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\DINO-labels"

    # Load the model once; reloading it for every image dominates the runtime.
    model = load_dino_model(model_size)

    best_iou = 0
    #number of decimal points in confidence
    final_precision = 2
    ubound = 0.9
    lbound = 0.0
    # Defined up front so the summary print and the window update below still
    # work when no candidate ever beats an IoU of 0.
    best_conf = lbound

    for precision in range(1, final_precision + 1):
        scale = 10 ** precision
        # int() truncates, so float round-off in lbound * scale can add one
        # extra candidate just below the intended window; that is harmless.
        for step in range(int(lbound * scale), int(ubound * scale)):
            conf = step / scale
            for fname in os.listdir(img_dir):
                # Write the labels to the same directory that is scored below.
                run_dino_from_model(model, os.path.join(img_dir, fname), prompt,
                                    conf, text_threshold, save_dir=inf_path)
            metrics = process_files(inf_path, gt_path)
            iou = np.mean(metrics['iou_scores'])
            if iou > best_iou:
                best_iou = iou
                best_conf = conf
            print(f"confidence: {conf}, IOU: {iou} (best: {best_iou})")
        print(f"Best IOU at p{precision} is {best_iou} with confidence = {best_conf}")
        # Narrow the window to one step either side of the current best.
        lbound = max(0, best_conf - (1 / scale))
        ubound = min(0.9, best_conf + (1 / scale))
    return best_iou, best_conf



In [14]:
import time as t

def multi_optmize(img_dir, gt_label_dir, model_size, prompts):
    """Run ``optimize_confidence`` for several prompts and keep the best one.

    Args:
        img_dir: directory with the images to annotate.
        gt_label_dir: directory with ground-truth label files.
        model_size: GroundingDINO backbone, ``'swint'`` or ``'swinb'``.
        prompts: iterable of candidate prompts.

    Returns:
        Dict with keys ``"prompt"``, ``"conf"`` and ``"iou"`` describing the
        best combination. ``"prompt"`` and ``"conf"`` are ``None`` (and
        ``"iou"`` is 0) when no prompt reached an IoU above 0.
    """
    print("Be sure to change the category folders and model size in each function!")
    t.sleep(2)
    start = t.time()
    best_iou = 0
    # Initialised so the progress/summary prints and the return value work
    # even when the first prompt scores 0 or `prompts` is empty.
    best_conf = None
    best_prompt = None
    for prompt in prompts:
        print(f"Trying prompt: '{prompt}'")
        iou, conf = optimize_confidence(prompt, model_size, gt_label_dir, img_dir)
        if iou > best_iou:
            best_iou = iou
            best_conf = conf
            best_prompt = prompt
        print(f"So far: best prompt is '{best_prompt}', conf is {best_conf}, resulting in {best_iou} IOU)")
    print(f"\n\n\n\n\nFinal Result: best prompt is '{best_prompt}', conf is {best_conf}, resulting in {best_iou} IOU)")
    print(f"final time: {t.time()-start}")
    return {"prompt": best_prompt, "conf": best_conf, "iou": best_iou}

# Berries

In [None]:
# Initial prompt picker for the berry dataset: score every candidate prompt
# and keep the ten best for the confidence search.
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\blueberry-prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels"

results = optimize_prompts(
    prompts_file,
    ground_truth_dir,
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images",
    'berry-results-dino.txt',
)

# results is sorted best-first; keep only the prompt text of the top ten.
top10 = [prompt_text for prompt_text, _ in results[:10]]

In [None]:
# Show the ten best-scoring berry prompts selected by the previous cell.
print(top10)

In [None]:
# Berries, SwinT backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels",
    model_size='swint',
    prompts=top10,
)

In [None]:
# Berries, SwinB backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels",
    model_size='swinb',
    prompts=top10,
)

# RedLeaf

In [None]:
import os

from roboflow import Roboflow

# Credentials must not be stored in the notebook: the API key is read from the
# ROBOFLOW_API_KEY environment variable (KeyError if it is not set). The key
# that used to be hard-coded here should be revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("autoannotate-study").project("red-leaf-bounding-box")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
# Initial prompt picker for the red-leaf dataset: score every candidate prompt
# and keep the ten best for the confidence search.
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red_leaf_plant_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels"

results = optimize_prompts(
    prompts_file,
    ground_truth_dir,
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images",
    'redleaf-results-dino.txt',
)

# results is sorted best-first; keep only the prompt text of the top ten.
top10 = [prompt_text for prompt_text, _ in results[:10]]

In [None]:
# Red leaf, SwinT backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels",
    model_size='swint',
    prompts=top10,
)

In [None]:
# Red leaf, SwinB backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels",
    model_size='swinb',
    prompts=top10,
)

# Fescue

In [None]:
import os

# Imported here as well so this cell does not depend on another download cell
# having been run first.
from roboflow import Roboflow

# Credentials must not be stored in the notebook: the API key is read from the
# ROBOFLOW_API_KEY environment variable (KeyError if it is not set). The key
# that used to be hard-coded here should be revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("autoannotate-study").project("fescue-bounding-box")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
# Initial prompt picker for the fescue dataset: score every candidate prompt
# and keep the ten best for the confidence search.
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Fescue_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\labels"

results = optimize_prompts(
    prompts_file,
    ground_truth_dir,
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\images",
    'fescue-results-dino.txt',
)

# results is sorted best-first; keep only the prompt text of the top ten.
top10 = [prompt_text for prompt_text, _ in results[:10]]

In [None]:
# Fescue, SwinT backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\labels",
    model_size='swint',
    prompts=top10,
)

In [None]:
# Fescue, SwinB backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-bounding-box-1\train\labels",
    model_size='swinb',
    prompts=top10,
)

# Buds

In [None]:
import os

from roboflow import Roboflow

# Credentials must not be stored in the notebook: the API key is read from the
# ROBOFLOW_API_KEY environment variable (KeyError if it is not set). The key
# that used to be hard-coded here should be revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("autoannotate-study").project("bounding-buds")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
# Initial prompt picker for the bud dataset: score every candidate prompt
# and keep the ten best for the confidence search.
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bud_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels"

results = optimize_prompts(
    prompts_file,
    ground_truth_dir,
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images",
    'bud-results-dino.txt',
)

# results is sorted best-first; keep only the prompt text of the top ten.
top10 = [prompt_text for prompt_text, _ in results[:10]]

In [None]:
# Display the ten best-scoring bud prompts selected by the previous cell.
top10

In [None]:
# Buds, SwinT backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels",
    model_size='swint',
    prompts=top10,
)

In [None]:
# Buds, SwinB backbone: confidence search over the top-10 prompts.
multi_optmize(
    img_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images",
    gt_label_dir=r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels",
    model_size='swinb',
    prompts=top10,
)

In [None]:
# Annotate every blueberry image with a fixed prompt and threshold.
detection_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\blueberry-images\train\images"
prompt = 'smooth blueberry'
box_threshold = 0.360
text_threshold = 0.01
model_size = 'swint'

# Load the model once instead of once per image (run_dino reloads it on
# every call).
detection_model = load_dino_model(model_size)
for fname in os.listdir(detection_dir):
    run_dino_from_model(detection_model, os.path.join(detection_dir, fname), prompt, box_threshold, text_threshold)


In [None]:
# Score the saved DINO labels against the blueberry ground truth and report
# the per-image averages.
predicted_label_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\DINO-labels"
ground_truth_label_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\blueberry-images\train\labels_yolo"
metrics = process_files(predicted_label_dir, ground_truth_label_dir)

print(f"Average IoU: {np.mean(metrics['iou_scores'])}")
print(f"Average Precision: {np.mean(metrics['precision_scores'])}")
print(f"Average Recall: {np.mean(metrics['recall_scores'])}")
print(f"Average F1: {np.mean(metrics['f1_scores'])}")

## Runs

In [39]:
# Image directories, one per category. Despite the name, these are the image
# folders that the run cell lists and feeds to the models, not label folders.
ground_truth_paths = [r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\fescue-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\buds-1\train\images"]
# GroundingDINO backbones to evaluate.
dino_models = ['swint', 'swinb']
# Two candidate text prompts per category, in the same order as
# ground_truth_paths (berries, red leaf, fescue, buds).
prompts = [
    ['smooth blueberry', 'blueberry'],
    ['A red leaf plant growing among green plants', 'A cluster of red leaves surrounded by green foliage'],
    ['grass spots', 'Fescue grass spots'],
    ['buds emerging', 'developing bud']
]
# Box-confidence thresholds: two rows of one value per category.
# Presumably confidences[i][x] belongs to prompts[x][i] - TODO confirm.
confidences = [[0.36, 0.4, 0.06, 0.41], [0.36, 0.3, 0.04, 0.34]]
# SAM checkpoint names handed to ultralytics' SAM(...) in the run cell.
SAM_model = ['sam_b.pt','mobile_sam.pt','sam_l.pt']

In [40]:
from ultralytics import SAM
from pathlib import Path
import time as t

def save_masks(sam_results, output_dir):
    """Write the masks of the first SAM result as a YOLO segmentation label file.

    The file is named after the stem of the result's source image path and is
    placed in ``output_dir`` (created if missing). Each non-empty polygon
    becomes one line: class id ``0`` followed by its flattened normalised
    ``x y`` coordinates.

    Args:
        sam_results: sequence of result objects; only element 0 is used. It
            must expose ``path`` and ``masks`` (``masks.xyn`` is the list of
            polygons).
        output_dir: directory that receives the ``.txt`` file.
    """
    result = sam_results[0]

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # `masks` is None when nothing was segmented; still write an (empty)
    # label file so the image is not missing from the evaluation.
    segments = result.masks.xyn if result.masks is not None else []

    with open(out_dir / f"{Path(result.path).stem}.txt", "w") as f:
        for segment in segments:
            if len(segment) == 0:
                continue
            coords = map(str, segment.reshape(-1).tolist())
            f.write("0 " + " ".join(coords) + "\n")

In [50]:
# Full DINO -> SAM experiment: for every DINO backbone, category and SAM
# checkpoint, detect boxes with GroundingDINO, segment them with SAM and save
# the masks under ...\Worldly_SAM\trials\<category>\<dino model>\<sam model>.
dino_models = ['swint', 'swinb']
#dino_models = ['swinb']
# Output folder names, parallel to the checkpoint names in SAM_model.
sam_models = ['dino_SAM_b', 'dino_Mobile', 'dino_SAM_l']
categories = ['berries','red leaf','fescue','buds']
# Per-category `maxarea` passed to run_dino_from_model.
max_areas = [0.4, 0.4, 0.7, 0.25]
# Not referenced in this cell.
folders = {'berries':'berries-1', 'red leaf':'red-leaf-1','fescue':'fescue-1','buds':'buds-1'}

# NOTE(review): this value is overwritten inside the loops before it is read.
start = t.time()
for y in range(len(dino_models)):
    # One DINO model load per backbone, reused for every category and SAM model.
    dino_model = load_dino_model(dino_models[y])
    for x in range(len(categories)):
        for z in range(len(sam_models)):
            img_path = ground_truth_paths[x]
            model = SAM(SAM_model[z])
            # NOTE(review): prompt and confidence always use index 1, for both
            # DINO backbones - confirm this is intended rather than indexing by y.
            prompt = prompts[x][1]
            conf = confidences[1][x]
            # The reported time covers DINO inference, SAM inference and mask
            # writing for every image of this category.
            start = t.time()
            for fname in os.listdir(img_path):
                path = img_path+"\\"+fname
                boxes = run_dino_from_model(dino_model, path, prompt, conf, 0.1, max_areas[x])
                #print(len(boxes))
                #print(f'conf{conf}, prompt{prompt}')
                sam_results = model(os.path.join(img_path, fname), model= model, bboxes=boxes, verbose=False)
                save_masks(sam_results, fr"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Worldly_SAM\trials\{categories[x]}\{dino_models[y]}\{sam_models[z]}")
            print(f"\n\nTime for {categories[x]},{dino_models[y]},{sam_models[z]}: {t.time()-start}")


final text_encoder_type: bert-base-uncased


Time for berries,swint,dino_SAM_b: 5.138290643692017


Time for berries,swint,dino_Mobile: 3.380899667739868


Time for berries,swint,dino_SAM_l: 5.973753929138184


Time for red leaf,swint,dino_SAM_b: 4.351370334625244


Time for red leaf,swint,dino_Mobile: 3.410944938659668


Time for red leaf,swint,dino_SAM_l: 6.352397680282593


Time for fescue,swint,dino_SAM_b: 6.2403199672698975


Time for fescue,swint,dino_Mobile: 5.288557767868042


Time for fescue,swint,dino_SAM_l: 8.47096037864685


Time for buds,swint,dino_SAM_b: 3.9416797161102295


Time for buds,swint,dino_Mobile: 3.03104305267334


Time for buds,swint,dino_SAM_l: 5.605805158615112
final text_encoder_type: bert-base-uncased


Time for berries,swinb,dino_SAM_b: 5.15105938911438


Time for berries,swinb,dino_Mobile: 3.902315378189087


Time for berries,swinb,dino_SAM_l: 6.486317873001099


Time for red leaf,swinb,dino_SAM_b: 4.537626504898071


Time for red leaf,swinb,dino_Mobile:

## Segmentation Evaluation

In [51]:
from PIL import Image, ImageDraw
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt

def read_and_draw_masks(file_path, image_dim=(1280, 720)):
    """Rasterise a YOLO-style segmentation label file into a binary mask.

    Each line holds a class id followed by normalised ``x y`` pairs describing
    one polygon. Polygons are filled with 255 on a black canvas. Lines that
    yield no points (blank lines, or a class id with no coordinates) are
    skipped.

    Args:
        file_path: path of the label file.
        image_dim: ``(width, height)`` of the canvas in pixels.

    Returns:
        ``uint8`` NumPy array of shape ``(height, width)``.
    """
    canvas_width, canvas_height = image_dim[0], image_dim[1]

    polygons = []
    with open(file_path, 'r') as file:
        for line in file:
            raw_mask = [float(value) for value in line.split()]
            # Element 0 is the class id; the rest are x, y pairs. A trailing
            # unpaired value is ignored.
            coords = raw_mask[1:]
            points = [[int(coords[2 * i] * canvas_width), int(coords[2 * i + 1] * canvas_height)]
                      for i in range(len(coords) // 2)]
            # An empty polygon must not be passed to cv2.fillPoly.
            if points:
                polygons.append(points)

    canvas = np.zeros((canvas_height, canvas_width), dtype=np.uint8)
    for polygon in polygons:
        cv2.fillPoly(canvas, np.array([polygon], dtype=np.int32), 255)
    return canvas

def process_files_seg(predicted_mask_dir, ground_truth_mask_dir):
    """Score predicted segmentation labels against ground truth, pixel-wise.

    Every file name found in ``ground_truth_mask_dir`` is looked up under the
    same name in ``predicted_mask_dir``. Both label files are rasterised with
    ``read_and_draw_masks`` and compared pixel by pixel.

    Args:
        predicted_mask_dir: directory with predicted polygon label files.
        ground_truth_mask_dir: directory with ground-truth polygon label files.

    Returns:
        Dict mapping each metric name to a list with one score per
        ground-truth file. A file with no matching prediction scores 0 on
        every metric.
    """
    metrics = {
        'iou_scores': [],
        'pixel_accuracies': [],
        'precision_scores': [],
        'recall_scores': [],
        'f1_scores': [],
        'mcc_scores': [],
        'specificity_scores': []
    }

    # The ground-truth listing drives the loop so that an image without any
    # prediction is counted as a miss rather than skipped.
    for fname in os.listdir(ground_truth_mask_dir):
        predicted_mask_path = os.path.join(predicted_mask_dir, fname)
        ground_truth_mask_path = os.path.join(ground_truth_mask_dir, os.path.splitext(fname)[0] + '.txt')

        # Score the image as all-zero when either label file is unavailable.
        if not (os.path.exists(predicted_mask_path) and os.path.exists(ground_truth_mask_path)):
            for scores in metrics.values():
                scores.append(0)
            continue

        predicted_mask = read_and_draw_masks(predicted_mask_path)
        ground_truth_mask = read_and_draw_masks(ground_truth_mask_path)

        # NOTE(review): cv2.resize takes dsize as (width, height), so the
        # 1280x720 masks are resampled to 720 wide by 1280 tall. Both masks get
        # the same transform; kept unchanged so scores stay comparable.
        COMMON_HEIGHT, COMMON_WIDTH = 1280, 720  # or any other desired size
        predicted_mask = cv2.resize(predicted_mask, (COMMON_WIDTH, COMMON_HEIGHT))
        ground_truth_mask = cv2.resize(ground_truth_mask, (COMMON_WIDTH, COMMON_HEIGHT))

        _, predicted_mask_bin = cv2.threshold(predicted_mask, 127, 255, cv2.THRESH_BINARY)
        _, ground_truth_mask_bin = cv2.threshold(ground_truth_mask, 127, 255, cv2.THRESH_BINARY)

        # Scale the 0/255 masks to 0/1.
        predicted_mask_bin = predicted_mask_bin / 255
        ground_truth_mask_bin = ground_truth_mask_bin / 255

        # Counts are converted to float64 before they reach calculate_metrics.
        TP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 1)))
        TN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 0)))
        FP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 0)))
        FN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 1)))

        intersection_px = np.sum(np.logical_and(predicted_mask_bin, ground_truth_mask_bin))
        union_px = np.sum(np.logical_or(predicted_mask_bin, ground_truth_mask_bin))
        # Two empty masks would give 0/0 = NaN and poison the mean; report 0.
        iou = intersection_px / union_px if union_px > 0 else 0

        metrics['iou_scores'].append(iou)
        metrics['pixel_accuracies'].append(pixel_accuracy(predicted_mask_bin, ground_truth_mask_bin))
        precision, recall, f1, mcc, specificity = calculate_metrics(TP, FP, FN, TN)
        metrics['precision_scores'].append(precision)
        metrics['recall_scores'].append(recall)
        metrics['f1_scores'].append(f1)
        metrics['mcc_scores'].append(mcc)
        metrics['specificity_scores'].append(specificity)

    return metrics

In [53]:
# Evaluate every (category, DINO backbone, SAM model) trial against the
# ground-truth segmentation labels and print per-image and mean IoU.
dino_models = ['swint', 'swinb']
sam_models = ['dino_SAM_b', 'dino_Mobile', 'dino_SAM_l']
categories = ['berries','red leaf','fescue','buds']
folders = {'berries':'berries-1', 'red leaf':'red-leaf-1','fescue':'fescue-1','buds':'buds-1'}

for category in categories:
    for dino_model in dino_models:
        for sam_model in sam_models:
            trial_dir = fr"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Worldly_SAM\trials\{category}\{dino_model}\{sam_model}"
            label_dir = fr'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\{folders[category]}\train\labels'
            metrics = process_files_seg(trial_dir, label_dir)
            # One IoU per ground-truth file, followed by the mean for the trial.
            for score in metrics['iou_scores']:
                print(score)
            print(rf"{category}\{dino_model}\{sam_model}: Mean IOU = {np.mean(metrics['iou_scores'])}")


0.45984186433624635
0.5108952116585704
0.7074639623887788
0.6249239381769502
0.8326453508710557
0.7878131021194605
0.8764237931657928
0.8505839710028191
0.8403943069169851
berries\swint\dino_SAM_b: Mean IOU = 0.721220611181851
0.44528164858608327
0.45849768107147504
0.7852210693756553
0.6126803619466862
0.7978574755472753
0.7525490792877796
0.85728293813514
0.8375980893892869
0.8224400281583365
berries\swint\dino_Mobile: Mean IOU = 0.7077120412775242
0.46908518853982933
0.4482562816448209
0.8146875350192372
0.6271991233944116
0.7446329274301401
0.7880975296759705
0.8846364133406233
0.850275671950379
0.8577502899110939
berries\swint\dino_SAM_l: Mean IOU = 0.7205134401007229
0.465875997129954
0.4828508771929825
0.7016602124151682
0.6261187214611872
0.8847780126849895
0.8664346895074947
0.9432327166504382
0.8505687693898656
0.8410324581810125
berries\swinb\dino_SAM_b: Mean IOU = 0.7402836060681213
0.4033339439457776
0.3746486890319826
0.7154632495860629
0.6121275163678639
0.88407902382335