In [1]:
from groundingdino.util.inference import load_model, load_image, predict, annotate
import cv2
import supervision as sv
import torch
import csv
import os

def clean_labels(boxes, max_area):
    clean_boxes = []
    box_list = boxes.tolist()
    for box in box_list:
        #if width * height < 0.9, add box to list.
        if (box[2]*box[3])<max_area:
            clean_boxes.append(box)
    if len(clean_boxes)<1:
        return boxes
    return torch.FloatTensor(clean_boxes)


def run_dino(img_path, prompt, box_threshold, text_threshold, model_size, maxarea=0.7, save_dir="DINO-labels"):
    #choose swinb or swint
    if model_size == 'swint':
        config_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinT_OGC.py"
        checkpoint_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swint_ogc.pth"
    elif model_size == 'swinb':
        checkpoint_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swinb_cogcoor.pth"
        config_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinB_cfg.py"

    model = load_model(config_path, checkpoint_path)

    image_source, image = load_image(img_path)

    boxes, accuracy, obj_name = predict(model = model, image = image, caption = prompt, box_threshold = box_threshold, text_threshold = text_threshold)

    #print(boxes, accuracy, obj_name)
    #Convert boxes from YOLOv8 format to xyxy
    img_height, img_width = cv2.imread(img_path).shape[:2]
    clean_boxes = clean_labels(boxes, maxarea)
    absolute_boxes = [[(box[0]-(box[2]/2))*img_width,
                       (box[1]-(box[3]/2))*img_height,
                       (box[0]+(box[2]/2))*img_width,
                       (box[1]+(box[3]/2))*img_height] for box in clean_boxes.tolist()]
    #annotated_frame = annotate(image_source=image_source, boxes=clean_boxes, logits=accuracy, phrases=obj_name)
    #sv.plot_image(annotated_frame, (16,16))
    save_labels = True
    #print(clean_boxes)
    if save_labels:
        clean_boxes = clean_boxes.tolist()

        for x in clean_boxes:
            x.insert(0,0)

        with open(f'{save_dir}/{os.path.splitext(os.path.basename(img_path))[0]}.txt', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=' ')
            writer.writerows(clean_boxes)
            #print("Labels saved in /DINO-labels")

    return absolute_boxes

def load_dino_model(model_size):
    #choose swinb or swint
    if model_size == 'swint':
        config_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinT_OGC.py"
        checkpoint_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swint_ogc.pth"
    elif model_size == 'swinb':
        checkpoint_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\weights\groundingdino_swinb_cogcoor.pth"
        config_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\groundingdino\config\GroundingDINO_SwinB_cfg.py"

    model = load_model(config_path, checkpoint_path)
    return model
def run_dino_from_model(model, img_path, prompt, box_threshold, text_threshold, maxarea=0.7, save_dir="DINO-labels"):
    image_source, image = load_image(img_path)
    boxes, accuracy, obj_name = predict(model = model, image = image, caption = prompt, box_threshold = box_threshold, text_threshold = text_threshold)

    #print(boxes, accuracy, obj_name)
    #Convert boxes from YOLOv8 format to xyxy
    img_height, img_width = cv2.imread(img_path).shape[:2]
    clean_boxes = clean_labels(boxes, maxarea)
    absolute_boxes = [[(box[0]-(box[2]/2))*img_width,
                       (box[1]-(box[3]/2))*img_height,
                       (box[0]+(box[2]/2))*img_width,
                       (box[1]+(box[3]/2))*img_height] for box in clean_boxes.tolist()]
    #annotated_frame = annotate(image_source=image_source, boxes=clean_boxes, logits=accuracy, phrases=obj_name)
    #sv.plot_image(annotated_frame, (16,16))
    save_labels = True
    if save_labels:
        clean_boxes = clean_boxes.tolist()

        for x in clean_boxes:
            x.insert(0,0)

        with open(f'{save_dir}/{os.path.splitext(os.path.basename(img_path))[0]}.txt', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=' ')
            writer.writerows(clean_boxes)
            #print("Labels saved in /DINO-labels")
    return absolute_boxes


In [2]:
from PIL import Image, ImageDraw
import numpy as np

def calculate_metrics(TP, FP, FN, TN):
    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    mcc = ((TP * TN) - (FP * FN)) / np.sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN)) if np.sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN)) > 0 else 0
    specificity = TN / (TN + FP) if TN + FP > 0 else 0
    return precision, recall, f1, mcc, specificity

def pixel_accuracy(predicted, ground_truth):
    correct = np.sum(predicted == ground_truth)
    total = predicted.shape[0] * predicted.shape[1]
    return correct / total

def read_and_draw_boxes(file_path, image_dim=(1280, 720)):
    boxes = []
    with open(file_path, 'r') as file:
        for line in file:
            class_id, x, y, width, height = map(float, line.strip().split())
            x1 = (x-(width/2))*image_dim[0]
            x2 = (x+(width/2))*image_dim[0]
            y1 = (y-(height/2))*image_dim[1]
            y2 = (y+(height/2))*image_dim[1]
            boxes.append([x1, y1, x2, y2])
    image = Image.new('L', image_dim, 0)
    draw = ImageDraw.Draw(image)
    for box in boxes:
        draw.rectangle(box, fill=255)
        #draw.rectangle([1,1,20,20], fill=255)
    image.save("test.jpg")
    return np.array(image, dtype=np.uint8)

def calculate_pixel_metrics(mask1, mask2):
    """
    Calculate IoU based on pixel values from two masks.
    """
    intersection = np.logical_and(mask1, mask2).sum()
    union = np.logical_or(mask1, mask2).sum()
    if union == 0:
        return 0
    return intersection / union

def process_files(predicted_mask_dir, ground_truth_mask_dir):
    predicted_files = os.listdir(ground_truth_mask_dir)
    metrics = {
        'iou_scores': [],
        'pixel_accuracies': [],
        'precision_scores': [],
        'recall_scores': [],
        'f1_scores': [],
        'mcc_scores': [],
        'specificity_scores': []
    }

    for fname in predicted_files:
        predicted_mask_path = os.path.join(predicted_mask_dir, fname)
        ground_truth_mask_path = os.path.join(ground_truth_mask_dir, os.path.splitext(fname)[0] + '.txt')

        if not os.path.exists(ground_truth_mask_path):
            metrics['iou_scores'].append(0)
            metrics['pixel_accuracies'].append(0)
            metrics['precision_scores'].append(0)
            metrics['recall_scores'].append(0)
            metrics['f1_scores'].append(0)
            metrics['mcc_scores'].append(0)
            metrics['specificity_scores'].append(0)
            continue

        predicted_mask = read_and_draw_boxes(predicted_mask_path)
        ground_truth_mask = read_and_draw_boxes(ground_truth_mask_path)

        COMMON_HEIGHT, COMMON_WIDTH = 1280, 720  # or any other desired size

        predicted_mask = cv2.resize(predicted_mask, (COMMON_WIDTH, COMMON_HEIGHT))

        ground_truth_mask = cv2.resize(ground_truth_mask, (COMMON_WIDTH, COMMON_HEIGHT))

        _, predicted_mask_bin = cv2.threshold(predicted_mask, 127, 255, cv2.THRESH_BINARY)
        _, ground_truth_mask_bin = cv2.threshold(ground_truth_mask, 127, 255, cv2.THRESH_BINARY)

        predicted_mask_bin = predicted_mask_bin / 255
        ground_truth_mask_bin = ground_truth_mask_bin / 255
        TP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 1)))
        TN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 0)))
        FP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 0)))
        FN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 1)))


        intersection = np.logical_and(predicted_mask_bin, ground_truth_mask_bin)
        union = np.logical_or(predicted_mask_bin, ground_truth_mask_bin)
        metrics['iou_scores'].append(np.sum(intersection) / np.sum(union))
        metrics['pixel_accuracies'].append(pixel_accuracy(predicted_mask_bin, ground_truth_mask_bin))
        precision, recall, f1, mcc, specificity = calculate_metrics(TP, FP, FN, TN)
        metrics['precision_scores'].append(precision)
        metrics['recall_scores'].append(recall)
        metrics['f1_scores'].append(f1)
        metrics['mcc_scores'].append(mcc)
        metrics['specificity_scores'].append(specificity)

    return metrics

In [3]:
def optimize_prompts(prompts_file, gt_path, img_dir, save_file):
    inf_path = fr"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\autoannotate study\DINO-labels"

    with  open(prompts_file, 'r') as file:
        result_dict = {}
        for x in file:
            result_dict[x.strip()] = {}

    #result_dict = dict.fromkeys(prompts,{})
    for prompt in result_dict.keys():
        print(f'Trying prompt: "{prompt}"')
        for fname in os.listdir(img_dir):
            box_threshold = 0.3
            text_threshold = 0.1
            model_size = 'swint'
            run_dino(os.path.join(img_dir, fname), prompt, box_threshold, text_threshold, model_size)

        metrics = process_files(inf_path, gt_path)

        result_dict[prompt]['iou_scores'] = np.mean(metrics['iou_scores'])

    results = sorted(list(result_dict.items()), key=lambda a:a[1]['iou_scores'], reverse=True)
    print(results)

    with open(save_file, 'w') as output:
        for prompt_stats in results:
            output.write(str(prompt_stats)+'\n')

    return results

In [4]:
def optimize_confidence(prompt, model_size, gt_path, img_dir):
    inf_path = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\GroundingDINO\DINO-labels"
    best_iou = 0
    #number of decimal points in confidence
    final_precision = 2
    ubound = 0.9
    lbound = 0.0
    for precision in [x+1 for x in range(final_precision)]:
        for conf in [x/(10**precision) for x in range(int(lbound*(10**precision)),int(ubound*(10**precision)))]:
            for fname in os.listdir(img_dir):
                prompt = prompt
                box_threshold = conf
                text_threshold = 0.01
                model_size = model_size
                boxes = run_dino(os.path.join(img_dir, fname), prompt, box_threshold, text_threshold, model_size)
            metrics = process_files(inf_path, gt_path)
            iou = np.mean(metrics['iou_scores'])
            if iou>best_iou:
                best_iou = iou
                best_conf = conf
            print(f"confidence: {conf}, IOU: {iou} (best: {best_iou})")
        print(f"Best IOU at p{precision} is {best_iou} with confidence = {best_conf}")
        lbound = max(0,best_conf-(1/(10**precision)))
        ubound = min(0.9,best_conf+(1/(10**precision)))
    return best_iou, best_conf



In [5]:
import time as t

def multi_optmize(img_dir, gt_label_dir, model_size, prompts):
    print("Be sure to change the category folders and model size in each function!")
    t.sleep(2)
    start = t.time()
    best_iou = 0
    for prompt in prompts:
        print(f"Trying prompt: '{prompt}'")
        iou, conf = optimize_confidence(prompt, model_size, gt_label_dir, img_dir)
        if iou>best_iou:
            best_iou = iou
            best_conf = conf
            best_prompt = prompt
        print(f"So far: best prompt is '{best_prompt}', conf is {best_conf}, resulting in {best_iou} IOU)")
    print(f"\n\n\n\n\nFinal Result: best prompt is '{best_prompt}', conf is {best_conf}, resulting in {best_iou} IOU)")
    print(f"final time: {t.time()-start}")
    return {"prompt":best_prompt, "conf": best_conf, "iou":best_iou }

# Berries

In [6]:
#Initial prompt picker
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\blueberry-prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels"

results = optimize_prompts(prompts_file,ground_truth_dir, r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images", 'berry-results-dino.txt')

top10 = [result[0] for result in results][0:10]

Trying prompt: "blueberry"




final text_encoder_type: bert-base-uncased




final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
Trying prompt: "a blueberry"
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
Trying prompt: "single blueberry"
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
Trying prompt: "a single blueberry"
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncase

In [7]:
print(top10)

['a single, round  wild blueberry', 'Egg-shaped Blueberry', 'individual blueberry', 'a small blue berry', 'a blueberry', 'Uniform Blueberry', 'a wild blueberry', 'a small blueberry', 'Smooth Blueberry', 'Spherical Blueberry']


In [8]:
#swint
multi_optmize(r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images", r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels", 'swint', top10)

Be sure to change the category folders and model size in each function!
Trying prompt: 'a single, round  wild blueberry'
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
confidence: 0.0, IOU: 0.4742237291400106 (best: 0.4742237291400106)
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
confidence: 0.1, IOU: 0.4742237291400106 (best: 0.4742237291400106)
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_e

{'prompt': 'a single, round  wild blueberry',
 'conf': 0.0,
 'iou': 0.4742237291400106}

In [None]:
#swinb
multi_optmize(r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images", r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\labels", 'swinb', top10)

# RedLeaf

In [None]:
from roboflow import Roboflow
rf = Roboflow(api_key="S3eK0rrtbGBZFhsHBOEK")
project = rf.workspace("autoannotate-study").project("red-leaf-bounding-box")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
#Initial prompt picker
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red_leaf_plant_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels"

results = optimize_prompts(prompts_file,ground_truth_dir, r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images", 'redleaf-results-dino.txt')

top10 = [result[0] for result in results][0:10]

In [None]:
#swint
multi_optmize(r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images", r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels", 'swint', top10)

In [None]:
#swinb
multi_optmize(r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\images", r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-bounding-box-1\train\labels", 'swinb', top10)

# Fescue

In [None]:
rf = Roboflow(api_key="S3eK0rrtbGBZFhsHBOEK")
project = rf.workspace("autoannotate-study").project("fescue-bounding-box")
version = project.version(1)
dataset = version.download("yolov8")

In [59]:
#Initial prompt picker
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Fescue_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\labels"

results = optimize_prompts(prompts_file,ground_truth_dir, r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\images", 'fescue-results-dino.txt')

top10 = [result[0] for result in results][0:10]

Trying prompt: "Fescue grass patches"
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
Trying prompt: "Fescue grass areas"
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type

In [60]:
multi_optmize(
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\images",
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\labels",
    'swint', top10)

Be sure to change the category folders and model size in each function!
Trying prompt: 'Fescue patches'
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
confidence: 0.0, IOU: 0.45526446025208434 (best: 0.45526446025208434)
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final tex

{'prompt': 'Fescue patches', 'conf': 0.0, 'iou': 0.45526446025208434}

In [61]:
#swinb
multi_optmize(
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\images",
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\labels",
    'swinb', top10)

Be sure to change the category folders and model size in each function!
Trying prompt: 'Fescue patches'
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
confidence: 0.0, IOU: 0.45526446025208434 (best: 0.45526446025208434)
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final tex

KeyboardInterrupt: 

# Buds

In [None]:
from roboflow import Roboflow
rf = Roboflow(api_key="S3eK0rrtbGBZFhsHBOEK")
project = rf.workspace("autoannotate-study").project("bounding-buds")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
#Initial prompt picker
prompts_file = r'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bud_prompts.txt'
ground_truth_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels"

results = optimize_prompts(prompts_file,ground_truth_dir, r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images", 'bud-results-dino.txt')

top10 = [result[0] for result in results][0:10]

In [None]:
top10

In [None]:
multi_optmize(
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images",
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels",
    'swint', top10)

In [None]:
#swinb
multi_optmize(
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\images",
    r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-buds-1\train\labels",
    'swinb', top10)

In [10]:
DINO_time = []

In [11]:
detection_dir = r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\bounding-berries\train\images"
save_path = 'berry'
if not os.path.exists(save_path):
    os.makedirs(save_path)

for fname in os.listdir(detection_dir):
    start = t.time()
    prompt = 'smooth blueberry'
    box_threshold = 0.36
    text_threshold = 0.1
    model_size = 'swint'
    run_dino(os.path.join(detection_dir, fname), prompt, box_threshold, text_threshold, model_size, save_dir=save_path)
    total = t.time() - start
    DINO_time.append(total)

final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased
final text_encoder_type: bert-base-uncased


In [68]:
for time in DINO_time:
    print(time)

2.0275115966796875
1.514444351196289
1.524332046508789
1.477658748626709
1.3716561794281006
1.361999750137329
1.250000238418579
1.332000494003296
1.5759999752044678
1.3020000457763672
1.2581989765167236


In [69]:
metrics = process_files(r"C:\Users\Mechanized Systems\DataspellProjects\AutoAnnotate\autoannotate study\fescue", r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-boxes-2\train\labels")

print(f"Average IoU: {np.mean(metrics['iou_scores'])}")
print(f"Average Precision: {np.mean(metrics['precision_scores'])}")
print(f"Average Recall: {np.mean(metrics['recall_scores'])}")
print(f"Average F1: {np.mean(metrics['f1_scores'])}")

Average IoU: 0.7287206135453613
Average Precision: 0.8098698568908582
Average Recall: 0.8948370601733348
Average F1: 0.8286134183045462


In [73]:
for iou in metrics['f1_scores']:
    print(iou)

0.790918905976691
0.7550041743465232
0.9895463483913257
0.9873448122578565
0.6187764620427981
0.9843631989706005
0.9826474725406279
0.7264904045725964
0.7987659035314907
0.6822088386527466
0.7986810800667502


## Runs

In [71]:
ground_truth_paths = [r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\red-leaf-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\tiled-fescue-seg-1\train\images",
         r"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\buds-1\train\images"]
dino_models = ['swint', 'swinb']
prompts = [
    ['smooth blueberry', 'blueberry'],
    ['A red leaf plant growing among green plants', 'A cluster of red leaves surrounded by green foliage'],
    ['patches of fescue', 'Fescue patches'],
    ['buds emerging', 'developing bud']
]
confidences = [[0.36, 0.4, 0.24, 0.41], [0.36, 0.3, 0.15, 0.34]]
#SAM_model = ['sam_b.pt','sam_l.pt','mobile_sam.pt']
SAM_model = ['sam2_t.pt','sam2_s.pt','sam2_b.pt','sam2_l.pt']

In [72]:
from ultralytics import SAM
from pathlib import Path
import time as t

def save_masks(sam_results, output_dir):
    segments = sam_results[0].masks.xyn
    with open(f"{Path(output_dir) / Path(sam_results[0].path).stem}.txt", "w") as f:
        for i in range(len(segments)):
            s = segments[i]
            if len(s) == 0:
                continue
            segment = map(str, segments[i].reshape(-1).tolist())
            f.write(f"0 " + " ".join(segment) + "\n")

def append_mask(sam_results, output_dir):
    segments = sam_results[0].masks.xyn
    with open(f"{Path(output_dir) / Path(sam_results[0].path).stem}.txt", "a") as f:
        for i in range(len(segments)):
            s = segments[i]
            if len(s) == 0:
                continue
            segment = map(str, segments[i].reshape(-1).tolist())
            f.write(f"0 " + " ".join(segment) + "\n")



In [73]:
SAM_time = {}

In [74]:
sam_masks = []
dino_models = ['swint', 'swinb']
#sam_models = ['dino_SAM_b', 'dino_SAM_l', 'dino_SAM_mobile']
sam_models = ['dino_SAM2_t', 'dino_SAM2_s', 'dino_SAM2_b', 'dino_SAM2_l']
categories = ['berries','red leaf','fescue','buds']
max_areas = [0.4, 0.4, 0.7, 0.25]
folders = {'berries':'berries-1', 'red leaf':'red-leaf-1','fescue':'tiled-fescue-seg-1','buds':'buds-1'}

start = t.time()
for y in range(len(dino_models)):
    dino_model = load_dino_model(dino_models[y])
    for x in range(len(categories)):
        for z in range(len(sam_models)):
            img_path = ground_truth_paths[x]
            model = SAM(SAM_model[z])
            prompt = prompts[x][y]
            conf = confidences[y][x]
            start = t.time()
            for fname in os.listdir(img_path):
                sam_masks.clear()
                path = img_path+"\\"+fname
                boxes = run_dino_from_model(dino_model, path, prompt, conf, 0.1, max_areas[x])
                #print(len(boxes))
                #print(f'conf{conf}, prompt{prompt}')
                #for box in boxes:
                sam_results = model(os.path.join(img_path, fname), model= model, bboxes=boxes)
                save_path = fr"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Worldly_SAM\trials\{categories[x]}\{dino_models[y]}\{sam_models[z]}"
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                    #append_mask(sam_results, save_path)
                save_masks(sam_results, fr"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Worldly_SAM\trials\{categories[x]}\{dino_models[y]}\{sam_models[z]}")
            end = t.time()-start
            print(f"\n\nTime for {categories[x]},{dino_models[y]},{sam_models[z]}: {end}")
            SAM_time[f'{categories[x]},{dino_models[y]},{sam_models[z]}'] = end

final text_encoder_type: bert-base-uncased

image 1/1 C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images\IMG_9331_jpg.rf.72224121a63b7fbdd24a1ec1c8e82326.jpg: 1024x1024 46.0ms
Speed: 6.0ms preprocess, 46.0ms inference, 0.0ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images\IMG_9331_jpg.rf.8cb583bb444b98e1eaf3f2f6d46d2f4c.jpg: 1024x1024 52.0ms
Speed: 8.0ms preprocess, 52.0ms inference, 0.0ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images\IMG_9355_jpg.rf.4aea93c91e43fdcc0f95c9e9cc5c9499.jpg: 1024x1024 52.0ms
Speed: 6.0ms preprocess, 52.0ms inference, 2.0ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\berries-1\train\images\IMG_9379_jp

In [75]:
for key in SAM_time.keys():
    print(f'{key}, {SAM_time[key]}')

berries,swint,dino_SAM2_t, 3.4375805854797363
berries,swint,dino_SAM2_s, 3.1853952407836914
berries,swint,dino_SAM2_b, 3.3895657062530518
berries,swint,dino_SAM2_l, 4.589521646499634
red leaf,swint,dino_SAM2_t, 3.2183563709259033
red leaf,swint,dino_SAM2_s, 3.174992561340332
red leaf,swint,dino_SAM2_b, 3.438707113265991
red leaf,swint,dino_SAM2_l, 4.6417224407196045
fescue,swint,dino_SAM2_t, 3.0607166290283203
fescue,swint,dino_SAM2_s, 3.004314661026001
fescue,swint,dino_SAM2_b, 3.25848650932312
fescue,swint,dino_SAM2_l, 4.251156330108643
buds,swint,dino_SAM2_t, 3.024277687072754
buds,swint,dino_SAM2_s, 3.177898406982422
buds,swint,dino_SAM2_b, 3.371643304824829
buds,swint,dino_SAM2_l, 4.064979314804077
berries,swinb,dino_SAM2_t, 3.965263605117798
berries,swinb,dino_SAM2_s, 3.7914059162139893
berries,swinb,dino_SAM2_b, 4.182908773422241
berries,swinb,dino_SAM2_l, 4.994729042053223
red leaf,swinb,dino_SAM2_t, 3.6935510635375977
red leaf,swinb,dino_SAM2_s, 3.864793300628662
red leaf,swin

## Segmentation Evaluation

In [76]:
from PIL import Image, ImageDraw
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt

def read_and_draw_masks(file_path, image_dim=(1280, 720)):
    with open(file_path, 'r') as file:
        masks = []
        for line in file:
            raw_mask = [float(x) for x in list(line.strip().split())]
            points = []
            for point in range(int((len(raw_mask) - 1) / 2)):
                p1 = int(raw_mask[(2 * point) + 1] * image_dim[0])
                p2 = int(raw_mask[(2 * point) + 2] * image_dim[1])
                points.append([p1, p2])
            masks.append(points)
    canvas = np.zeros((image_dim[1], image_dim[0]), dtype=np.uint8)
    for mask in masks:
        cv2.fillPoly(canvas, np.array([mask], dtype=np.int32), 255)

    #plt.imshow(canvas, cmap='gray')
    #plt.axis('off')
    #plt.show()

    #image = Image.new('L', image_dim, 0)
    #draw = ImageDraw.Draw(image)
    #for box in boxes:
    #draw.rectangle(box, fill=255)
    #draw.rectangle([1,1,20,20], fill=255)
    #image.save("test.jpg")
    return np.array(canvas, dtype=np.uint8)

def process_files_seg(predicted_mask_dir, ground_truth_mask_dir):
    predicted_files = os.listdir(ground_truth_mask_dir)
    metrics = {
        'iou_scores': [],
        'pixel_accuracies': [],
        'precision_scores': [],
        'recall_scores': [],
        'f1_scores': [],
        'mcc_scores': [],
        'specificity_scores': []
    }

    for fname in predicted_files:
        predicted_mask_path = os.path.join(predicted_mask_dir, fname)
        ground_truth_mask_path = os.path.join(ground_truth_mask_dir, os.path.splitext(fname)[0] + '.txt')
        if not os.path.exists(predicted_mask_path):
            metrics['iou_scores'].append(0)
            metrics['pixel_accuracies'].append(0)
            metrics['precision_scores'].append(0)
            metrics['recall_scores'].append(0)
            metrics['f1_scores'].append(0)
            metrics['mcc_scores'].append(0)
            metrics['specificity_scores'].append(0)
            continue

        COMMON_HEIGHT, COMMON_WIDTH = 1280, 720  # or any other desired size

        #print("predicted, ground-truth")
        predicted_mask = read_and_draw_masks(predicted_mask_path)

        ground_truth_mask = read_and_draw_masks(ground_truth_mask_path)

        predicted_mask = cv2.resize(predicted_mask, (COMMON_WIDTH, COMMON_HEIGHT))
        ground_truth_mask = cv2.resize(ground_truth_mask, (COMMON_WIDTH, COMMON_HEIGHT))

        _, predicted_mask_bin = cv2.threshold(predicted_mask, 127, 255, cv2.THRESH_BINARY)
        _, ground_truth_mask_bin = cv2.threshold(ground_truth_mask, 127, 255, cv2.THRESH_BINARY)

        predicted_mask_bin = predicted_mask_bin / 255
        ground_truth_mask_bin = ground_truth_mask_bin / 255
        TP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 1)))
        TN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 0)))
        FP = np.float64(np.sum(np.logical_and(predicted_mask_bin == 1, ground_truth_mask_bin == 0)))
        FN = np.float64(np.sum(np.logical_and(predicted_mask_bin == 0, ground_truth_mask_bin == 1)))

        intersection = np.logical_and(predicted_mask_bin, ground_truth_mask_bin)
        union = np.logical_or(predicted_mask_bin, ground_truth_mask_bin)
        metrics['iou_scores'].append(np.sum(intersection) / np.sum(union))
        metrics['pixel_accuracies'].append(pixel_accuracy(predicted_mask_bin, ground_truth_mask_bin))
        precision, recall, f1, mcc, specificity = calculate_metrics(TP, FP, FN, TN)
        metrics['precision_scores'].append(precision)
        metrics['recall_scores'].append(recall)
        metrics['f1_scores'].append(f1)
        metrics['mcc_scores'].append(mcc)
        metrics['specificity_scores'].append(specificity)

    return metrics

In [77]:
dino_models = ['swint', 'swinb']
sam_models = ['dino_SAM2_t', 'dino_SAM2_s', 'dino_SAM2_b', 'dino_SAM2_l']
categories = ['berries','red leaf','fescue','buds']
folders = {'berries':'berries-1', 'red leaf':'red-leaf-1','fescue':'tiled-fescue-seg-1','buds':'buds-1'}

for category in categories:
    for dino_model in dino_models:
        for sam_model in sam_models:
            metrics = process_files_seg(fr"C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\Worldly_SAM\trials\{category}\{dino_model}\{sam_model}", fr'C:\Users\Mechanized Systems\DataspellProjects\WSU_joint_data\Auto Annotate\{folders[category]}\train\labels')
            for score in metrics['iou_scores']:
                print(score)
            print(rf"{category}\{dino_model}\{sam_model}: Mean IOU = {np.mean(metrics['iou_scores'])}")


0.5115476506503849
0.4480789200415369
0.5537422622397299
0.617779647773036
0.8279157315572288
0.9265829951873171
0.9226847918436704
0.8397439528692007
0.8828280652630446
berries\swint\dino_SAM2_t: Mean IOU = 0.7256560019361277
0.5165019829266653
0.4579594747162636
0.5637276740512738
0.6169852407452214
0.8301024544851477
0.9360275809231948
0.9429261226460647
0.8445884492951341
0.8399691187823977
berries\swint\dino_SAM2_s: Mean IOU = 0.7276431220634847
0.49922440537745605
0.44786019148690737
0.55714055448098
0.6178866658679765
0.8370002192822162
0.9247149722039009
0.923627460212826
0.8362170334285234
0.8398804672643303
berries\swint\dino_SAM2_b: Mean IOU = 0.7203946632894574
0.5186281102891729
0.45337714922833167
0.5648185565343894
0.6254404082128538
0.8371889367291834
0.9409380428488708
0.9403979907264297
0.8458960522578813
0.8425686457932097
berries\swint\dino_SAM2_l: Mean IOU = 0.7299170991800359
0.43429774623723
0.41971798953832157
0.6930413660070015
0.6127265065318163
0.898281737113