In [1]:
!pip install ultralytics



In [19]:
from ultralytics import YOLO
import os
import cv2
import shutil
import time
from datetime import date
from datetime import datetime

def check_digi(num):
    """Return ``num`` as text, prefixed with one "0" when 0 <= num <= 9.

    Values outside that range are returned as ``str(num)`` unchanged.
    """
    text = str(num)
    is_single_digit = 0 <= num <= 9
    return "0" + text if is_single_digit else text

def fn_date():
    """Return today's date as the year followed by zero-padded month and day."""
    now = date.today()
    pieces = (str(now.year), check_digi(now.month), check_digi(now.day))
    return "".join(pieces)

def fn_time():
    """Return the current local time as zero-padded hour followed by zero-padded minute."""
    now = datetime.now()
    hour_text = check_digi(now.hour)
    minute_text = check_digi(now.minute)
    return f"{hour_text}{minute_text}"

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Only the first four items of each box are read, in the order
    ``x_min, y_min, x_max, y_max``; any further items are ignored.
    Returns 0 when the union area is zero.
    """
    left_a, top_a, right_a, bottom_a = box1[:4]
    left_b, top_b, right_b, bottom_b = box2[:4]

    # Overlap along each axis, clamped at zero for boxes that do not touch.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap = overlap_w * overlap_h

    size_a = (right_a - left_a) * (bottom_a - top_a)
    size_b = (right_b - left_b) * (bottom_b - top_b)
    combined = size_a + size_b - overlap

    if combined == 0:
        return 0
    return overlap / combined

# Detection pipeline: for every image in ``input_folder`` run the model, draw
# the kept detections, save the annotated image to ``output_folder`` and move
# the original into ``archive_folder``.
model = YOLO("best_cake.pt")
input_folder = "input/"
output_folder = "output/"
archive_folder = "archived/"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
min_confidence = 0.60    # detections at or below this confidence are discarded
max_overlap_iou = 0.70   # box pairs overlapping more than this are de-duplicated
box_color = (255, 0, 0)  # blue in OpenCV's BGR channel order

# cv2.imwrite only reports a missing directory through its return value, so
# make sure both destinations exist before any original is deleted.
os.makedirs(output_folder, exist_ok=True)
os.makedirs(archive_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Non-image files are left untouched in the input folder.
    if not filename.lower().endswith(image_extensions):
        continue

    file_path = os.path.join(input_folder, filename)
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot decode a file.
        print("skipped (unreadable image):", file_path)
        continue

    result = model.predict(image)[0]

    # Keep confident detections as [x1, y1, x2, y2, class_name, confidence].
    bounding_boxes = []
    for box in result.boxes:
        prob = round(box.conf[0].item(), 2)
        if prob > min_confidence:
            x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
            class_id = box.cls[0].item()
            bounding_boxes.append([x1, y1, x2, y2, result.names[class_id], prob])
    print("output", bounding_boxes)

    # For every heavily overlapping pair, drop the less confident box
    # (on a tie the earlier box of the pair is dropped).
    to_remove = set()
    for i in range(len(bounding_boxes)):
        for j in range(i + 1, len(bounding_boxes)):
            if calculate_iou(bounding_boxes[i], bounding_boxes[j]) > max_overlap_iou:
                if bounding_boxes[i][5] > bounding_boxes[j][5]:
                    to_remove.add(j)
                else:
                    to_remove.add(i)
    filtered_boxes = [box for idx, box in enumerate(bounding_boxes) if idx not in to_remove]

    for count_medicine, (x1, y1, x2, y2, class_name, confidence) in enumerate(filtered_boxes, start=1):
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)

        # Filled label background sized to the text, drawn just above the box.
        label_text = f" {class_name} {count_medicine} ({confidence*100:.0f}%)"
        (text_width, text_height), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(image, (x1, y1 - text_height - baseline), (x1 + text_width, y1), box_color, -1)
        cv2.putText(image, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Build "<stem>_<date><time>_<kept count>.<ext>" once so the output and the
    # archive copy always share the same name. Splitting on the last dot only
    # keeps names such as "a.b.jpg" intact.
    stem, _, extension = filename.rpartition(".")
    stamped_name = f"{stem}_{fn_date()}{fn_time()}_{len(filtered_boxes)}.{extension}"
    name_output = os.path.join(output_folder, stamped_name)
    name_archived = os.path.join(archive_folder, stamped_name)

    # Never delete the original unless the annotated image was actually written.
    if not cv2.imwrite(name_output, image):
        print("skipped (could not write output):", name_output)
        continue

    # Archive the original, then remove it from the input folder.
    shutil.copy(file_path, name_archived)
    os.remove(file_path)


0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 320.8ms
Speed: 2.7ms preprocess, 320.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
output [[1208, 441, 1462, 689, 'donut', 1.0], [912, 448, 1147, 682, 'cookie', 1.0], [237, 438, 478, 689, 'candy', 0.99], [1641, 458, 1884, 687, 'cake', 0.98], [496, 434, 708, 692, 'capcake', 0.96]]

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 321.0ms
Speed: 2.9ms preprocess, 321.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
output [[1271, 419, 1571, 708, 'donut', 1.0], [1638, 466, 1918, 743, 'cookie', 1.0], [187, 320, 479, 584, 'cake', 1.0], [916, 372, 1186, 686, 'candy', 0.99], [567, 332, 823, 641, 'capcake', 0.94]]

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 318.1ms
Speed: 3.0ms preprocess, 318.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
output [[556, 438, 859, 734, 'donut', 1.0], [199, 461, 485, 739, 'cookie', 1.0], [1590, 420, 1885, 704, 'cake'

In [21]:
# json
from ultralytics import YOLO
import os
import cv2
import shutil
import time
import json
from datetime import date
from datetime import datetime

def check_digi(num):
    """Return ``num`` as a string, with a leading "0" for values from 0 to 9."""
    if 0 <= num <= 9:
        return f"0{num}"
    return str(num)

def fn_date():
    """Return today's date as year + zero-padded month + zero-padded day."""
    current = date.today()
    month_text = check_digi(current.month)
    day_text = check_digi(current.day)
    return f"{current.year}{month_text}{day_text}"

def fn_time():
    """Return the current local time as zero-padded hour + zero-padded minute."""
    moment = datetime.now()
    return "".join(check_digi(part) for part in (moment.hour, moment.minute))

def calculate_iou(box1, box2):
    """Compute intersection-over-union for two axis-aligned boxes.

    Each box is read as ``x_min, y_min, x_max, y_max`` from its first four
    items; trailing items are ignored. The result is 0 when the union area
    is zero.
    """
    ax0, ay0, ax1, ay1 = box1[:4]
    bx0, by0, bx1, by1 = box2[:4]

    # Corners of the overlapping rectangle (may be inverted if disjoint).
    ix0, iy0 = max(ax0, bx0), max(ay0, by0)
    ix1, iy1 = min(ax1, bx1), min(ay1, by1)

    # Clamping at zero turns an inverted rectangle into an empty overlap.
    shared = max(0, ix1 - ix0) * max(0, iy1 - iy0)

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    covered = area_a + area_b - shared

    if covered != 0:
        return shared / covered
    return 0

# Detection pipeline with a JSON report: for every image in ``input_folder``
# run the model, draw the kept detections, save the annotated image and a
# JSON description to ``output_folder``, and move the original into
# ``archive_folder``.
model = YOLO("best_cake.pt")
input_folder = "input/"
output_folder = "output/"
archive_folder = "archived/"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
min_confidence = 0.60    # detections at or below this confidence are discarded
max_overlap_iou = 0.70   # box pairs overlapping more than this are de-duplicated
box_color = (255, 0, 0)  # blue in OpenCV's BGR channel order

# cv2.imwrite only reports a missing directory through its return value, so
# make sure both destinations exist before any original is deleted.
os.makedirs(output_folder, exist_ok=True)
os.makedirs(archive_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Non-image files are left untouched in the input folder.
    if not filename.lower().endswith(image_extensions):
        continue

    file_path = os.path.join(input_folder, filename)
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot decode a file.
        print("skipped (unreadable image):", file_path)
        continue

    # Take the timestamp once so the JSON field and every file name agree.
    date_part, time_part = fn_date(), fn_time()
    json_output = {
        "filename": filename,
        "timestamp": f"{date_part}_{time_part}",
        "objects": []
    }

    result = model.predict(image)[0]

    # Keep confident detections as [x1, y1, x2, y2, class_name, confidence].
    bounding_boxes = []
    for box in result.boxes:
        prob = round(box.conf[0].item(), 2)
        if prob > min_confidence:
            x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
            class_id = box.cls[0].item()
            bounding_boxes.append([x1, y1, x2, y2, result.names[class_id], prob])

    # For every heavily overlapping pair, drop the less confident box
    # (on a tie the earlier box of the pair is dropped).
    to_remove = set()
    for i in range(len(bounding_boxes)):
        for j in range(i + 1, len(bounding_boxes)):
            if calculate_iou(bounding_boxes[i], bounding_boxes[j]) > max_overlap_iou:
                if bounding_boxes[i][5] > bounding_boxes[j][5]:
                    to_remove.add(j)
                else:
                    to_remove.add(i)
    filtered_boxes = [box for idx, box in enumerate(bounding_boxes) if idx not in to_remove]

    for count_medicine, (x1, y1, x2, y2, class_name, confidence) in enumerate(filtered_boxes, start=1):
        # Record the detection in the JSON report.
        json_output["objects"].append({
            "id": count_medicine,
            "class": class_name,
            "confidence": confidence,
            "bbox": {
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2
            }
        })

        # Draw the box and a filled label background sized to the text.
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
        label_text = f"{count_medicine} {class_name} ({confidence*100:.0f}%)"
        (text_width, text_height), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(image, (x1, y1 - text_height - baseline - 5), (x1 + text_width, y1), box_color, -1)
        cv2.putText(image, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Build "<stem>_<date><time>_<kept count>" once; splitting on the last dot
    # only keeps names such as "a.b.jpg" intact.
    stem, _, extension = filename.rpartition(".")
    stamped_stem = f"{stem}_{date_part}{time_part}_{len(filtered_boxes)}"
    base_output = os.path.join(output_folder, f"{stamped_stem}.{extension}")
    json_filename = os.path.join(output_folder, f"{stamped_stem}.json")
    archived_path = os.path.join(archive_folder, f"{stamped_stem}.{extension}")

    # Never delete the original unless the annotated image was actually written.
    if not cv2.imwrite(base_output, image):
        print("skipped (could not write output):", base_output)
        continue

    with open(json_filename, 'w', encoding="utf-8") as f:
        json.dump(json_output, f, indent=4)

    # Archive the original, then remove it from the input folder.
    shutil.copy(file_path, archived_path)
    os.remove(file_path)


0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 326.0ms
Speed: 3.0ms preprocess, 326.0ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 321.6ms
Speed: 0.4ms preprocess, 321.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 330.0ms
Speed: 8.8ms preprocess, 330.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 330.8ms
Speed: 0.0ms preprocess, 330.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 2 candys, 1 capcake, 1 cookie, 1 donut, 335.4ms
Speed: 5.6ms preprocess, 335.4ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 328.6ms
Speed: 7.6ms preprocess, 328.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x6

In [25]:
# Detect objects and check that they are arranged left to right in the expected order (candy > capcake > cookie > donut > cake).
from ultralytics import YOLO
import os
import cv2
import shutil
import time
import json
from datetime import date
from datetime import datetime

def check_digi(num):
    """Return ``str(num)``, with a "0" put in front when num is between 0 and 9."""
    prefix = "0" if 0 <= num <= 9 else ""
    return prefix + str(num)

def fn_date():
    """Return today's date: the year, then the zero-padded month and day."""
    day_now = date.today()
    padded = [check_digi(day_now.month), check_digi(day_now.day)]
    return str(day_now.year) + "".join(padded)

def fn_time():
    """Return the current local hour and minute, each zero-padded, joined together."""
    right_now = datetime.now()
    hh = check_digi(right_now.hour)
    mm = check_digi(right_now.minute)
    return hh + mm

def calculate_iou(box1, box2):
    """Return intersection area divided by union area for two boxes.

    The first four items of each box are taken as
    ``x_min, y_min, x_max, y_max``; anything after them is ignored.
    A zero union area yields 0.
    """
    first, second = box1[:4], box2[:4]

    # Overlap rectangle: the larger of the mins and the smaller of the maxes.
    inter_left = max(first[0], second[0])
    inter_top = max(first[1], second[1])
    inter_right = min(first[2], second[2])
    inter_bottom = min(first[3], second[3])

    # Negative extents mean the boxes are disjoint on that axis.
    inter_area = max(0, inter_right - inter_left) * max(0, inter_bottom - inter_top)

    first_area = (first[2] - first[0]) * (first[3] - first[1])
    second_area = (second[2] - second[0]) * (second[3] - second[1])
    union_area = first_area + second_area - inter_area

    if union_area == 0:
        return 0
    return inter_area / union_area

def check_organization(objects):
    """Compare the left-to-right class order of ``objects`` with the expected one.

    Each object is a dict with a ``'class'`` string and a ``'bbox'`` dict that
    has an ``'x1'`` entry. Objects are ordered by ascending ``x1`` and their
    lower-cased class names are compared with the expected sequence.

    Returns a dict with ``'is_correct'``, ``'detected_order'`` and
    ``'expected_order'``.
    """
    expected_order = ['candy', 'capcake', 'cookie', 'donut', 'cake']

    def left_edge(item):
        return item['bbox']['x1']

    # Left-to-right reading order, lower-cased so the comparison ignores case.
    detected_classes = []
    for item in sorted(objects, key=left_edge):
        detected_classes.append(item['class'].lower())

    report = {}
    report['is_correct'] = detected_classes == expected_order
    report['detected_order'] = detected_classes
    report['expected_order'] = expected_order
    return report

# Detection pipeline with an arrangement check: for every image in
# ``input_folder`` run the model, draw the kept detections, verify their
# left-to-right order with check_organization, save the annotated image and a
# JSON report to ``output_folder``, and move the original into
# ``archive_folder``.
model = YOLO("best_cake.pt")
input_folder = "input/"
output_folder = "output/"
archive_folder = "archived/"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
min_confidence = 0.60    # detections at or below this confidence are discarded
max_overlap_iou = 0.70   # box pairs overlapping more than this are de-duplicated
bg_color = (255, 0, 0)   # box and label background colour (blue in BGR)

# cv2.imwrite only reports a missing directory through its return value, so
# make sure both destinations exist before any original is deleted.
os.makedirs(output_folder, exist_ok=True)
os.makedirs(archive_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Non-image files are left untouched in the input folder.
    if not filename.lower().endswith(image_extensions):
        continue

    file_path = os.path.join(input_folder, filename)
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot decode a file.
        print("skipped (unreadable image):", file_path)
        continue

    # Take the timestamp once so the JSON field and every file name agree.
    date_part, time_part = fn_date(), fn_time()
    json_output = {
        "filename": filename,
        "timestamp": f"{date_part}_{time_part}",
        "objects": [],
        "organization_check": None
    }

    result = model.predict(image)[0]

    # Keep confident detections as [x1, y1, x2, y2, class_name, confidence].
    bounding_boxes = []
    for box in result.boxes:
        prob = round(box.conf[0].item(), 2)
        if prob > min_confidence:
            x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
            class_id = box.cls[0].item()
            bounding_boxes.append([x1, y1, x2, y2, result.names[class_id], prob])

    # For every heavily overlapping pair, drop the less confident box
    # (on a tie the earlier box of the pair is dropped).
    to_remove = set()
    for i in range(len(bounding_boxes)):
        for j in range(i + 1, len(bounding_boxes)):
            if calculate_iou(bounding_boxes[i], bounding_boxes[j]) > max_overlap_iou:
                if bounding_boxes[i][5] > bounding_boxes[j][5]:
                    to_remove.add(j)
                else:
                    to_remove.add(i)
    filtered_boxes = [box for idx, box in enumerate(bounding_boxes) if idx not in to_remove]

    for count_medicine, (x1, y1, x2, y2, class_name, confidence) in enumerate(filtered_boxes, start=1):
        # Record the detection in the JSON report.
        json_output["objects"].append({
            "id": count_medicine,
            "class": class_name,
            "confidence": confidence,
            "bbox": {
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2
            }
        })

        # Draw the box and a filled label background sized to the text.
        cv2.rectangle(image, (x1, y1), (x2, y2), bg_color, 2)
        label_text = f"{count_medicine} {class_name} ({confidence*100:.0f}%)"
        (text_width, text_height), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.rectangle(image, (x1, y1 - text_height - baseline - 5), (x1 + text_width, y1), bg_color, -1)
        cv2.putText(image, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Check the left-to-right arrangement and store the verdict in the report.
    organization_result = check_organization(json_output["objects"])
    json_output["organization_check"] = organization_result

    # Stamp the verdict on the image: green when correct, red when wrong (BGR).
    status_text = "CORRECT" if organization_result["is_correct"] else "WRONG"
    status_color = (0, 255, 0) if organization_result["is_correct"] else (0, 0, 255)
    cv2.putText(image, f"Organization: {status_text}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, status_color, 2)

    # Build "<stem>_<date><time>_<kept count>" once; splitting on the last dot
    # only keeps names such as "a.b.jpg" intact.
    stem, _, extension = filename.rpartition(".")
    stamped_stem = f"{stem}_{date_part}{time_part}_{len(filtered_boxes)}"
    base_output = os.path.join(output_folder, f"{stamped_stem}.{extension}")
    json_filename = os.path.join(output_folder, f"{stamped_stem}.json")
    archived_path = os.path.join(archive_folder, f"{stamped_stem}.{extension}")

    # Never delete the original unless the annotated image was actually written.
    if not cv2.imwrite(base_output, image):
        print("skipped (could not write output):", base_output)
        continue

    with open(json_filename, 'w', encoding="utf-8") as f:
        json.dump(json_output, f, indent=4)

    # Archive the original, then remove it from the input folder.
    shutil.copy(file_path, archived_path)
    os.remove(file_path)


0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 320.0ms
Speed: 4.0ms preprocess, 320.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 2 cookies, 1 donut, 323.6ms
Speed: 3.0ms preprocess, 323.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 317.6ms
Speed: 3.1ms preprocess, 317.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 2 candys, 1 capcake, 1 cookie, 1 donut, 334.1ms
Speed: 3.0ms preprocess, 334.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 314.5ms
Speed: 4.1ms preprocess, 314.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cake, 1 candy, 1 capcake, 1 cookie, 1 donut, 321.1ms
Speed: 3.0ms preprocess, 321.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x