In [2]:
import json
from ultralytics import YOLO

# Load your model
model = YOLO("./models/best-071024-3.pt")
# Make a prediction
results = model.predict(source='./images/NEFT.jpg', save=True, conf=0.25, device=0)


image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 15 boxInputs, 18 lineInputs, 1 signature, 81.4ms
Speed: 8.9ms preprocess, 81.4ms inference, 11.3ms postprocess per image at shape (1, 3, 1216, 896)
Results saved to [1mruns/detect/predict8[0m


In [3]:
import json

# Extract results
detections = []
for result in results:
    for detection in result.boxes:  # Access the detected boxes
        x1, y1, x2, y2 = detection.xyxy[0]  # Bounding box coordinates
        confidence = detection.conf[0]      # Confidence score
        class_id = int(detection.cls[0])    # Class ID
        
        detections.append({
            'bbox': [x1.item(), y1.item(), x2.item(), y2.item()],
            'confidence': confidence.item(),
            'class_id': class_id
        })

# Convert to JSON
json_output = json.dumps(detections, indent=4)

# Save to a file or print it
with open('output.json', 'w') as json_file:
    json_file.write(json_output)

print(json_output)


[
    {
        "bbox": [
            1593.97314453125,
            289.24444580078125,
            2230.648193359375,
            359.25933837890625
        ],
        "confidence": 0.9829572439193726,
        "class_id": 3
    },
    {
        "bbox": [
            420.12554931640625,
            1166.6220703125,
            1178.8392333984375,
            1235.150146484375
        ],
        "confidence": 0.9820379018783569,
        "class_id": 3
    },
    {
        "bbox": [
            949.0014038085938,
            399.6697692871094,
            1390.4024658203125,
            466.61163330078125
        ],
        "confidence": 0.9602988362312317,
        "class_id": 5
    },
    {
        "bbox": [
            414.6068115234375,
            1519.8336181640625,
            1134.2467041015625,
            1588.5987548828125
        ],
        "confidence": 0.9386115670204163,
        "class_id": 3
    },
    {
        "bbox": [
            422.8742370605469,
            1584.0404

In [5]:

# Prepare the JSON output
output_data = []

# Iterate over the results to extract relevant information
for result in results:
    predictions = result.pred[0]  # Assuming predictions are in the first element
    for prediction in predictions:
        # Extracting the relevant fields: box coordinates, confidence, class id
        output_data.append({
            "class_id": int(prediction[5]),  # Assuming class id is at index 5
            "confidence": float(prediction[4]),  # Confidence at index 4
            "box": {
                "x1": float(prediction[0]),  # x1 coordinate
                "y1": float(prediction[1]),  # y1 coordinate
                "x2": float(prediction[2]),  # x2 coordinate
                "y2": float(prediction[3])   # y2 coordinate
            }
        })

# Save the output as a JSON file
with open('predictions.json', 'w') as json_file:
    json.dump(output_data, json_file, indent=4)

print("Predictions saved to predictions.json")


AttributeError: 'Results' object has no attribute 'pred'. See valid attributes below.

    A class for storing and manipulating inference results.

    This class encapsulates the functionality for handling detection, segmentation, pose estimation,
    and classification results from YOLO models.

    Attributes:
        orig_img (numpy.ndarray): Original image as a numpy array.
        orig_shape (Tuple[int, int]): Original image shape in (height, width) format.
        boxes (Boxes | None): Object containing detection bounding boxes.
        masks (Masks | None): Object containing detection masks.
        probs (Probs | None): Object containing class probabilities for classification tasks.
        keypoints (Keypoints | None): Object containing detected keypoints for each object.
        obb (OBB | None): Object containing oriented bounding boxes.
        speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.
        names (Dict[int, str]): Dictionary mapping class IDs to class names.
        path (str): Path to the image file.
        _keys (Tuple[str, ...]): Tuple of attribute names for internal use.

    Methods:
        update: Updates object attributes with new detection results.
        cpu: Returns a copy of the Results object with all tensors on CPU memory.
        numpy: Returns a copy of the Results object with all tensors as numpy arrays.
        cuda: Returns a copy of the Results object with all tensors on GPU memory.
        to: Returns a copy of the Results object with tensors on a specified device and dtype.
        new: Returns a new Results object with the same image, path, and names.
        plot: Plots detection results on an input image, returning an annotated image.
        show: Shows annotated results on screen.
        save: Saves annotated results to file.
        verbose: Returns a log string for each task, detailing detections and classifications.
        save_txt: Saves detection results to a text file.
        save_crop: Saves cropped detection images.
        tojson: Converts detection results to JSON format.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     print(result.boxes)  # Print detection boxes
        ...     result.show()  # Display the annotated image
        ...     result.save(filename="result.jpg")  # Save annotated image
    

In [4]:
import cv2
import json
import numpy as np
from ultralytics import YOLO

# Load both models
model_a = YOLO('./models/best-071024-2.pt')
model_b = YOLO('./models/best-071024-3.pt')

# Run inference on the same image
image_path = './images/NEFT.jpg'
results_a = model_a(image_path)
results_b = model_b(image_path)

# Function to extract detections
def extract_detections(results):
    detections = []
    for result in results:
        for detection in result.boxes:
            x1, y1, x2, y2 = detection.xyxy[0]
            confidence = detection.conf[0]
            class_id = int(detection.cls[0])
            detections.append({
                'bbox': [x1.item(), y1.item(), x2.item(), y2.item()],
                'confidence': confidence.item(),
                'class_id': class_id
            })
    return detections

# Extract detections
detections_a = extract_detections(results_a)
detections_b = extract_detections(results_b)

# Combine detections
def combine_detections(detections_a, detections_b, iou_threshold=0.5):
    combined = []
    def iou(box1, box2):
        x1_inter = max(box1[0], box2[0])
        y1_inter = max(box1[1], box2[1])
        x2_inter = min(box1[2], box2[2])
        y2_inter = min(box1[3], box2[3])
        
        inter_area = max(0, x2_inter - x1_inter) * max(0, y2_inter - y1_inter)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        
        return inter_area / (box1_area + box2_area - inter_area)

    for detection in detections_a:
        combined.append(detection)

    for detection_b in detections_b:
        is_conflicted = False
        for detection_a in combined:
            if iou(detection_a['bbox'], detection_b['bbox']) > iou_threshold:
                is_conflicted = True
                if detection_b['confidence'] > detection_a['confidence']:
                    combined.remove(detection_a)
                    combined.append(detection_b)
                break
        if not is_conflicted:
            combined.append(detection_b)

    return combined

final_detections = combine_detections(detections_a, detections_b)

# Load the original image
image = cv2.imread(image_path)

# Function to draw detections
def draw_detections(image, detections):
    for detection in detections:
        bbox = detection['bbox']
        confidence = detection['confidence']
        class_id = detection['class_id']
        
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f'Class {class_id}: {confidence:.2f}'
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

draw_detections(image, final_detections)

# Save the output image
output_image_path = 'output_image.jpg'
cv2.imwrite(output_image_path, image)

# Display the image (optional)
# cv2.imshow("Detections", image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()



image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 18 boxInputs, 9 lineInputs, 3 signatures, 75.0ms
Speed: 10.8ms preprocess, 75.0ms inference, 10.8ms postprocess per image at shape (1, 3, 1216, 896)

image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 15 boxInputs, 18 lineInputs, 1 signature, 44.2ms
Speed: 9.4ms preprocess, 44.2ms inference, 4.1ms postprocess per image at shape (1, 3, 1216, 896)


True

In [7]:
import cv2
import numpy as np
from scipy.optimize import linear_sum_assignment
from ultralytics import YOLO

# Load both models
model_a = YOLO('./models/best-071024-2.pt')
model_b = YOLO('./models/best-071024-3.pt')

# Run inference on the same image
image_path = './images/NEFT.jpg'
results_a = model_a(image_path)
results_b = model_b(image_path)

# Extract detections
def extract_detections(results):
    detections = []
    for result in results:
        for detection in result.boxes:
            x1, y1, x2, y2 = detection.xyxy[0]
            confidence = detection.conf[0]
            class_id = int(detection.cls[0])
            detections.append({
                'bbox': [x1.item(), y1.item(), x2.item(), y2.item()],
                'confidence': confidence.item(),
                'class_id': class_id
            })
    return detections


# Filter nested detections
def filter_nested_detections(detections):
    filtered_detections = []
    for det in detections:
        x1, y1, x2, y2 = det['bbox']
        is_nested = False
        for other_det in detections:
            if det != other_det:
                ox1, oy1, ox2, oy2 = other_det['bbox']
                if x1 >= ox1 and y1 >= oy1 and x2 <= ox2 and y2 <= oy2:
                    is_nested = True
                    break
        if not is_nested:
            filtered_detections.append(det)
    return filtered_detections

# Extract and filter detections from both models
detections_a = filter_nested_detections(extract_detections(results_a))
detections_b = filter_nested_detections(extract_detections(results_b))
# Create cost matrix
def create_cost_matrix(detections_a, detections_b):
    cost_matrix = np.zeros((len(detections_a), len(detections_b)))
    
    for i, det_a in enumerate(detections_a):
        for j, det_b in enumerate(detections_b):
            # Here you can use distance or other criteria for the cost function
            cost_matrix[i, j] = 1 - min(det_a['confidence'], det_b['confidence'])  # Example cost function
            
    return cost_matrix

# Perform matching using Hungarian Algorithm
def hungarian_matching(cost_matrix):
    row_indices, col_indices = linear_sum_assignment(cost_matrix)
    return list(zip(row_indices, col_indices))

# Create cost matrix
cost_matrix = create_cost_matrix(detections_a, detections_b)

# Perform matching
matches = hungarian_matching(cost_matrix)

# Load the original image
image = cv2.imread(image_path)

# Draw matches
def draw_matches(image, detections_a, detections_b, matches):
    for i, j in matches:
        bbox_a = detections_a[i]['bbox']
        bbox_b = detections_b[j]['bbox']
        
        # Draw bounding box for model A
        x1_a, y1_a, x2_a, y2_a = map(int, bbox_a)
        cv2.rectangle(image, (x1_a, y1_a), (x2_a, y2_a), (0, 255, 0), 2)  # Green box
        
        # Draw bounding box for model B
        x1_b, y1_b, x2_b, y2_b = map(int, bbox_b)
        cv2.rectangle(image, (x1_b, y1_b), (x2_b, y2_b), (255, 0, 0), 2)  # Blue box
        
        # Label with class id and confidence
        label_a = f'A: {detections_a[i]["class_id"]} {detections_a[i]["confidence"]:.2f}'
        label_b = f'B: {detections_b[j]["class_id"]} {detections_b[j]["confidence"]:.2f}'
        
        cv2.putText(image, label_a, (x1_a, y1_a - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.putText(image, label_b, (x1_b, y1_b - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

# Draw the matches on the image
draw_matches(image, detections_a, detections_b, matches)

# Save or display the image
output_image_path = 'output_image.jpg'
cv2.imwrite(output_image_path, image)
# cv2.imshow("Matches", image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()



image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 18 boxInputs, 9 lineInputs, 3 signatures, 79.0ms
Speed: 11.8ms preprocess, 79.0ms inference, 11.4ms postprocess per image at shape (1, 3, 1216, 896)

image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 15 boxInputs, 18 lineInputs, 1 signature, 40.7ms
Speed: 8.2ms preprocess, 40.7ms inference, 2.8ms postprocess per image at shape (1, 3, 1216, 896)


True

In [8]:
import cv2
import numpy as np
from ultralytics import YOLO
from shapely.geometry import box
from shapely.ops import unary_union

# Load models
model_a = YOLO('./models/best-071024-2.pt')
model_b = YOLO('./models/best-071024-3.pt')

def extract_detections(results):
    detections = []
    for result in results:
        for detection in result.boxes:
            x1, y1, x2, y2 = detection.xyxy[0]
            confidence = detection.conf[0]
            class_id = int(detection.cls[0])
            detections.append({
                'bbox': [x1.item(), y1.item(), x2.item(), y2.item()],
                'confidence': confidence.item(),
                'class_id': class_id
            })
    return detections

def calculate_iou(box1, box2):
    # Convert to shapely boxes
    box1_shape = box(box1[0], box1[1], box1[2], box1[3])
    box2_shape = box(box2[0], box2[1], box2[2], box2[3])
    
    if not box1_shape.intersects(box2_shape):
        return 0
    
    intersection = box1_shape.intersection(box2_shape).area
    union = box1_shape.union(box2_shape).area
    
    return intersection / union

def merge_boxes(box1, box2):
    # Convert to shapely boxes
    box1_shape = box(box1[0], box1[1], box1[2], box1[3])
    box2_shape = box(box2[0], box2[1], box2[2], box2[3])
    
    # Merge boxes
    merged_box = unary_union([box1_shape, box2_shape])
    bounds = merged_box.bounds
    
    return list(bounds)

def group_detections(detections_a, detections_b, iou_threshold=0.5):
    all_detections = detections_a + detections_b
    grouped_detections = []
    processed = set()

    for i, det1 in enumerate(all_detections):
        if i in processed:
            continue

        current_group = [det1]
        processed.add(i)

        for j, det2 in enumerate(all_detections):
            if j in processed or i == j:
                continue

            if (det1['class_id'] == det2['class_id'] and 
                calculate_iou(det1['bbox'], det2['bbox']) > iou_threshold):
                current_group.append(det2)
                processed.add(j)

        if len(current_group) > 1:
            # Merge all boxes in the group
            merged_bbox = current_group[0]['bbox']
            max_conf = current_group[0]['confidence']
            
            for det in current_group[1:]:
                merged_bbox = merge_boxes(merged_bbox, det['bbox'])
                max_conf = max(max_conf, det['confidence'])

            grouped_detections.append({
                'bbox': merged_bbox,
                'confidence': max_conf,
                'class_id': current_group[0]['class_id']
            })
        else:
            grouped_detections.append(current_group[0])

    return grouped_detections

def draw_detections(image, detections):
    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]  # Green, Blue, Red
    
    for i, det in enumerate(detections):
        x1, y1, x2, y2 = map(int, det['bbox'])
        color = colors[i % len(colors)]
        
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        label = f"Class {det['class_id']} ({det['confidence']:.2f})"
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

def process_image(image_path):
    # Run inference
    results_a = model_a(image_path)
    results_b = model_b(image_path)
    
    # Extract detections
    detections_a = extract_detections(results_a)
    detections_b = extract_detections(results_b)
    
    # Group and merge detections
    final_detections = group_detections(detections_a, detections_b)
    
    # Draw results
    image = cv2.imread(image_path)
    draw_detections(image, final_detections)
    
    return image, final_detections

# Main execution
if __name__ == "__main__":
    image_path = './images/NEFT.jpg'
    output_image, final_detections = process_image(image_path)
    
    # Save the output image
    cv2.imwrite('output_merged_detections.jpg', output_image)
    
    # Print detection results
    for i, det in enumerate(final_detections):
        print(f"Detection {i+1}: Class {det['class_id']}, Confidence: {det['confidence']:.2f}")


image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 18 boxInputs, 9 lineInputs, 3 signatures, 32.9ms
Speed: 6.8ms preprocess, 32.9ms inference, 3.1ms postprocess per image at shape (1, 3, 1216, 896)

image 1/1 /mnt/d/Sem5/DL/DL-Project/Code/Guru/merge/images/NEFT.jpg: 1216x896 15 boxInputs, 18 lineInputs, 1 signature, 33.9ms
Speed: 9.0ms preprocess, 33.9ms inference, 1.8ms postprocess per image at shape (1, 3, 1216, 896)
Detection 1: Class 6, Confidence: 0.99
Detection 2: Class 5, Confidence: 0.98
Detection 3: Class 6, Confidence: 0.97
Detection 4: Class 3, Confidence: 0.98
Detection 5: Class 3, Confidence: 0.98
Detection 6: Class 5, Confidence: 0.96
Detection 7: Class 3, Confidence: 0.96
Detection 8: Class 3, Confidence: 0.95
Detection 9: Class 6, Confidence: 0.94
Detection 10: Class 3, Confidence: 0.93
Detection 11: Class 5, Confidence: 0.92
Detection 12: Class 3, Confidence: 0.93
Detection 13: Class 3, Confidence: 0.93
Detection 14: Class 3, Confidence: 0.