In [17]:
import glob
import json
import cv2
from pathlib import Path
import pandas as pd

import torch
from sahi import AutoDetectionModel
from ultralytics.utils import metrics

from utils import plot_bboxes, detection_metrics, convert_yolo_to_torch, sliced_inference, load_labels

In [26]:
# Test P&ID Image Dimensions
IMAGE_WIDTH = 2339
IMAGE_HEIGHT = 1653
X1 = 100
X2 = 1779
Y1 = 200
Y2 = 1403
CONFIDENCE_THRESHOLD = 0.75
IOU_THRESHOLD = 0.5

root = Path().resolve().parents[1]
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

HEIGHT = IMAGE_HEIGHT - Y1 - (IMAGE_HEIGHT - Y2)
WIDTH = IMAGE_WIDTH - X1 - (IMAGE_WIDTH - X2)

In [3]:
# Training P&ID Dimensions
IMAGE_WIDTH = 5268
IMAGE_HEIGHT = 4011
X1 = 100
X2 = 1779
Y1 = 200
Y2 = 1403
CONFIDENCE_THRESHOLD = 0.75
IOU_THRESHOLD = 0.5

root = Path().resolve().parents[1]
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [15]:
im_path = root / 'Data' / 'test' / 'fw_system' / 'fw_system-1_cropped.jpg'
im = cv2.imread(im_path)
test_labels = root / 'Data' / 'test' / 'fw_system' / 'labels' / '1de1eea3-fw_system-1_cropped.txt'

## Extract GT Images from P&ID using Bounding Boxes

In [30]:
label_tensor = load_labels(test_labels)
label_tensor_sorted = torch.stack(sorted(label_tensor, key=lambda label_tensor: label_tensor[0]))

unique, idx = label_tensor_sorted[:, 0].unique(dim=0, sorted=True, return_inverse=True)

# Get 'ground truth' label by taking first instance of each unique class
unique_indices = torch.cat(
    [
        torch.ones(1), # Base case
        torch.tensor([i for i in range(1, idx.size(0)) if idx[i] != idx[i-1]]) # Sliding window 
    ]
).long()

gt_labels = label_tensor_sorted[unique_indices]
class_ids = gt_labels[:, 0].long()
gt_labels_torch = convert_yolo_to_torch(gt_labels, height=HEIGHT, width=WIDTH, x1=X1, y1=Y1, cropped=False)

# Save Extracted Images
for gt_label, class_id in zip(gt_labels_torch, class_ids):
    
    id = class_id.item()
    x1, x2 = torch.round(gt_label[::2]).int()
    y1, y2 = torch.round(gt_label[1::2]).int()

    write_path = root / 'Data' / 'test' / 'fw_system' / 'labels' / f'class_{id}.png'
    if not cv2.imwrite(write_path, im[y1:y2, x1:x2]):
        print('Failed')


## Extract images from detected Bounding Boxes

In [31]:
from PIDObjectLookupTable import ObjectLookupTable
from PIDDetectionEncoder import ObjectEncoder

In [38]:
model_folder = 'train8'
model_path = f'runs/detect/{model_folder}/weights/best.pt'
im_path = root / 'Data' / 'test' / 'fw_system' / 'fw_system-1_cropped.jpg'
im = cv2.imread(im_path)

model = AutoDetectionModel.from_pretrained(
    model_type='yolov8',
    model_path=model_path,
    confidence_threshold = CONFIDENCE_THRESHOLD,
    device=device
)

results = sliced_inference(model, img_path=str(im_path), slice_height=256, slice_width=256, h_ratio=0.2, w_ratio=0.2) 
results.export_visuals(export_dir=root, hide_labels=True)
pred_bboxes = torch.tensor([x.bbox.to_xyxy() for x in results.object_prediction_list])

for i, detection_bbox in enumerate(pred_bboxes):
    
    x1, x2 = torch.round(detection_bbox[::2]).int()
    y1, y2 = torch.round(detection_bbox[1::2]).int()

    write_path = root / 'Data' / 'test' / 'fw_system' / 'predictions' / f'{model_folder}' /f'prediction_{i+1}.png'
    if not cv2.imwrite(write_path, im[y1:y2, x1:x2]):
        print('Failed')

Performing prediction on 48 slices.


## Ground Truth Image Embeddings

In [45]:
# GT Class Images
gt_imgs_path = root / 'Data' / 'test' / 'fw_system' / 'labels' / '*.png'
gt_imgs = glob.glob(str(gt_imgs_path))

# Detection Images
imgs_path = root / 'Data' / 'test' / 'fw_system' / 'predictions' / model_folder / '*.png'
prediction_imgs = glob.glob(str(imgs_path))

# Class ID to Labels
class_labels = root / 'Data' / 'test' / 'fw_system' / 'labels' / 'notes.json'
with open(class_labels) as json_data:
    d = json.load(json_data)

# Translate Class ID to labels
gt_indices = [0, 1, 3, 6, 7, 9, 11, 12, 16, 18, 21, 23, 24, 25, 27, 28, 30, 31, 32, 39]
labels = [d['categories'][i] for i in gt_indices]

# Pre-Trained Encoder Path
encoder_pth = root / 'nyu-capstone-2024-PIDGraph' / 'Object Detection'/ 'models' / 'encoder_20250414.pth'

In [46]:
labels

[{'id': 0, 'name': 'Ball Valve N.C.'},
 {'id': 1, 'name': 'Ball Valve N.O.'},
 {'id': 3, 'name': 'Centrifugal Pump'},
 {'id': 6, 'name': 'Continuation Arrow'},
 {'id': 7, 'name': 'Control Measurement Point'},
 {'id': 9, 'name': 'Coupling'},
 {'id': 11, 'name': 'Flange'},
 {'id': 12, 'name': 'Flow Arrow'},
 {'id': 16, 'name': 'Gate Valve N.O. #2'},
 {'id': 18, 'name': 'Gate Valve Pneumatic'},
 {'id': 21, 'name': 'Globe Valve N.O.'},
 {'id': 23, 'name': 'Hose Bib'},
 {'id': 24, 'name': 'Inline Flow Meter'},
 {'id': 25, 'name': 'Inline Indicator'},
 {'id': 27, 'name': 'Motor'},
 {'id': 28, 'name': 'Needle Valve'},
 {'id': 30, 'name': 'PLC'},
 {'id': 31, 'name': 'Pressure Vessel'},
 {'id': 32, 'name': 'Reducer'},
 {'id': 39, 'name': 'Swinging Disk Check Valve'}]

In [9]:
from PIDDetectionEncoder import LeNet

## Prediction Images Embeddings & Matching

In [10]:
encoder = torch.load(encoder_pth)
lookup_tables = ObjectLookupTable(labels, gt_imgs, encoder, img_size=224)
max_sim, match_class = lookup_tables.classify(prediction_imgs)



## Atlas Embedding Map of Training Data Points

In [47]:
from nomic import atlas
import numpy as np

gt_imgs_path = root / 'Data' / 'SimCLR' / '*.png'
gt_imgs = glob.glob(str(gt_imgs_path))

encoder = torch.load(encoder_pth)
lookup_tables = ObjectLookupTable(labels, gt_imgs, encoder, img_size=224)

ModuleNotFoundError: No module named 'nomic'

In [12]:
embeddings_np = lookup_tables.gt_embeddings.cpu().numpy()

In [13]:
class_labels = root / 'nyu-capstone-2024-PIDGraph' / 'Object Detection' / 'classes_general.json'
with open(class_labels) as json_data:
    d = json.load(json_data)

In [14]:
import re
img_classes = [
    str(int(re.findall('(\d+)(?:_[a-z])?\.png', path)[0]))
    for path in gt_imgs
    ]
text_labels = [d[i]['class_name'] for i in img_classes]

KeyError: '0'

In [15]:
data = [
    {
        "id": f"img_{i}",
        "label": text_labels[i],
        "image_path": img_path
    }
    for i, img_path in enumerate(gt_imgs)
]

NameError: name 'text_labels' is not defined

In [16]:
atlas.map_data(
    data=data,
    blobs=gt_imgs,
    embeddings=embeddings_np,
    identifier='P&ID Object Embeddings (Resnet18 Backbone, Low Temp)'
)

NameError: name 'atlas' is not defined

In [17]:
from PIL import Image

def calculate_mean_std(image_paths, target_size=(224, 224)):
    # Initialize lists to store pixel values for each channel
    red_values = []
    green_values = []
    blue_values = []

    # Loop through all image paths
    for img_path in image_paths:
        # Open the image
        img = Image.open(img_path)
        img = img.convert("RGB")  # Ensure the image is in RGB mode
        
        # Resize the image to the target size
        img = img.resize(target_size)
        
        # Convert the image to a numpy array
        img_array = np.array(img)

        # Append pixel values of each channel
        red_values.append(img_array[:, :, 0])
        green_values.append(img_array[:, :, 1])
        blue_values.append(img_array[:, :, 2])

    # Convert lists to numpy arrays
    red_values = np.concatenate(red_values)
    green_values = np.concatenate(green_values)
    blue_values = np.concatenate(blue_values)

    # Calculate mean and std for each channel
    red_mean, red_std = np.mean(red_values), np.std(red_values)
    green_mean, green_std = np.mean(green_values), np.std(green_values)
    blue_mean, blue_std = np.mean(blue_values), np.std(blue_values)

    return (red_mean, red_std), (green_mean, green_std), (blue_mean, blue_std)

In [18]:
stats = calculate_mean_std(gt_imgs)

NameError: name 'np' is not defined

In [19]:
stats

NameError: name 'stats' is not defined

## Roestta Stone between Class / Labels 

In [20]:
detection_ids = []

from collections import namedtuple
Classification = namedtuple('Classification', ['id', 'name'])

for _match in match_class:
    id = lookup_tables.class_labels[_match.item()]['id']
    name = lookup_tables.class_labels[_match.item()]['name']
    c = Classification(id, name)
    detection_ids.append(c)

In [22]:
json_labels = root / 'Data' / 'test' / 'fw_system' / 'labels' / 'notes.json'

with open(json_labels) as jl:
    labels_conversion = json.load(jl)

labels_adjusted = df[0]

classes = labels_adjusted.apply(lambda row: labels_conversion['categories'][row]['name'])
classes


NameError: name 'df' is not defined

In [23]:
import matplotlib.pyplot as plt
from collections import Counter
class_counts_detection = Counter([item.name for item in detection_ids])

# Extract class names and counts from the 'classes' series
class_counts_series = Counter(classes)

# Extract class names and counts (the union of keys)
names = list(set(class_counts_detection.keys()).union(class_counts_series.keys()))

# Get counts for each class from both sources, defaulting to 0 if not found
detection_values = [class_counts_detection.get(name, 0) for name in names]
series_values = [class_counts_series.get(name, 0) for name in names]

# Plotting the stacked bar chart
plt.figure(figsize=(10, 6))
plt.bar(names, detection_values, color='skyblue', label='Matched Classes')
plt.bar(names, series_values, bottom=detection_values, color='orange', label='Ground Truth Classes')

# Adding labels and title
plt.xlabel('Class Name')
plt.ylabel('Frequency')
plt.title('Matched Classes Distribution vs Ground Truth')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Add legend
plt.legend()

# Show the plot
plt.show()

NameError: name 'classes' is not defined

In [24]:
pred_bboxes

tensor([[ 606.6666,  292.3619,  651.7353,  337.4257],
        [ 606.8184,  222.0527,  651.7449,  266.7242],
        [1137.0739,  868.9871, 1180.5472,  913.4901],
        [ 561.8498,  997.5872,  606.7565, 1042.2794],
        [ 618.9312,  997.6917,  664.0825, 1042.5157],
        [1080.1989,  868.8502, 1123.7912,  914.2368],
        [  75.1580,  953.4601,  120.5877,  998.2258],
        [ 183.2225,  887.7995,  227.2415,  934.7491],
        [ 590.5762,  751.4611,  634.5347,  799.7556],
        [1345.8783,  759.3105, 1389.2747,  804.4855],
        [ 182.8255,  953.0795,  228.0563,  997.8292],
        [1081.1742,  622.6985, 1126.8682,  667.9127],
        [  75.7868,  889.4388,  119.7221,  934.1135],
        [ 562.0662,  906.4601,  605.8758,  950.6668],
        [ 524.4335,  199.3483,  569.5709,  243.7855],
        [ 682.2962,  744.0904,  726.2284,  789.0168],
        [1177.3525,  622.9869, 1222.2386,  667.9508],
        [ 523.9020,  306.6059,  568.3209,  351.3579],
        [ 709.4512,  291.834

In [None]:
def match_boxes(pred_boxes, gt_boxes, iou_threshold=0.1):
    """
    Match predicted bounding boxes to ground truth boxes based on IoU.

    Args:
        pred_boxes (torch.Tensor): Tensor of shape (N, 4), predicted boxes in [x1, y1, x2, y2] format.
        gt_boxes (torch.Tensor): Tensor of shape (M, 4), ground truth boxes in [x1, y1, x2, y2] format.
        iou_threshold (float): IoU threshold for matching.

    Returns:
        torch.Tensor: Tensor of shape (N,), where each element is the index of the matching ground truth box, or -1 if no match.
    """
    classes = gt_boxes[:, 0]
    ious = metrics.box_iou(pred_boxes, gt_boxes[:, 1:])  # (N, M)

    # Find the best match for each predicted box
    max_iou, max_idx = ious.max(dim=1)  # (N,)

    # Assign matches based on IoU threshold
    matches = max_idx.clone()
    matches[max_iou < iou_threshold] = -1

    return matches, ious

In [None]:
from PIDObjectDetector import PIDPreprocessor
import pandas as pd

labels_path = root / 'Data' / 'test'/ 'fw_system' / 'labels' / '1de1eea3-fw_system-1_cropped.txt'


df = pd.read_csv(labels_path, sep=' ', header=None)
df.head()

preprocessor = PIDPreprocessor(
    root=root,
    width=WIDTH,
    height=HEIGHT,
    num_imgs=1,
    val_start=1,
)

df_torch = df.apply(lambda row: preprocessor.convert_yolo_to_torch(row), axis=1)

In [None]:
gt_tensor = torch.tensor(df_torch[['label', 'x1', 'y1', 'x2', 'y2']].values)
gt_labels = gt_tensor[:, 0]

In [None]:
results, ious = match_boxes(pred_bboxes, gt_tensor)
gt_labels = gt_tensor[:, 0]
gt_labels_filtered = gt_labels[results]

In [None]:
detection_classes = torch.tensor([x.id for x in detection_ids])
perf = torch.stack((gt_labels_filtered, detection_classes))

In [None]:
correct = (perf[0] == perf[1]).sum()
correct_pct = correct / perf.size(1)

print(f"Accuracy: {correct_pct:.2%}")

## Scratch

In [None]:
def pad_detections(detection_img, height, width, tile_size):
        """
        Pad detection images to 640 x 640 pixels for consistent sizing for YOLO inference
        """
        pad_left = (tile_size - width) // 2
        pad_right = tile_size - width - pad_left
        pad_top = (tile_size - height) // 2
        pad_bottom = tile_size - height - pad_top
        
        padded_detection = cv2.copyMakeBorder(
            detection_img,
            pad_top, pad_bottom, pad_left, pad_right,
            cv2.BORDER_CONSTANT,
            value=[255, 255, 255]  # White padding
        )
        return padded_detection

for i, prediction in enumerate(pred_bboxes):
    x1, y1, x2, y2 = prediction.long()
    x1, y1, x2, y2 = x1.item(), y1.item(), x2.item(), y2.item()

    detection = im[y1:y2, x1:x2]
    height, width = (y2 - y1), (x2 - x1)
    padded_detection = pad_detections(detection, height, width, tile_size=256)
    
    write_path = root / 'Data' / 'test' / 'fw_system' / 'predictions' / model_folder / f'prediction_{i}.png'
    if not cv2.imwrite(write_path, padded_detection):
        print('Failed')

In [None]:
for img in gt_imgs:
    object = cv2.imread(img)
    height, width, _ = object.shape
    class_id = re.findall('(\d+).png', img)[0]
    
    padded_detection = pad_detections(object, height, width, tile_size=256)

    name = d['categories'][int(class_id)]['name']

    write_path = root / 'Data' / 'test' / 'fw_system' / 'labels' / f'{name}.png'
    if not cv2.imwrite(write_path, padded_detection):
        print('Failed')

In [None]:
iou = metrics.box_iou(label_tensor_torch.to(device), pred_bboxes.to(device), eps=1e-07)
mean_iou, tp, fp, fn = detection_metrics(iou, iou_threshold=IOU_THRESHOLD)
print(f"Mean IOU: {mean_iou:.3f}")
print(f"Overall Box Recall: {tp / (tp + fn):.3f}")
print(f"Overall Box Precision: {tp /(tp + fp):.3f}")