In [20]:
import json
import re
from pathlib import Path
import glob
from PIL import Image
import numpy as np

import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from nomic import atlas

from PIDDetectionEncoder import ObjectEncoder, PIDObjects, AugmentedViewGenerator, img_augmentation
from PIDObjectLookupTable import ObjectLookupTable

In [4]:
# Project root: two directory levels above the current working directory.
root = Path().resolve().parents[1]
img_dir = root / 'Data' / 'SimCLR'

# Tunable settings, kept in one place instead of inline magic numbers.
IMG_SIZE = 224          # passed to img_augmentation as img_size
BLUR_KERNEL_SIZE = 3    # passed to img_augmentation as blur_kernel_size
N_VIEWS = 8             # augmented views requested per source image
BATCH_SIZE = 108        # samples per DataLoader batch

# Image augmentation pipeline (defined in PIDDetectionEncoder).
augmentation_pipeline = img_augmentation(
    img_size=IMG_SIZE,
    blur_kernel_size=BLUR_KERNEL_SIZE,
)
# Wraps the pipeline so that each image is augmented N_VIEWS times
# (presumably returned as a list of views -- see AugmentedViewGenerator).
augmentation_generator = AugmentedViewGenerator(augmentation_pipeline, n_views=N_VIEWS)
# P&ID symbol dataset read from img_dir; the view generator is applied as its transform.
pid_dataset = PIDObjects(img_dir=img_dir, transform=augmentation_generator)

# Shuffled loader over the augmented dataset.
train_dataloader = DataLoader(
    pid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)



In [5]:
class UnnormalizePIDObjects():
    """Invert a per-channel ``(x - mean) / std`` normalization.

    The statistics are stored with shape ``(C, 1, 1)`` so they broadcast over
    tensors laid out as ``(C, H, W)`` or ``(B, C, H, W)``.

    Args:
        mean: Per-channel means. Defaults to the values this notebook has
            always used (presumably the P&ID dataset statistics).
        std: Per-channel standard deviations, same length as ``mean``.
    """

    DEFAULT_MEAN = (0.753, 0.753, 0.754)
    DEFAULT_STD = (0.333, 0.333, 0.332)

    def __init__(self, mean=DEFAULT_MEAN, std=DEFAULT_STD):
        dtype = torch.get_default_dtype()
        self.mean = torch.tensor(mean, dtype=dtype).reshape(-1, 1, 1)
        self.std = torch.tensor(std, dtype=dtype).reshape(-1, 1, 1)

    def unnormalize(self, tensor):
        """Return ``tensor * std + mean`` as a new tensor.

        The stored statistics are moved to ``tensor.device`` first so inputs
        that live on an accelerator do not raise a device-mismatch error.
        """
        mean = self.mean.to(tensor.device)
        std = self.std.to(tensor.device)
        return tensor * std + mean

In [6]:
# Write every augmented view of every sample to
# Data/SimCLR_augmentations/<label>_<SUFFIX>.png, where SUFFIX distinguishes
# the images that share a label.

def _alpha_suffix(index):
    """Map 0, 1, 2, ... to 'A', ..., 'Z', 'AA', 'AB', ... (uppercase only).

    Uppercase-only suffixes stay unique on case-insensitive filesystems and
    never contain characters that are illegal in file names.
    """
    suffix = ''
    index += 1
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        suffix = chr(ord('A') + remainder) + suffix
    return suffix


save_dir = root / 'Data' / 'SimCLR_augmentations'
# img.save() does not create missing directories.
save_dir.mkdir(parents=True, exist_ok=True)

# Loop-invariant helpers, built once.
to_pil = transforms.ToPILImage()
unnormalizer = UnnormalizePIDObjects()

# label -> number of images already written for that label. Kept across
# DataLoader batches so a label seen in a later batch continues its suffix
# sequence instead of overwriting earlier files.
imgs_saved_per_label = {}

for views, labels in train_dataloader:
    # `views` is iterated as a sequence of view tensors; each view is indexed
    # per sample along its first dimension, in step with `labels`.
    for view in views:
        # ToPILImage scales float tensors by 255 and casts to uint8, so values
        # slightly outside [0, 1] would wrap around; clamp them first.
        view_imgs = unnormalizer.unnormalize(view).clamp(0, 1)

        for img_tensor, label_tensor in zip(view_imgs, labels):
            label = label_tensor.item()

            n_saved = imgs_saved_per_label.get(label, 0)
            imgs_saved_per_label[label] = n_saved + 1

            save_path = save_dir / f'{label}_{_alpha_suffix(n_saved)}.png'
            print(save_path)
            to_pil(img_tensor).save(save_path)

C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\3_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\20_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\7_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\16_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\20_B.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\1_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\11_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\31_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\30_A.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\1_B.png
C:\Users\Andrew Deur\Documents\NYU\DS-GA 1006 Capstone\Data\SimCLR_augmentations\33_A.png
C:\Users\Andre

In [70]:
root = Path().resolve().parents[1]
img_dir = root / 'Data' / 'SimCLR_augmentations' / '*.png'
# glob order is filesystem-dependent; sort so the row order shared by the
# image paths, embeddings and labels is reproducible between runs.
imgs = sorted(glob.glob(str(img_dir)))
if not imgs:
    raise FileNotFoundError(f'No augmented images found matching {img_dir}')

# Pre-Trained Encoder Path
encoder_pth = root / 'nyu-capstone-2024-PIDGraph' / 'Object Detection'/ 'models' / 'encoder_1.pth'

# The checkpoint is used directly as the encoder object, so it is presumably a
# fully pickled model rather than a state_dict. Request that explicitly:
# relying on the implicit default emits a FutureWarning and stops working once
# the default becomes weights_only=True.
# SECURITY: weights_only=False unpickles arbitrary objects -- only load
# checkpoints from a trusted source.
encoder = torch.load(encoder_pth, weights_only=False)

# NOTE(review): `labels` is whatever the last DataLoader batch of the
# augmentation cell left behind; it is not derived from `imgs` and need not
# match it in length or order. Confirm what ObjectLookupTable expects here.
lookup_tables = ObjectLookupTable(labels, imgs, encoder, img_size=224)

  encoder = torch.load(encoder_pth)


In [71]:
# Copy the lookup table's embeddings to host memory as a NumPy array.
# detach() first: .numpy() raises on a tensor that still carries autograd history.
embeddings_np = lookup_tables.gt_embeddings.detach().cpu().numpy()

In [72]:
# Load the class-definition JSON; `d` is later indexed by class id string and
# each entry is read for its 'class_name'.
class_labels = root / 'nyu-capstone-2024-PIDGraph' / 'Object Detection' / 'classes_general.json'
# Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows) can
# mis-decode non-ASCII characters in the file.
with open(class_labels, encoding='utf-8') as class_file:
    d = json.load(class_file)

In [76]:
# Augmentation files are named '<class id>_<LETTERS>.png' (letter suffix optional).
# Raw string: '\d' and '\.' in a normal string literal are invalid escapes.
# Anchored at the end and applied to the file name only, so digits elsewhere
# in the directory path can never be picked up.
_CLASS_ID_PATTERN = re.compile(r'(\d+)(?:_[a-z]+)?\.png$', re.IGNORECASE)


def _class_id_from_path(path):
    """Return the class id encoded in an image file name, as a string key.

    Leading zeros are dropped via the int round-trip ('007' -> '7').

    Raises:
        ValueError: if the file name does not contain a class id.
    """
    match = _CLASS_ID_PATTERN.search(Path(path).name)
    if match is None:
        raise ValueError(f'Cannot parse a class id from image path: {path}')
    return str(int(match.group(1)))


img_classes = [_class_id_from_path(path) for path in imgs]

# Human-readable class name for every image, looked up by class id.
text_labels = [d[class_id]['class_name'] for class_id in img_classes]

In [79]:
def _atlas_record(idx, img_path):
    """Build the metadata row for the image at position ``idx`` in ``imgs``."""
    record = {}
    record["id"] = f"img_{idx}"
    record["label"] = text_labels[idx]
    record["image_path"] = img_path
    return record


# One metadata record per image, in the same order as `imgs`.
data = [_atlas_record(idx, img_path) for idx, img_path in enumerate(imgs)]

In [80]:
# Upload the per-image records, the image files and their embeddings to Atlas
# as a single map. The call stays the last expression so its result is
# displayed as the cell output.
atlas_map_name = 'P&ID Object Embedding Clustering (Resnet18 Backbone)'
atlas.map_data(data=data, blobs=imgs, embeddings=embeddings_np, identifier=atlas_map_name)

[32m2024-12-17 20:19:46.057[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_create_project[0m:[36m867[0m - [1mOrganization name: `ad3254`[0m
[32m2024-12-17 20:19:46.458[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36m_create_project[0m:[36m895[0m - [1mCreating dataset `pid-object-embedding-clustering-resnet18-backbone`[0m
[32m2024-12-17 20:19:46.799[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m140[0m - [1mUploading data to Atlas.[0m
Loading images: 100%|██████████| 791/791 [00:00<00:00, 1059.68it/s]
Uploading blobs to Atlas: 792it [00:18, 43.88it/s]                          
[32m2024-12-17 20:20:05.612[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_data[0m:[36m158[0m - [1m`ad3254/pid-object-embedding-clustering-resnet18-backbone`: Data upload succeeded to dataset`[0m
[32m2024-12-17 20:20:06.752[0m | [1mINFO    [0m | [36mnomic.dataset[0m:[36mcreate_index[0m:[36m1301[0m - [1mCreated map `P&ID Object Embedding Clu