In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import torch
import torch.nn.functional as F
import torchvision
from torchvision import models, transforms

from PIL import Image

from tqdm import tqdm

import json
import requests

In [67]:
# Download the ImageNet class index to label mapping
IMAGENET_CLASSES_URL = "https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json"
response = requests.get(IMAGENET_CLASSES_URL)
class_idx = response.json()

# Convert it to the idx_to_class dictionary
idx_to_class = {int(k): v[1] for k, v in class_idx.items()}

In [68]:
def predict_image(image, model, transform, topk=5):
    """
    Predict the top-k categories for an image.
    """
    # If image is a path, then load it; otherwise, assume it's already an array
    if isinstance(image, str):
        image = Image.open(image)
    else:
        image = Image.fromarray(image)
    
    image = transform(image).unsqueeze(0)  # Add batch dimension
    image = image.to(device)
    
    model.eval()
    with torch.no_grad():
        output = model(image)
        probs, indices = output.topk(topk)
        probs = F.softmax(probs, dim=1)[0] * 100
        indices = indices[0]
    
    labels = [idx_to_class[int(idx.item())] for idx in indices]
    #labels = [idx_to_class[idx] for idx in indices]
    return labels, probs.cpu().numpy()


In [69]:
def load_images_from_directory(base_path, limit=None):
    all_images = {}
    assembly_folders = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    
    if limit:
        assembly_folders = assembly_folders[:limit]
    
    for assembly_id in tqdm(assembly_folders, desc="Loading images"):
        assembly_path = os.path.join(base_path, assembly_id)
        all_images[assembly_id] = {
            'assembly': None,
            'bodies': []
        }
        
        for image_name in os.listdir(assembly_path):
            image_path = os.path.join(assembly_path, image_name)
            
            # Load assembly image
            if image_name == 'assembly.png':
                with Image.open(image_path) as img:
                    all_images[assembly_id]['assembly'] = np.array(img)
            # Load body images
            elif 'bodies' in image_name:
                with Image.open(image_path) as img:
                    all_images[assembly_id]['bodies'].append(np.array(img))
                    
    return all_images

In [70]:
def predict_first_n_assemblies(all_images, model, transform, n=3):
    predictions = {}
    for idx, assembly_id in enumerate(tqdm(list(all_images.keys())[:n], desc="Predicting")):
        predictions[assembly_id] = {
            'assembly': None,
            'bodies': []
        }
        
        # Predict for the assembly image
        assembly_image = all_images[assembly_id]['assembly']
        if assembly_image is not None:
            labels, probabilities = predict_image(assembly_image, model, transform)
            predictions[assembly_id]['assembly'] = (labels, probabilities)
            
        # Predict for the body images
        for body_image in all_images[assembly_id]['bodies']:
            labels, probabilities = predict_image(body_image, model, transform)
            predictions[assembly_id]['bodies'].append((labels, probabilities))
            
    return predictions


In [71]:
base_path = "C:\\Users\\richt\\Documents\\ASME_data\\train\\Fusion360GalleryDataset_23hackathon_train"
all_images = load_images_from_directory(base_path, limit=3)

Loading images: 100%|██████████| 3/3 [00:00<00:00, 55.68it/s]


In [72]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [73]:
model = models.resnet50(pretrained=True)
model.eval() # Set the model to evaluation mode

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [74]:
# Check if CUDA is available and set device accordingly
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [75]:
missing_indices = set(range(1000)) - set(idx_to_class.keys())
print(f"Missing indices: {missing_indices}")


Missing indices: set()


In [76]:
print(613 in idx_to_class)

True


In [77]:
print(model.fc.out_features)

1000


In [78]:
# Now use the function
selected_predictions = predict_first_n_assemblies(all_images, model, transform, n=3)

Predicting: 100%|██████████| 3/3 [00:00<00:00,  3.87it/s]


In [82]:
for image_id, prediction_data in selected_predictions.items():
    print(f"\nPredictions for Image ID: {image_id}")
    
    for image_type, predictions in prediction_data.items():
        print(f"\n  {image_type} Predictions:")
        
        # Check if the predictions are a tuple and have expected length
        if isinstance(predictions, tuple) and len(predictions) == 2:
            labels, probabilities = predictions
            for label, prob in zip(labels, probabilities):
                print(f"    Label: {label}, Probability: {prob:.2f}%")
        else:
            print("    No predictions available for this image type.")



Predictions for Image ID: 100029_94515530

  assembly Predictions:
    Label: joystick, Probability: 25.79%
    Label: potter's_wheel, Probability: 20.68%
    Label: loudspeaker, Probability: 20.30%
    Label: plunger, Probability: 19.61%
    Label: pencil_sharpener, Probability: 13.62%

  bodies Predictions:
    No predictions available for this image type.

Predictions for Image ID: 100106_7f144e5b

  assembly Predictions:
    Label: water_jug, Probability: 32.20%
    Label: pencil_sharpener, Probability: 25.27%
    Label: mouse, Probability: 19.21%
    Label: paper_towel, Probability: 12.30%
    Label: can_opener, Probability: 11.02%

  bodies Predictions:
    No predictions available for this image type.

Predictions for Image ID: 100112_bc0a563a

  assembly Predictions:
    Label: projector, Probability: 53.16%
    Label: vacuum, Probability: 14.59%
    Label: radio, Probability: 12.36%
    Label: spotlight, Probability: 11.85%
    Label: switch, Probability: 8.03%

  bodies Pred