In [None]:
from pathlib import Path
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import time
from collections import Counter
from sklearn.model_selection import train_test_split

In [None]:
def get_all_files(directory, pattern):
    """Return every path under ``directory`` that matches the glob ``pattern``.

    Args:
        directory: Folder to search (string or ``Path``).
        pattern: Glob pattern relative to ``directory``, e.g. ``"*/assembly.json"``.

    Returns:
        A list of ``Path`` objects in sorted order; empty when nothing matches.
    """
    # Path.glob yields entries in an arbitrary, filesystem-dependent order.
    # Sorting makes everything derived from this list (such as a seeded
    # train/validation split) repeatable across runs and machines.
    return sorted(Path(directory).glob(pattern))

In [None]:
def id2name(id):
    """Translate a material category id into its human-readable name.

    The comparison is case-insensitive. Surrounding whitespace is NOT
    stripped, so an id such as ``"wood "`` is rejected.

    Args:
        id: Material category id, e.g. ``"Metal_Ferrous"``.

    Returns:
        The readable material name in lower case.

    Raises:
        ValueError: If ``id`` is not one of the known categories.
    """
    key = id.lower()

    # Categories whose readable name differs from the id itself.
    renamed = {
        'metal_non-ferrous': 'non-ferrous metal',
        'metal_ferrous': 'ferrous metal',
        'metal_ferrous_steel': 'steel',
        'metal_aluminum': 'aluminum',
    }
    if key in renamed:
        return renamed[key]

    # Categories that are already readable and are returned unchanged.
    if key in ('other', 'wood', 'plastic'):
        return key

    # Raising a bare string is itself a TypeError in Python 3 and would hide
    # this message, so a real exception type is raised instead.
    raise ValueError(f"Non-default id! {key}")

In [None]:
def show_confusion_matrix(y_true, y_pred, classes=None, normalize=None):
    """Compute a confusion matrix for the given labels and display it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Optional label list; passed to ``confusion_matrix`` as
            ``labels`` and used as the display labels of the plot.
        normalize: Forwarded unchanged to ``confusion_matrix``.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=classes, normalize=normalize)
    display = ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=classes)
    # Rotate the x tick labels so longer class names do not overlap.
    display.plot(xticks_rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Root folder of the downloaded dataset -- edit this to match your machine.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run elsewhere until it is changed.
input_dir = r"C:\Users\richt\OneDrive - University of Central Florida\Collabs\Shared OneDrive File - Zack\ASME-Hackathon\data\train_new\Fusion360GalleryDataset_23hackathon_train"

# Collect the assembly.json file from each immediate sub-folder of input_dir.
input_jsons = get_all_files(directory=input_dir, pattern="*/assembly.json")


In [1]:
# Build `assemblies`: a dict mapping each assembly's folder name to the list of
# its bodies that carry a non-default name. Every entry records the body's
# name and material category, plus its density when one could be found.
assemblies = {}
for input_json in tqdm(input_jsons):
    with open(input_json, "r", encoding="utf-8") as f:
        assembly_data = json.load(f)

    # Density is stored per occurrence; copy it to every body that the
    # occurrence lists. An assembly without occurrences, or an occurrence
    # without physical properties or bodies, simply contributes nothing.
    densities = {}
    for occurrence in (assembly_data.get("occurrences") or {}).values():
        physical_properties = occurrence.get("physical_properties") or {}
        density = physical_properties.get("density")
        if not density:
            # Missing (or zero) density carries no usable information.
            continue
        # .get() guards against occurrences that have a density but no
        # "bodies" entry, which would otherwise abort the whole scan.
        for body_key in occurrence.get("bodies") or {}:
            densities[body_key] = density

    bodies = []
    for key, value in assembly_data['bodies'].items():
        name = value['name']
        # Only consider bodies with custom names: names starting with
        # 'Body' are treated as default names and skipped.
        if name.startswith('Body'):
            continue
        body_data = {'name': name, 'material': value['material_category']}
        # If density data exists for the body, include it.
        if key in densities:
            body_data['density'] = densities[key]
        bodies.append(body_data)

    # Keep only assemblies with at least one custom-named body, keyed by the
    # name of the folder that contains the assembly.json file.
    if bodies:
        assemblies[input_json.parent.name] = bodies


NameError: name 'tqdm' is not defined

In [None]:
# Compare the number of assembly.json paths found with the number of entries
# that were stored in `assemblies`.
print(f"Number of total assemblies in the training set: {len(input_jsons)}")
print(f"Number of assemblies that have at least one non-default part name: {len(assemblies)}")

In [None]:
# One element per assembly (the values of `assemblies`); the dict keys are
# dropped at this point.
body_list = list(assemblies.values())

# Hold out 20% of the assemblies for validation. The fixed random_state makes
# the shuffle repeatable for a given ordering of body_list.
train_assemblies, validation_assemblies = train_test_split(
    body_list,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [None]:
# Report the sizes of the two partitions produced by the train/validation split.
print(f"number of train assemblies: {len(train_assemblies)}")
print(f"number of validation assemblies: {len(validation_assemblies)}")