In [1]:
import openai
from pathlib import Path
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import backoff
import time
from collections import Counter

In [2]:
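# Provide the OpenAI API key before running the cells below. Prefer the
# OPENAI_API_KEY environment variable over hardcoding the key in the notebook.
# openai.api_key = "open_ai_key"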


In [3]:
def get_completion(prompt='', model="gpt-3.5-turbo", temperature=0, max_tokens=200):
    """Send a single-turn chat prompt to the OpenAI API and return the reply text.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        The content of the first choice's message, or the string "ERROR"
        when the request fails for any reason other than rate limiting.

    Raises:
        openai.error.RateLimitError: Propagated unchanged so that a retrying
            caller (for example one wrapped in ``backoff.on_exception``) can
            actually see the error and retry.
    """
    messages = [{"role": "user", "content": prompt}]

    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=temperature,
            # Forward the caller's limit instead of a hard-coded value.
            max_tokens=max_tokens,
        )
        response = response.choices[0].message["content"]
    except openai.error.RateLimitError:
        # Must come before the generic handler below; otherwise the error is
        # converted to "ERROR" and no retry logic can ever be triggered.
        raise
    except openai.error.APIError as e:
        print(f"OpenAI API Error: {e}")
        response = "ERROR"
    except openai.error.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        response = "ERROR"
    except Exception as e:
        # Best-effort: any other failure is reported and mapped to a sentinel.
        print(f"Exception: {e}")
        response = "ERROR"
    return response

@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
def get_completion_with_backoff(**kwargs):
    """Call ``get_completion`` after a 0.1 s pause, under an exponential-backoff retry.

    The decorator retries this function with exponentially growing waits
    whenever an ``openai.error.RateLimitError`` propagates out of it.

    Args:
        **kwargs: Keyword arguments passed straight through to ``get_completion``.

    Returns:
        Whatever ``get_completion`` returns for the given arguments.
    """
    # Fixed pause before every attempt to space out consecutive requests.
    time.sleep(0.1)
    completion = get_completion(**kwargs)
    return completion

def get_all_files(directory, pattern):
    """Collect the paths under ``directory`` that match a glob ``pattern``.

    Args:
        directory: Root folder to search (string or path-like).
        pattern: Glob pattern, relative to ``directory``.

    Returns:
        A list of ``Path`` objects, in the order produced by ``Path.glob``.
    """
    root = Path(directory)
    return list(root.glob(pattern))

def post_process_response(response):
    """Map a free-text model reply onto one of the known material categories.

    Matching is case-insensitive, so replies such as "Steel" or "Plastic."
    resolve to their category instead of falling through to 'other'.

    Args:
        response: Raw reply text from the model (may be empty or None).

    Returns:
        The first category, in precedence order, whose name occurs in the
        reply; 'other' when none occurs or the reply is empty.
    """
    # Precedence order matters: 'non-ferrous metal' contains 'ferrous metal',
    # so the longer name has to be tested first.
    categories = (
        'non-ferrous metal',
        'aluminum',
        'steel',
        'wood',
        'plastic',
        'ferrous metal',
    )
    text = (response or '').lower()
    for category in categories:
        if category in text:
            return category
    return 'other'

In [4]:
# Root folder of the test split; each assembly sits in its own sub-folder
# that contains an assembly.json file.
input_dir = r"/mnt/audio_ml/IDETC23_data/Fusion360GalleryDataset_23hackathon_test"
input_jsons = get_all_files(directory=input_dir, pattern="*/assembly.json")

In [5]:
# Build {assembly folder name: [{'body_id', 'name'}, ...]} from every assembly.json.
# Bodies whose name starts with 'Body' are skipped (presumably auto-generated
# default names that carry no material hint — TODO confirm).
assemblies = {}
for input_json in tqdm(input_jsons):
    with open(input_json, "r", encoding="utf-8") as f:
        assembly_data = json.load(f)

    bodies = [
        {'body_id': body_id, 'name': body_info['name']}
        for body_id, body_info in assembly_data['bodies'].items()
        if not body_info['name'].startswith('Body')
    ]

    # Assemblies left with no named body are dropped entirely.
    if bodies:
        assemblies[input_json.parts[-2]] = bodies

100%|██████████| 10/10 [00:00<00:00, 617.85it/s]


In [6]:
# Counts only the assemblies kept in `assemblies`, i.e. those that had at
# least one body whose name does not start with 'Body'.
print(f"Number of assemblies in the test set: {len(assemblies)}")

Number of assemblies in the test set: 10


In [9]:
# Collapse bodies that share a name, keeping the first body seen for each name.
first_body_by_name = {}
for assembly_bodies in assemblies.values():
    for candidate in assembly_bodies:
        # setdefault leaves an existing entry untouched, so the first occurrence wins.
        first_body_by_name.setdefault(candidate['name'], candidate)

body_names = set(first_body_by_name)  # every distinct body name
unique_bodies = list(first_body_by_name.values())  # first-seen order is preserved

In [10]:
# Body counts on either side of the name-based deduplication.
total_bodies_before = sum(len(assembly) for assembly in assemblies.values())
total_bodies_after = len(unique_bodies)

print(f"Number of bodies before deduplication: {total_bodies_before}")
print(f"Number of bodies after deduplication: {total_bodies_after}")


Number of bodies before deduplication: 106
Number of bodies after deduplication: 105


In [11]:
# Instruction prefix shared by every request; the text
# "part name: <name>, material: " is appended to it for each body.
prompt = """
The following is the name of a part in an assembly.
Given the name of the part, classify the material of the part as either of the following material categories: 'aluminum', 'ferrous metal', 'non-ferrous metal', 'steel', 'plastic', 'wood', or 'other'.
If you are unsure, please answer 'other'. Only respond with the material category.
"""


In [12]:
# Query the model once per distinct body name and store the normalized category.
unique_body_predictions = {}
for body in tqdm(unique_bodies):
    body_name = body['name']
    full_prompt = prompt + f"part name: {body_name}, material: "
    raw_answer = get_completion_with_backoff(prompt=full_prompt)
    # Free-text replies are mapped onto the fixed category vocabulary.
    unique_body_predictions[body_name] = post_process_response(raw_answer)

# Expand the per-name predictions back into (body_id, material) pairs for every
# assembly; bodies that share a name share the same predicted material.
assembly_material_predictions = {}
for assembly_id, bodies in tqdm(assemblies.items()):
    assembly_material_predictions[assembly_id] = [
        (body['body_id'], unique_body_predictions[body['name']])
        for body in bodies
    ]

100%|██████████| 105/105 [00:39<00:00,  2.65it/s]
100%|██████████| 10/10 [00:00<00:00, 84222.97it/s]


In [13]:
# Dumps the full {assembly_id: [(body_id, material), ...]} mapping for inspection.
print(f"Predicted materials for the test set:\n\n{assembly_material_predictions}")

Predicted materials for the test set:

{'35584_fb213b6b': [('90c4fe0c-0606-11ec-b78f-0ae0e5d97f29', 'other'), ('90c76eda-0606-11ec-99ac-0ae0e5d97f29', 'other'), ('90cb18c6-0606-11ec-8820-0ae0e5d97f29', 'other'), ('90cb8dbe-0606-11ec-8bc1-0ae0e5d97f29', 'other'), ('90cd8992-0606-11ec-8c0d-0ae0e5d97f29', 'other'), ('90d13322-0606-11ec-b81a-0ae0e5d97f29', 'other'), ('90d18114-0606-11ec-8435-0ae0e5d97f29', 'other'), ('90d6b140-0606-11ec-907a-0ae0e5d97f29', 'other'), ('90d7e99c-0606-11ec-ba9e-0ae0e5d97f29', 'plastic'), ('90d85efa-0606-11ec-a64b-0ae0e5d97f29', 'other'), ('90dbe1b6-0606-11ec-8e32-0ae0e5d97f29', 'other'), ('90de798c-0606-11ec-a816-0ae0e5d97f29', 'other'), ('90deeea8-0606-11ec-8c14-0ae0e5d97f29', 'other')], '21762_90d34d16': [('28d76794-05fe-11ec-9188-02631ee0d5cb', 'plastic'), ('28f1a634-05fe-11ec-9632-02631ee0d5cb', 'other'), ('28facdf4-05fe-11ec-92f4-02631ee0d5cb', 'steel'), ('2901f9e4-05fe-11ec-94ce-02631ee0d5cb', 'other'), ('290ad386-05fe-11ec-a405-02631ee0d5cb', 'steel'),

In [14]:
# Write the submission file: a header line followed by one row per body.
submission_path = Path('../submission/submission.csv')
# Create the output folder if it is missing; open(..., 'w') does not create
# parent directories and would raise FileNotFoundError otherwise.
submission_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding keeps the output independent of the platform default,
# matching the utf-8 used when reading the assembly files.
with open(submission_path, 'w', encoding='utf-8') as file:
    file.write("assembly_id,body_id,material_category\n")

    for assembly_id, predictions in assembly_material_predictions.items():
        for body_id, material in predictions:
            file.write(f"{assembly_id},{body_id},{material}\n")