## Prediction of Material Categories of 3D CAD Parts: A Machine Learning Approach
### Hossein Basereh Taramsari

## Phase 1 - Data Cleaning

In [11]:
import json
import os
import time
from collections import Counter
from pathlib import Path

import backoff
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [12]:
def get_all_files(directory, pattern):
    """Return every path under ``directory`` that matches the glob ``pattern``.

    Parameters
    ----------
    directory : str or Path
        Root folder the pattern is evaluated against.
    pattern : str
        Glob pattern relative to ``directory`` (e.g. ``"*/assembly.json"``).

    Returns
    -------
    list[Path]
        Matching paths in sorted order; an empty list when nothing matches.
    """
    # glob() yields entries in filesystem-dependent order; sorting keeps the
    # downstream row order identical from one run (and one machine) to the next.
    return sorted(Path(directory).glob(pattern))

In [13]:
# Root folder of the dataset. The location can be overridden with the
# FUSION360_DATASET_DIR environment variable so the notebook also runs on
# machines where the data lives elsewhere; the default is the original path.
input_dir = os.environ.get(
    "FUSION360_DATASET_DIR",
    r"C:\Users\Hossein\Desktop\Autodesk Project\Fusion360GalleryDataset_23hackathon_train",
)
# One assembly.json is expected in each immediate sub-folder of input_dir.
input_jsons = get_all_files(input_dir, "*/assembly.json")

In [38]:
def _assembly_features(assembly_data):
    """Collect the assembly-wide values that are copied onto every body row.

    Reads the top-level "properties" section of one parsed assembly.json.
    Missing numeric entries default to 0 and missing lists to [].
    """
    properties = assembly_data.get("properties", {})
    return {
        "edge_count": properties.get("edge_count", 0),
        "face_count": properties.get("face_count", 0),
        "loop_count": properties.get("loop_count", 0),
        "body_count": properties.get("body_count", 0),
        "assembly_area": properties.get("area", 0),
        "assembly_volume": properties.get("volume", 0),
        "density": properties.get("density", 0),
        "mass": properties.get("mass", 0),
        "categories": properties.get("categories", []),
        "industries": properties.get("industries", []),
    }


def _body_record(body, assembly_features):
    """Build one flat row for a body: its own values followed by the shared assembly values."""
    physical_properties = body.get("physical_properties", {})
    center_of_mass = physical_properties.get("center_of_mass", {})
    return {
        "name": body.get("name", ""),
        "type": body.get("type", ""),
        "center_of_mass_x": center_of_mass.get("x", 0),
        "center_of_mass_y": center_of_mass.get("y", 0),
        "center_of_mass_z": center_of_mass.get("z", 0),
        "area": physical_properties.get("area", 0),
        "volume": physical_properties.get("volume", 0),
        "material": body.get("material_category", ""),
        # Assembly-level values come last so the column order stays unchanged.
        **assembly_features,
    }


# Maps the assembly folder name to the list of body rows extracted from its JSON.
assemblies = {}

for input_json in tqdm(input_jsons):
    with open(input_json, "r", encoding="utf-8") as f:
        assembly_data = json.load(f)

    # Only bodies whose name starts with "Body" are kept; a body without a
    # name is skipped, exactly as before.
    selected_bodies = [
        body
        for body in assembly_data.get("bodies", {}).values()
        if body.get("name", "").startswith("Body")
    ]
    if not selected_bodies:
        # Assemblies without any matching body are left out of the result.
        continue

    # The assembly-level properties are identical for every body of this
    # assembly, so they are read once per file instead of once per body.
    assembly_features = _assembly_features(assembly_data)
    assemblies[input_json.parts[-2]] = [
        _body_record(body, assembly_features) for body in selected_bodies
    ]


100%|█████████████████████████████████████████████████████████████████████████████| 6336/6336 [00:20<00:00, 306.83it/s]


In [39]:
# Wide table built from the assemblies dict: one row per assembly key and one
# column per position in that assembly's body list; the former index is
# exposed as the "Assembly" column.
assemblies_df1 = (
    pd.DataFrame.from_dict(assemblies, orient="index")
    .reset_index()
    .rename(columns={"index": "Assembly"})
)
assemblies_df1

Unnamed: 0,Assembly,0,1,2,3,4,5,6,7,8,...,296,297,298,299,300,301,302,303,304,305
0,100029_94515530,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body4', 'type': 'BRepBody', 'center_...","{'name': 'Body5', 'type': 'BRepBody', 'center_...","{'name': 'Body6', 'type': 'BRepBody', 'center_...","{'name': 'Body7', 'type': 'BRepBody', 'center_...","{'name': 'Body8', 'type': 'BRepBody', 'center_...",,,,...,,,,,,,,,,
1,100106_7f144e5b,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body2', 'type': 'BRepBody', 'center_...",,,,,,,,...,,,,,,,,,,
2,100112_bc0a563a,"{'name': 'Body2', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...","{'name': 'Body4', 'type': 'BRepBody', 'center_...","{'name': 'Body6', 'type': 'BRepBody', 'center_...","{'name': 'Body7', 'type': 'BRepBody', 'center_...","{'name': 'Body8', 'type': 'BRepBody', 'center_...","{'name': 'Body9', 'type': 'BRepBody', 'center_...","{'name': 'Body10', 'type': 'BRepBody', 'center...","{'name': 'Body11', 'type': 'BRepBody', 'center...",...,,,,,,,,,,
3,100126_e58fbfba,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body2', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...",,,,,,,...,,,,,,,,,,
4,100138_119e1068,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...","{'name': 'Body6', 'type': 'BRepBody', 'center_...",,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6031,99833_12f36e36,"{'name': 'Body2', 'type': 'BRepBody', 'center_...","{'name': 'Body10', 'type': 'BRepBody', 'center...",,,,,,,,...,,,,,,,,,,
6032,99842_e4ee42a9,"{'name': 'Body2', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...","{'name': 'Body4', 'type': 'BRepBody', 'center_...","{'name': 'Body5', 'type': 'BRepBody', 'center_...","{'name': 'Body7', 'type': 'BRepBody', 'center_...","{'name': 'Body8', 'type': 'BRepBody', 'center_...","{'name': 'Body9', 'type': 'BRepBody', 'center_...","{'name': 'Body10', 'type': 'BRepBody', 'center...","{'name': 'Body11', 'type': 'BRepBody', 'center...",...,,,,,,,,,,
6033,99843_deca7822,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...","{'name': 'Body7', 'type': 'BRepBody', 'center_...","{'name': 'Body8', 'type': 'BRepBody', 'center_...","{'name': 'Body10', 'type': 'BRepBody', 'center...","{'name': 'Body13', 'type': 'BRepBody', 'center...","{'name': 'Body14', 'type': 'BRepBody', 'center...","{'name': 'Body38', 'type': 'BRepBody', 'center...","{'name': 'Body15', 'type': 'BRepBody', 'center...",...,,,,,,,,,,
6034,99849_96caf43a,"{'name': 'Body1', 'type': 'BRepBody', 'center_...","{'name': 'Body2', 'type': 'BRepBody', 'center_...","{'name': 'Body3', 'type': 'BRepBody', 'center_...","{'name': 'Body4', 'type': 'BRepBody', 'center_...","{'name': 'Body5', 'type': 'BRepBody', 'center_...","{'name': 'Body6', 'type': 'BRepBody', 'center_...","{'name': 'Body7', 'type': 'BRepBody', 'center_...","{'name': 'Body8', 'type': 'BRepBody', 'center_...","{'name': 'Body9', 'type': 'BRepBody', 'center_...",...,,,,,,,,,,


In [40]:
# Persist the wide (one column per body slot) frame without the DataFrame index.
# NOTE(review): the body cells hold Python dicts, so they are presumably written
# as their string repr rather than as structured data — confirm before
# relying on re-reading this file.
assemblies_df1.to_csv('assemblies.csv', index=False)

In [49]:
import pandas as pd

# Reshape the wide assemblies_df1 into a long two-column table: every
# non-missing, non-empty cell becomes one row holding the column label it came
# from ('Column') and the cell content ('Value'). Rows are visited top to
# bottom and cells left to right, so each assembly id row is followed by that
# assembly's body rows.
new_rows = [
    {'Column': column, 'Value': value}
    for _, row in assemblies_df1.iterrows()
    for column, value in row.items()
    if not pd.isna(value) and value != ""
]

# A frame built from a list of records already carries a fresh 0..n-1 index.
new_df = pd.DataFrame(new_rows)

# Keep a copy of the long table on disk.
new_df.to_csv('new_assemblies.csv', index=False)

# Show the long table.
new_df


Unnamed: 0,Column,Value
0,Assembly,100029_94515530
1,0,"{'name': 'Body1', 'type': 'BRepBody', 'center_..."
2,1,"{'name': 'Body4', 'type': 'BRepBody', 'center_..."
3,2,"{'name': 'Body5', 'type': 'BRepBody', 'center_..."
4,3,"{'name': 'Body6', 'type': 'BRepBody', 'center_..."
...,...,...
123394,10,"{'name': 'Body1', 'type': 'BRepBody', 'center_..."
123395,11,"{'name': 'Body1', 'type': 'BRepBody', 'center_..."
123396,12,"{'name': 'Body2', 'type': 'BRepBody', 'center_..."
123397,13,"{'name': 'Body1', 'type': 'BRepBody', 'center_..."


In [52]:
# The previous version of this cell rebuilt the frame row by row and tried to
# expand a '0' entry of each row dict. The row dicts only ever have the keys
# 'Column' and 'Value' (0 is a value of 'Column', not a key), so that branch
# never ran and the result was simply a copy of new_df. The copy is now made
# explicitly; the actual flattening of the body dicts happens in a later cell.
df = new_df.copy()
df

          Column                                              Value
0       Assembly                                    100029_94515530
1              0  {'name': 'Body1', 'type': 'BRepBody', 'center_...
2              1  {'name': 'Body4', 'type': 'BRepBody', 'center_...
3              2  {'name': 'Body5', 'type': 'BRepBody', 'center_...
4              3  {'name': 'Body6', 'type': 'BRepBody', 'center_...
...          ...                                                ...
123394        10  {'name': 'Body1', 'type': 'BRepBody', 'center_...
123395        11  {'name': 'Body1', 'type': 'BRepBody', 'center_...
123396        12  {'name': 'Body2', 'type': 'BRepBody', 'center_...
123397        13  {'name': 'Body1', 'type': 'BRepBody', 'center_...
123398        14  {'name': 'Body2', 'type': 'BRepBody', 'center_...

[123399 rows x 2 columns]


In [54]:
def flatten_dict(d):
    """Flatten one level of nesting in ``d``.

    Entries whose value is a dict are replaced by one entry per inner item,
    keyed ``"<outer>_<inner>"``; all other entries are copied unchanged.
    Only the first nesting level is expanded: a dict found inside an inner
    dict is kept as a value.

    Parameters
    ----------
    d : dict
        Mapping to flatten. It is not modified.

    Returns
    -------
    dict
        A new, flattened dictionary (empty when ``d`` is empty).
    """
    flat_dict = {}
    for key, value in d.items():
        if isinstance(value, dict):
            for sub_key, sub_value in value.items():
                # Formatting instead of `+` so non-string keys (e.g. ints)
                # no longer raise TypeError when the combined key is built.
                flat_dict[f"{key}_{sub_key}"] = sub_value
        else:
            flat_dict[key] = value
    return flat_dict

# One record per row of df: dict cells are expanded with flatten_dict, any
# other cell is kept under the 'Value' key.
flattened_dicts = [
    flatten_dict(cell) if isinstance(cell, dict) else {'Value': cell}
    for cell in df['Value']
]

# Assemble the table, attach the originating column label and move it to the front.
new_df = pd.DataFrame(flattened_dicts)
new_df['Column'] = df['Column']
remaining_columns = [col for col in new_df.columns if col != 'Column']
new_df = new_df[['Column'] + remaining_columns]
new_df = new_df.reset_index(drop=True)
new_df

Unnamed: 0,Column,Value,name,type,center_of_mass_x,center_of_mass_y,center_of_mass_z,area,volume,material,edge_count,face_count,loop_count,body_count,assembly_area,assembly_volume,density,mass,categories,industries
0,Assembly,100029_94515530,,,,,,,,,,,,,,,,,,
1,0,,Body1,BRepBody,9.170593e-16,2.846286,5.060009e-13,1.892846,0.064315,Plastic,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
2,1,,Body4,BRepBody,-2.057905e-09,2.423258,-2.936297e-02,0.659531,0.007196,Metal_Ferrous_Steel,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
3,2,,Body5,BRepBody,-2.066062e-09,2.421404,6.705135e-01,0.657318,0.007167,Metal_Non-Ferrous,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
4,3,,Body6,BRepBody,9.170593e-16,2.846286,7.000000e-01,1.892846,0.064315,Other,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123394,10,,Body1,BRepBody,6.132067e-01,0.000000,-2.670490e-11,1.729151,0.116680,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123395,11,,Body1,BRepBody,0.000000e+00,0.181651,1.007575e-11,0.129364,0.002853,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123396,12,,Body2,BRepBody,-6.182051e-17,-0.011000,8.108617e-12,0.016467,0.000024,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123397,13,,Body1,BRepBody,0.000000e+00,0.150000,1.045680e-11,0.109956,0.002356,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]


In [57]:
# Keep only the body rows: rows whose 'Column' entry is 'Assembly' carry the
# assembly id instead of a body and are removed here.
is_body_row = new_df['Column'] != 'Assembly'
new_df = new_df[is_body_row]
new_df

Unnamed: 0,Column,Value,name,type,center_of_mass_x,center_of_mass_y,center_of_mass_z,area,volume,material,edge_count,face_count,loop_count,body_count,assembly_area,assembly_volume,density,mass,categories,industries
1,0,,Body1,BRepBody,9.170593e-16,2.846286,5.060009e-13,1.892846,0.064315,Plastic,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
2,1,,Body4,BRepBody,-2.057905e-09,2.423258,-2.936297e-02,0.659531,0.007196,Metal_Ferrous_Steel,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
3,2,,Body5,BRepBody,-2.066062e-09,2.421404,6.705135e-01,0.657318,0.007167,Metal_Non-Ferrous,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
4,3,,Body6,BRepBody,9.170593e-16,2.846286,7.000000e-01,1.892846,0.064315,Other,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
5,4,,Body7,BRepBody,9.170593e-16,2.846286,-7.000000e-01,1.892846,0.064315,Plastic,507.0,211.0,253.0,6.0,7.652705,0.214475,0.00785,0.001684,[Mechanical Engineering],[Product Design & Manufacturing]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123394,10,,Body1,BRepBody,6.132067e-01,0.000000,-2.670490e-11,1.729151,0.116680,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123395,11,,Body1,BRepBody,0.000000e+00,0.181651,1.007575e-11,0.129364,0.002853,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123396,12,,Body2,BRepBody,-6.182051e-17,-0.011000,8.108617e-12,0.016467,0.000024,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]
123397,13,,Body1,BRepBody,0.000000e+00,0.150000,1.045680e-11,0.109956,0.002356,Metal_Ferrous_Steel,5189.0,1902.0,2420.0,54.0,2121.693737,299.830188,0.00785,2.353667,[Mechanical Engineering],[Product Design & Manufacturing]


In [58]:
# Write the per-body table to data.csv, leaving out the DataFrame index.
new_df.to_csv('data.csv', index=False)