In [1]:
import os
import json
import pandas as pd
import plotly.express as px
from tqdm import tqdm
import networkx as nx
import plotly.graph_objects as go
import numpy as np
import torch_geometric.transforms as T
from torch_geometric.data import Data, DataLoader
from sklearn.manifold import TSNE

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
from torch_geometric.utils.convert import from_networkx
from torch_geometric.nn import GCNConv
from torch.nn import Linear
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Utility

In [None]:
def one_hot(df):
    """
    One-hot encode a material category as a length-7 float tensor.

    Only the first element of the flattened ``df`` values is inspected.
    Slots 0-5 correspond, in order, to 'Plastic', 'Metal_Ferrous_Steel',
    'Metal_Non-Ferrous', 'Metal_Aluminum', 'Metal_Ferrous' and 'Wood';
    any other value sets the last slot (Other).
    """
    known_categories = (
        'Plastic',
        'Metal_Ferrous_Steel',
        'Metal_Non-Ferrous',
        'Metal_Aluminum',
        'Metal_Ferrous',
        'Wood',
    )
    label = df.to_numpy().flatten()[0]

    hot_slot = len(known_categories)  # Other
    for slot, category in enumerate(known_categories):
        if label == category:
            hot_slot = slot
            break

    encoding = [0.0] * (len(known_categories) + 1)
    encoding[hot_slot] = 1.0
    return torch.tensor(encoding)
    
def one_num(df):
    """
    Encode a material category as a scalar int64 class index.

    Only the first element of the flattened ``df`` values is inspected.
    Indices 0-5 correspond, in order, to 'Plastic', 'Metal_Ferrous_Steel',
    'Metal_Non-Ferrous', 'Metal_Aluminum', 'Metal_Ferrous' and 'Wood';
    any other value maps to 6 (Other).
    """
    ordered_categories = [
        'Plastic',
        'Metal_Ferrous_Steel',
        'Metal_Non-Ferrous',
        'Metal_Aluminum',
        'Metal_Ferrous',
        'Wood',
    ]
    label = df.to_numpy().flatten()[0]

    class_index = len(ordered_categories)  # Other
    for position, category in enumerate(ordered_categories):
        if label == category:
            class_index = position
            break

    return torch.tensor(class_index, dtype=torch.int64)

In [None]:
def decompose_dict_body(json_data, key):
    """
    Decomposes the body type for Autodesk dataset

    Flattens the records stored under ``json_data[str(key)]`` into one
    DataFrame row per body.

    Parameters
    ----------
    json_data : dict
        Parsed assembly JSON.
    key : str (or anything whose ``str()`` is the wanted key)
        Name of the entry holding the ``{body_id: record}`` mapping.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the mapping is empty. Otherwise a frame without the
        'png', 'smt', 'step' and 'obj' columns, with nested dict columns
        expanded into flat columns, a 'body_id' column added, and the columns
        'type', 'x', 'y', 'z' renamed to 'center_of_mass_point_type',
        'center_of_mass_x', 'center_of_mass_y', 'center_of_mass_z'.

    Raises
    ------
    KeyError
        If ``str(key)`` is missing from ``json_data`` or one of the four
        dropped columns is absent from the records.
    """
    # Look the mapping up once, always via str(key), so every access agrees.
    bodies = json_data[str(key)]
    if len(bodies) == 0:
        return None

    part_ids = list(bodies.keys())
    df = pd.DataFrame(list(bodies.values()))
    df = df.drop(columns=['png', 'smt', 'step', 'obj'])

    current_level = df
    combined_df = df
    expanded_columns = []

    # Walk down the nesting: at each level, expand the first column whose first
    # value is a dict, then continue inside the expanded frame. The loop ends
    # when a level holds no dict column, so it is bounded by the nesting depth.
    # NOTE(review): only the FIRST dict column per level is expanded; sibling
    # dict columns on the same level stay as-is. Confirm this matches the schema.
    found_nested = True
    while found_nested:
        found_nested = False
        for column in current_level.keys():
            if isinstance(current_level[column].iloc[0], dict):
                expanded_columns.append(column)
                current_level = pd.DataFrame.from_dict(current_level[column].to_dict(), orient="index")
                combined_df = pd.concat([combined_df, current_level], axis=1)  # Concat the pd to master
                found_nested = True
                break

    combined_df = combined_df.drop(columns=expanded_columns)
    combined_df["body_id"] = part_ids
    combined_df = combined_df.rename(columns={
        "type": "center_of_mass_point_type",
        "x": "center_of_mass_x",
        "y": "center_of_mass_y",
        "z": "center_of_mass_z",
    })
    return combined_df
    
def generate_graph(json_data, file, df):
    """
    Build the networkX graph of one assembly: a node per body, an edge per contact pair.

    Returns ``file`` unchanged (instead of a graph) when ``json_data`` has no
    'contacts' entry or that entry is None. Otherwise every key of
    ``json_data["bodies"]`` becomes a node carrying
      * 'y' - the class index from ``one_num`` of the body's remaining
              column(s) after the geometric columns are dropped, and
      * 'x' - a float32 tensor of the body's row without 'material_category'
              and 'body_id'.
    Assumes every body id has a row in ``df``.
    """
    # No usable contact list: hand the path back as the "no graph" marker.
    if 'contacts' not in json_data or json_data["contacts"] is None:
        return file

    features = df.drop(columns=['center_of_mass_point_type', 'name'])

    geometry_columns = [
        'area',
        'volume',
        'body_id',
        'center_of_mass_x',
        'center_of_mass_y',
        'center_of_mass_z'
    ]
    non_feature_columns = [
        'material_category',
        'body_id'
    ]

    node_specs = []
    for body_id in list(json_data["bodies"].keys()):
        label = one_num(
            features.loc[features['body_id'] == body_id].drop(columns=geometry_columns)
        )
        vector = torch.tensor(
            features.loc[features['body_id'] == body_id]
            .drop(columns=non_feature_columns)
            .to_numpy()
            .flatten(),
            dtype=torch.float32,
        )
        node_specs.append((body_id, {'y': label, 'x': vector}))

    # We will always assume two entities for each contact point
    body_pairs = set()
    for contact in json_data["contacts"]:
        pair = (contact['entity_one']['body'], contact['entity_two']['body'])
        body_pairs.add(pair)

    graph = nx.Graph()
    graph.add_nodes_from(node_specs)
    graph.add_edges_from(list(body_pairs))

    return graph

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one list (one level deep)."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

def unpack_dataset(root_folder = None, exclusion_file_list = {}):
    """
    BFS Implementation of unpacking file directories into a dataset given a root folder.
    ** Assumes first surface level unpacking. Assumes only one Assembly.json

    Parameters
    ----------
    root_folder : str
        Folder to walk. Sub-folders are visited breadth-first; any non-folder
        entry whose path contains "assembly.json" is loaded.
    exclusion_file_list : container of str
        Entry names to skip (only membership tests are performed on it).

    Returns
    -------
    (master_df, graphs, graph_order) on a normal run:
        master_df   - all per-assembly body frames concatenated,
        graphs      - graphs returned by ``generate_graph``,
        graph_order - the assembly path of each graph, same order as ``graphs``.
    An empty ``pandas.DataFrame`` alone when ``root_folder`` is falsy or cannot
    be listed.
    NOTE(review): the error paths return one object while the normal path
    returns three; callers that unpack three values will fail on the error
    paths. Kept as-is to avoid changing the function's contract.

    Side effects
    ------------
    Prints progress, and finishes with ``os.chdir(root_folder)``. The walk
    itself no longer changes the working directory for every folder, so
    relative roots work as well.
    """
    if (not root_folder):
        print("Invalid root_folder path is None Type.")
        return pd.DataFrame()

    try:
        file_count = len(os.listdir(root_folder))
    except (OSError, TypeError, ValueError):
        print("Invalid root_folder Path:", root_folder)
        return pd.DataFrame()

    def _progress(label, done):
        # file_count is the number of entries directly under the root; guard the
        # division so an empty root folder does not raise ZeroDivisionError.
        percent = int(done / file_count * 100) if file_count else 0
        print(label, str(percent)+"% |", str(done) +  "/" + str(file_count), end="\r")

    num = -1 # Excluding Root folder
    assembly_cnt = 0

    # Some extra data collection
    frames = []       # per-assembly body frames, concatenated once at the end
    graphs = []
    graph_order = []
    keys = []

    visited = {root_folder}
    queue = [root_folder]

    while queue:
        _curr = queue.pop(0)
        if os.path.isdir(_curr):
            num += 1
            _progress("Files Processed:", num)
            for n in sorted(os.listdir(_curr)):
                if n in exclusion_file_list:
                    continue
                path = _curr + "/" + n
                if path not in visited and os.path.isdir(path):
                    visited.add(path)
                    queue.append(path)
                elif "assembly.json" in path:
                    visited.add(path)
                    queue.append(path)
        else:
            assembly_cnt += 1
            _progress("Assembly.JSON Processed:", assembly_cnt)

            try:
                # Read Bodies Key (the context manager closes the handle)
                with open(_curr) as file:
                    json_data = json.load(file)
                keys.append(list(json_data.keys()))
                df = decompose_dict_body(json_data, "bodies")
                if df is not None:
                    frames.append(df)

                # Append Graphs; generate_graph hands the path back when the
                # assembly has no contacts, and such files are skipped.
                graph = generate_graph(json_data, _curr, df)
                if (graph != _curr):
                    graph_order.append(_curr)
                    graphs.append(graph)
            except Exception as e:
                # Best effort: report the broken assembly and keep walking.
                print("Error occured at assembly file:", _curr)
                print(e)

    os.chdir(root_folder) # Return the dir
    keys = np.array(flatten(keys))
    print("Unqiue values:", np.unique(keys, return_counts=True))

    master_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return master_df, graphs, graph_order

# Machine-specific location of the extracted dataset; adjust for your own setup.
hard_drive_path = "/Volumes/T7/ASME-Hackathon/train_new/Fusion360GalleryDataset_23hackathon_train"

# The dataset walk is disabled; uncomment to rebuild df / graphs / graph_order from disk.
# An absolute path is required because unpack_dataset changes the working directory
# while it walks the folders.
# NOTE: later cells read `graphs`, which is only defined by this call.
# df, graphs, graph_order = unpack_dataset(
#     hard_drive_path,
#     {'.DS_Store'}
# )

#df

# Create Graph from Pickle and DF

In [4]:
# The path is relative, so it is resolved against the kernel's current working
# directory; FileNotFoundError is raised when the pickle is not located there.
# NOTE(review): only unpickle files from a trusted source - loading a pickle can run arbitrary code.
df_main = pd.read_pickle("./df_main_fe2.pkl")

/Users/zacharyg/Documents/GitHub/ASME_Hackathon/AUTODESK Problem


FileNotFoundError: [Errno 2] No such file or directory: './df_main_fe2.pkl'

In [None]:
def generate_graph(df, json_data=None):
    """
    Creates Graph: G with networkX
    Assumes bodies exist with G

    Parameters
    ----------
    df : pandas.DataFrame
        Body table containing at least 'center_of_mass_point_type', 'name',
        'body_id', 'material_category', 'area', 'volume' and
        'center_of_mass_x/y/z'.
    json_data : dict, optional
        Assembly JSON providing the "bodies" mapping and the "contacts" list.
        The original body read ``json_data`` without receiving it, which raised
        NameError unless a notebook-global of that name happened to exist. When
        this argument is omitted, such a global is still used if present.

    Returns
    -------
    networkx.Graph
        One node per key of ``json_data["bodies"]`` ('y' = class index from
        ``one_num``, 'x' = float32 feature tensor) and one edge per distinct
        (entity_one body, entity_two body) contact pair.

    Raises
    ------
    NameError
        If no ``json_data`` is passed and no global ``json_data`` exists.

    NOTE(review): this definition reuses the name of the earlier three-argument
    ``generate_graph(json_data, file, df)`` that ``unpack_dataset`` calls; once
    this cell has run, that call fails with TypeError (which unpack_dataset
    catches and prints per file). Confirm which version is meant to survive.
    """
    if json_data is None:
        # Backward compatibility with the implicit-global behaviour.
        json_data = globals().get("json_data")
    if json_data is None:
        raise NameError(
            "generate_graph needs the assembly JSON: pass json_data=... "
            "(no global 'json_data' is defined)"
        )

    df = df.drop(columns=['center_of_mass_point_type', 'name'])

    G = nx.Graph()
    contact_set = set()
    nodes = []

    for key in list(json_data["bodies"].keys()):
        # Select the body's row(s) once and derive label and features from it.
        body_rows = df.loc[df['body_id'] == key]
        nodes.append(
            (key, {'y': one_num(body_rows.drop(columns=[
                            'area',
                            'volume',
                            'body_id',
                            'center_of_mass_x',
                            'center_of_mass_y',
                            'center_of_mass_z'
                        ])),
                   'x': torch.tensor(body_rows.drop(columns=[
                       'material_category',
                       'body_id'
                   ]).to_numpy().flatten(), dtype=torch.float32)
                  })
        )

    for contact in json_data["contacts"]:
        # We will always assume two entities for each contact point
        contact_set.add((contact['entity_one']['body'], contact['entity_two']['body']))
    contacts_list = list(contact_set)

    G.add_nodes_from(nodes)
    G.add_edges_from(contacts_list)

    return G




# Graph Visualization

In [None]:
def visualizeGraph(G, renderer="browser"):
    """
    Draw a networkX graph with Plotly using a spring layout.

    Parameters
    ----------
    G : networkx graph
        Graph to draw; node labels are used as hover text.
    renderer : str, optional
        Plotly renderer passed to ``fig.show``. Defaults to "browser", which is
        what this function always used before the parameter existed.

    The figure is shown as a side effect; nothing is returned.
    """
    pos = nx.spring_layout(G)

    node_x = [pos[node][0] for node in G.nodes()]
    node_y = [pos[node][1] for node in G.nodes()]

    # Every edge contributes (start, end, None); the None breaks the line so
    # all edges can share a single trace.
    edge_x = []
    edge_y = []
    for start, end in G.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    # Create a Scatter trace for nodes
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=list(G.nodes()),  # Use node labels as text
        textposition='top center',  # Position the text above the nodes
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=False,
            colorscale='Viridis',
            size=10,
            colorbar=dict(
                thickness=15,
                # `titleside` is a deprecated attribute that newer Plotly
                # releases reject; the nested title dict is the supported form.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            )
        )
    )

    # Create a Scatter trace for Edges
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    fig = go.Figure(data=[edge_trace, node_trace])

    fig.show(renderer=renderer)

In [None]:
def visualize(h, color):
    """
    Scatter-plot a 2-D t-SNE projection of ``h``, colouring the points by ``color``.

    ``h`` is detached, moved to CPU and converted to numpy before the
    projection; the figure is displayed as a side effect.
    """
    embedding = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

    fig, ax = plt.subplots(figsize=(10, 10))
    # Hide the ticks on both axes.
    ax.set_xticks([])
    ax.set_yticks([])

    ax.scatter(embedding[:, 0], embedding[:, 1], s=70, c=color, cmap="Set2")
    plt.show()

In [None]:
# NOTE: `graphs` is only produced by the unpack_dataset call, which is commented out above;
# on a fresh kernel this cell raises NameError until that call is restored.
len(graphs)

# One Graph Visualization

In [None]:
MAX_GRAPHS = 4000  # upper bound on how many assembly graphs are merged

Master_Graph = nx.Graph()

# Slicing, instead of indexing range(4000), avoids an IndexError when fewer
# than MAX_GRAPHS graphs were loaded.
for assembly_graph in graphs[:MAX_GRAPHS]:
    Master_Graph = nx.compose(Master_Graph, assembly_graph)

# Remove Isolated Nodes
isolated_nodes = [node for node, degree in Master_Graph.degree() if degree == 0]
Master_Graph.remove_nodes_from(isolated_nodes)

In [None]:
# Draw the merged graph; visualizeGraph shows the figure with the "browser" renderer.
visualizeGraph(Master_Graph)

# Pytorch Geometric

### Conversion [A]

In [None]:
# Convert the merged networkX graph with torch_geometric's from_networkx.
# The node attributes 'x' (features) and 'y' (class index) set in generate_graph
# are read back below as pyg_graph.x and pyg_graph.y.
pyg_graph = from_networkx(Master_Graph)

# Sanity check
# print(pyg_graph)
print(pyg_graph.x)
print(pyg_graph.y)
# print(pyg_graph.edge_index)
# print(pyg_graph.num_nodes)

### Conversion [B]

In [None]:
# Convert NetworkX graph and features to PyTorch Geometric data format
# edges = np.array(Master_Graph.edges()).T
# edge_index = torch.tensor(edges, dtype=torch.long)
# print("Edges: \n\n", edges)
# x = torch.tensor(node_features, dtype=torch.float)
# y = torch.tensor([node['club'] for node in graph.nodes.values()], dtype=torch.long)

### Split the data

In [None]:
# Split the data
SPLIT_SEED = 42  # fixed so the train/test partition is identical after a kernel restart
train_ratio = 0.7
num_nodes = pyg_graph.x.shape[0]
num_train = int(num_nodes * train_ratio)

# Shuffle the node indices with a dedicated seeded generator; the global numpy
# random state is left untouched.
idx = np.random.default_rng(SPLIT_SEED).permutation(num_nodes).tolist()

# Explicit long index tensors keep the assignment valid even for an empty split.
train_mask = torch.full_like(pyg_graph.y, False, dtype=bool)
train_mask[torch.tensor(idx[:num_train], dtype=torch.long)] = True
test_mask = torch.full_like(pyg_graph.y, False, dtype=bool)
test_mask[torch.tensor(idx[num_train:], dtype=torch.long)] = True

### Load into Data Object

In [None]:
# Bundle features, edges, labels and the split masks into a single Data object.
# Features are cast to float64; the model is switched to double precision to match.
# num_classes=7 corresponds to the seven class indices (0-6) produced by one_num.
data = Data(
    x=pyg_graph.x.double(), 
    edge_index=pyg_graph.edge_index, 
    y=pyg_graph.y,
    num_classes=7,
    train_mask=train_mask,
    test_mask=test_mask
)

### GCN Model

In [None]:
class GCN(torch.nn.Module):
    """
    Two-layer graph convolutional network: GCNConv -> ReLU -> dropout -> GCNConv.

    Parameters
    ----------
    hidden_channels : int
        Width of the hidden layer.
    in_channels : int, optional
        Input feature size. Defaults to ``data.num_features`` of the
        notebook-level ``data`` object, as before.
    out_channels : int, optional
        Number of output scores per node. Defaults to ``data.num_classes``.
    """

    def __init__(self, hidden_channels, in_channels=None, out_channels=None):
        super().__init__()
        # Fixed seed so the layer weights are initialised identically every run.
        torch.manual_seed(1234567)
        # Fall back to the global `data` only when a size is not given, so the
        # class can also be built for other datasets.
        if in_channels is None:
            in_channels = data.num_features
        if out_channels is None:
            out_channels = data.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the raw (un-normalised) per-node class scores."""
        x = self.conv1(x, edge_index)
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)
# Switch the parameters to float64 to match data.x, which was cast with .double().
model.double()
print(model)

In [None]:
# Visualise the output of the still-untrained model.
# eval() disables dropout so the plot is not perturbed by random masking, and
# no_grad() skips building an autograd graph that is never used here.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)

# Train GNN

In [None]:
# Adam over all model parameters, with weight decay as regularisation.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# Cross-entropy over the raw class scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one full-graph optimisation step and return the training loss tensor."""
    model.train()
    optimizer.zero_grad()  # Drop gradients left over from the previous step.
    scores = model(data.x, data.edge_index)  # One forward pass over the whole graph.
    train_nodes = data.train_mask
    # Only the training nodes contribute to the loss.
    loss = criterion(scores[train_nodes], data.y[train_nodes])
    loss.backward()  # Back-propagate.
    optimizer.step()  # Apply the parameter update.

    return loss

def test():
    """
    Evaluate the model on the test nodes.

    Returns the fraction of test nodes whose highest-scoring class equals the
    label, or ``nan`` when the test mask selects no nodes (previously a
    ZeroDivisionError).
    """
    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with the highest score.
    test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.

    num_test = int(data.test_mask.sum())
    if num_test == 0:
        return float("nan")
    test_acc = int(test_correct.sum()) / num_test  # Derive ratio of correct predictions.

    return test_acc


# 1000 full-graph training steps; the loss is printed after every epoch.
for epoch in range(1, 1001):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

### Test Accuracy

In [None]:
# Accuracy on the nodes selected by data.test_mask.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')