In [224]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn as gnn
from torch_geometric.data import Data, Dataset, DataLoader
from torch_geometric.nn import MessagePassing, GCNConv, global_mean_pool
from torch_geometric.nn.models import GIN,GCN,GAT
from torch_geometric.utils import get_laplacian, add_self_loops, degree
from torch_geometric.data import DataLoader

In [225]:
from torch_geometric.utils import to_networkx

In [226]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score, precision_score, accuracy_score
import numpy as np

In [14]:
import matplotlib.pyplot as plt

In [8]:
def parse_off(file_path):
    """Parse a text OFF mesh file into a graph.

    Every vertex becomes a node whose feature vector is the list of numbers on
    its vertex line. Every face contributes one directed edge per consecutive
    pair of its vertices, including the closing edge from the last vertex back
    to the first.

    Blank lines and ``#`` comments are ignored, and fields may be separated by
    any amount of whitespace. The element counts may follow the ``OFF`` keyword
    on the same line or sit on the next line.

    Args:
        file_path: Path to the ``.off`` file.

    Returns:
        A ``Data`` object with ``x`` holding one row per vertex and
        ``edge_index`` of shape ``(2, num_edges)`` and dtype ``torch.long``
        (shape ``(2, 0)`` when the mesh has no faces).

    Raises:
        ValueError: If the file ends early, the counts line is incomplete, a
            field is not numeric, or a face lists fewer indices than it declares.
    """
    def meaningful_rows(handle):
        # Yield the whitespace-separated tokens of each line that still has
        # content once a trailing '#' comment is removed.
        for raw in handle:
            tokens = raw.split('#', 1)[0].split()
            if tokens:
                yield tokens

    with open(file_path, 'r') as f:
        rows = meaningful_rows(f)

        def next_row(what):
            # Turn a premature end of file into a descriptive ValueError.
            try:
                return next(rows)
            except StopIteration:
                raise ValueError(
                    f"{file_path}: unexpected end of file while reading {what}"
                ) from None

        # The first row is the format keyword. Some writers put the counts on
        # the same line ("OFF 8 6 12"); otherwise they are on the next row.
        first = next_row('the header')
        if len(first) > 1 and first[0].upper().endswith('OFF'):
            header = first[1:]
        else:
            header = next_row('the element counts')
        if len(header) < 2:
            raise ValueError(
                f"{file_path}: expected vertex and face counts, got {header!r}"
            )
        num_vertices = int(header[0])
        num_faces = int(header[1])

        # Read the vertices: one row of floats per vertex.
        vertices = torch.tensor(
            [[float(tok) for tok in next_row('a vertex')]
             for _ in range(num_vertices)]
        )

        # Read the faces and build the edges.
        edges = []
        for _ in range(num_faces):
            row = next_row('a face')
            corner_count = int(row[0])
            # Only the declared number of indices belongs to the polygon;
            # anything after that (e.g. per-face colour values) is ignored.
            corners = [int(tok) for tok in row[1:1 + corner_count]]
            if len(corners) != corner_count:
                raise ValueError(
                    f"{file_path}: face declares {corner_count} vertices "
                    f"but lists {len(corners)}"
                )
            edges.extend(
                (corners[j], corners[(j + 1) % corner_count])
                for j in range(corner_count)
            )

        # reshape(-1, 2) keeps the tensor two-dimensional even when there are
        # no edges, so the transpose below is always valid.
        edge_index = (
            torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
        )

    return Data(x=vertices, edge_index=edge_index)

# Root folder that contains one sub-folder of .off files per category.
root_folder = 'PSB'

# The category names are the sub-folder names. os.listdir returns entries in
# arbitrary order, so sort them to make the processing order (and therefore the
# order of `graphs`) reproducible from run to run.
subfolder_names = sorted(
    d for d in os.listdir(root_folder)
    if os.path.isdir(os.path.join(root_folder, d))
)

# Encode the category names as integer class indices: one-hot encode them, then
# collapse each row with argmax so labels[i] is the class of subfolder_names[i].
enc = OneHotEncoder(handle_unknown='ignore')
category_column = np.array(subfolder_names).reshape(-1, 1)
enc.fit(category_column)
labels = enc.transform(category_column).toarray()
labels = torch.argmax(torch.tensor(labels), dim=1)
#labels = [int(d.split('_')[0]) for d in subfolder_names]

# Parse every .off file and build a list of labelled graph data objects.
graphs = []
for subfolder_name, label in zip(subfolder_names, labels):
    subfolder_path = os.path.join(root_folder, subfolder_name)
    for filename in sorted(os.listdir(subfolder_path)):
        if not filename.endswith('.off'):  # skip anything that is not a mesh file
            continue
        file_path = os.path.join(subfolder_path, filename)
        graph = parse_off(file_path)
        # `label` is a 0-d tensor; store it as a 1-element long tensor.
        graph.y = torch.tensor([int(label)], dtype=torch.long)
        graphs.append(graph)


# Prefer the current DataLoader location; fall back to the older import path
# for installations that do not provide torch_geometric.loader.
try:
    from torch_geometric.loader import DataLoader
except ImportError:
    from torch_geometric.data import DataLoader

# NOTE(review): with drop_last=True and no shuffling, the graphs in the final
# incomplete batch are never yielded — confirm this is intended.
loader = DataLoader(graphs, batch_size=32, drop_last=True)

In [10]:
# Number of graphs collected in `graphs`.
# NOTE(review): the original note here read "Filtered: 120" — presumably the count
# obtained after some filtering step that is not shown in this cell; confirm or remove.
len(graphs)

380

In [1]:
from torch_geometric.data import InMemoryDataset, Data

class MyDataset(InMemoryDataset):
    """Dataset that serves graphs straight from an in-memory Python list.

    Length and item access are delegated to the wrapped list, so indexing
    behaves exactly like indexing ``data_list`` itself (an integer returns one
    graph, a slice returns a plain list of graphs).

    Args:
        data_list: Sequence of graph objects, each carrying its label in ``y``.
    """

    def __init__(self, data_list):
        super().__init__()
        self.data_list = data_list

    @property
    def num_classes(self):
        """Return the number of distinct label values across all graphs.

        Labels are flattened and concatenated before counting, so ``y`` may be
        a scalar, a one-element tensor, or a tensor with several entries.
        An empty dataset has zero classes.
        """
        targets = [torch.as_tensor(data.y).reshape(-1) for data in self.data_list]
        if not targets:
            return 0
        return int(torch.unique(torch.cat(targets)).numel())

    def __len__(self):
        """Return the number of graphs in the wrapped list."""
        return len(self.data_list)

    def __getitem__(self, index):
        """Return ``data_list[index]`` unchanged."""
        return self.data_list[index]

  from .autonotebook import tqdm as notebook_tqdm


In [234]:
# Wrap the parsed graphs in a MyDataset and write the whole object to 'psb.pt'.
# NOTE(review): torch.save serialises the object itself, so loading 'psb.pt' later
# presumably requires the MyDataset class to be defined in the loading session — confirm.
psb_set = MyDataset(graphs)
torch.save(psb_set,'psb.pt')