In [1]:
import os
import shutil
import numpy as np
import pickle
import torch
import torch.nn
from torch_geometric.data import Dataset
from torch_geometric.data import Data

from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
from collections import defaultdict

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
from pyg_dataset import NetlistDataset

In [5]:
# Instantiate the project's NetlistDataset over the `cross_design_data`
# directory. The meaning of the flags is defined in pyg_dataset.NetlistDataset.
dataset = NetlistDataset(
    data_dir="cross_design_data",
    load_pe=True,
    pl=True,
    processed=False,
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.56it/s]


In [142]:
# Per-sample preprocessing:
#   * record the instance count on the sample,
#   * shift row 1 of the node->net edge indices by the instance count
#     (presumably turning global net ids into net-local ids -- TODO confirm),
#   * attach the reversed net->node connectivity that the HeteroData cell reads
#     as `data.edge_index_net_to_node`,
#   * drop the source<->sink edge attributes.
for data in dataset:
    num_instances = data.node_congestion.shape[0]
    data.num_instances = num_instances

    for edge_index in (data.edge_index_source_to_net, data.edge_index_sink_to_net):
        # The subtraction mutates the tensor in place, so running this cell
        # twice used to push the ids negative. Only shift while the smallest id
        # still carries the offset; this makes a re-run a no-op.
        # NOTE(review): the guard assumes un-shifted ids are all >= num_instances
        # and shifted ids start below it -- confirm against NetlistDataset.
        if edge_index.numel() > 0 and int(edge_index[1].min()) >= num_instances:
            edge_index[1] = edge_index[1] - num_instances

    edge_index_node_to_net = torch.cat(
        [data.edge_index_source_to_net, data.edge_index_sink_to_net], dim=1
    )
    # Swapping the two rows turns node->net edges into net->node edges.
    data.edge_index_net_to_node = torch.flip(edge_index_node_to_net, [0])

    data.edge_index_source_sink = None
    data.edge_index_sink_source = None

In [47]:
# Group dataset positions by the `design_index` stored on each sample, so the
# split below can be made design by design.
design_indices_dict = defaultdict(list)
for sample_idx in range(len(dataset)):
    design_key = dataset[sample_idx]['design_index']
    design_indices_dict[design_key].append(sample_idx)

In [51]:
# Build the split index: every design is split on its own (80% train, then the
# remaining 20% halved into test and validation) with a fixed random_state.
# Each list collects one index list per design; they are flattened afterwards.
all_train_indices, all_valid_indices, all_test_indices = [], [], []

for design_indices in design_indices_dict.values():
    train_indices, holdout_indices = train_test_split(
        design_indices, test_size=0.2, random_state=1
    )
    test_indices, valid_indices = train_test_split(
        holdout_indices, test_size=0.5, random_state=1
    )

    all_train_indices.append(train_indices)
    all_valid_indices.append(valid_indices)
    all_test_indices.append(test_indices)

In [52]:
# Flatten the per-design index lists into one NumPy array per split.
all_train_indices, all_valid_indices, all_test_indices = (
    np.concatenate(per_design)
    for per_design in (all_train_indices, all_valid_indices, all_test_indices)
)

In [54]:
# Persist the (train, valid, test) index arrays. The file is written with
# pickle, so read it back with pickle.load despite the ".pt" extension.
# The context manager guarantees the handle is flushed and closed even if
# pickling raises; the bare open(...) previously leaked it.
with open("cross_design_data_split.pt", "wb") as split_file:
    pickle.dump((all_train_indices, all_valid_indices, all_test_indices), split_file)

In [57]:
# Shuffle the training indices in place. A seeded Generator (same seed as the
# random_state used for the splits) keeps the order reproducible across kernel
# restarts; the global np.random state used before was never seeded.
# Note: this runs after the split was pickled, so the saved file keeps the
# unshuffled order.
np.random.default_rng(1).shuffle(all_train_indices)

In [66]:
# Move each design's `pyg_data.pkl` from cross_design_data/<design>/ into
# processed_datasets/<design>/, creating the target directory on demand.
for design_fp in tqdm(os.listdir("cross_design_data/")):
    source_file_path = os.path.join("cross_design_data", design_fp, "pyg_data.pkl")
    if not os.path.isfile(source_file_path):
        # Either not a design directory or the file was already moved by an
        # earlier run; skipping keeps the cell safe to re-run instead of
        # crashing inside shutil.move.
        continue

    new_directory_path = os.path.join("processed_datasets", design_fp)
    os.makedirs(new_directory_path, exist_ok=True)

    destination_file_path = os.path.join(new_directory_path, os.path.basename(source_file_path))
    shutil.move(source_file_path, destination_file_path)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 4591.06it/s]


In [6]:
# Pull the first sample out of the dataset for inspection.
data = dataset[0]

In [7]:
# Display the sample's repr (attribute names with their shapes/values).
data

Data(node_features=[708535, 19], net_features=[939231, 2], edge_index_source_sink=[2, 2523118], edge_index_sink_to_net=[2, 2523118], edge_index_source_to_net=[2, 939231], node_congestion=[708535], net_hpwl=[939231], batch=[708535], num_vn=10093, design_index=117)

In [135]:
from torch_geometric.data import HeteroData

In [136]:
# Convert every sample into a HeteroData graph with two node types ('node' and
# 'net') and three edge types, collecting the results in `h_data_lst`.
# After the loop `h_data` still refers to the last graph built.
h_data_lst = []
for data in dataset:
    h_data = HeteroData()

    # Node-level features and targets.
    h_data['node'].x = data.node_features
    h_data['node'].y = data.node_congestion

    # Net-level features and targets.
    h_data['net'].x = data.net_features
    h_data['net'].y = data.net_hpwl

    h_data['node', 'as_a_sink_of', 'net'].edge_index = data.edge_index_sink_to_net
    h_data['node', 'as_a_source_of', 'net'].edge_index = data.edge_index_source_to_net

    # `edge_index_net_to_node` is not part of the samples as loaded, and the
    # preprocessing line that creates it may be disabled. Derive it from the
    # node->net edges (rows swapped) when it is missing instead of raising.
    edge_index_net_to_node = getattr(data, 'edge_index_net_to_node', None)
    if edge_index_net_to_node is None:
        edge_index_net_to_node = torch.flip(
            torch.cat([data.edge_index_source_to_net, data.edge_index_sink_to_net], dim=1),
            [0],
        )
    h_data['net', 'connected_to', 'node'].edge_index = edge_index_net_to_node

    # Graph-level attributes are copied over unchanged.
    h_data.batch = data.batch
    h_data.num_vn = data.num_vn
    h_data_lst.append(h_data)

In [37]:
# Serialize `h_data` to "h_data.pt" with torch.save.
# NOTE(review): `h_data` is a single graph (the loop variable of the conversion
# cell, i.e. the last graph built), not `h_data_lst` -- confirm this is intended.
torch.save(h_data, "h_data.pt")

In [92]:
from torch_geometric.loader import NeighborLoader

In [137]:
# Alias (not a copy) of the graph that the NeighborLoader cell samples from.
l_data = h_data

In [None]:
# Neighbor-sampling loader over `l_data`, seeded from the 'net' nodes.
# The seed mask selects every net. It is built here so the cell runs on a
# fresh kernel without relying on a `mask` defined further down the notebook.
mask = torch.ones(l_data['net'].x.shape[0], dtype=torch.bool)

loader = NeighborLoader(
    l_data,
    # 4 hops with up to 10 sampled neighbors per hop, for every edge type of
    # the graph being loaded.
    num_neighbors={key: [10] * 4 for key in l_data.edge_types},
    input_nodes=('net', mask),
    batch_size=6400,
)

In [139]:
# Grab the first mini-batch from the loader. Unlike `for ...: break`, this
# raises StopIteration on an empty loader instead of silently leaving a stale
# `data` from an earlier cell in scope.
data = next(iter(loader))

In [140]:
# Display the sampled mini-batch (per-type tensor shapes).
data

HeteroData(
  batch=[708535],
  num_vn=10093,
  node={
    x=[0, 19],
    y=[0],
    n_id=[0],
  },
  net={
    x=[1280, 2],
    y=[1280],
    n_id=[1280],
    input_id=[1280],
    batch_size=1280,
  },
  (node, as_a_sink_of, net)={
    edge_index=[2, 0],
    e_id=[0],
  },
  (node, as_a_source_of, net)={
    edge_index=[2, 0],
    e_id=[0],
  },
  (net, connected_to, node)={
    edge_index=[2, 0],
    e_id=[0],
  }
)

In [125]:
# Restrict the graph-level `batch` vector to the entries selected by the
# `n_id` of the sampled 'node' store.
# NOTE: this overwrites `data.batch`, so the cell must run once per mini-batch.
sampled_node_ids = data['node'].n_id
data.batch = data.batch[sampled_node_ids]

In [42]:
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import NeighborLoader

# Reference example on OGB-MAG: load the first (only indexed) graph from the
# dataset rooted at "test" and sample 2 hops with up to 30 neighbors per edge
# type, seeded from the training papers.
# Note: this rebinds `loader`.
ogb_mag = OGB_MAG("test")
hetero_data = ogb_mag[0]

fanout = {edge_type: [30] * 2 for edge_type in hetero_data.edge_types}
paper_seeds = ('paper', hetero_data['paper'].train_mask)

loader = NeighborLoader(
    hetero_data,
    num_neighbors=fanout,
    batch_size=128,
    input_nodes=paper_seeds,
)

In [48]:
# Display the OGB-MAG graph (node/edge types with their tensor shapes).
hetero_data

HeteroData(
  paper={
    x=[736389, 128],
    year=[736389],
    y=[736389],
    train_mask=[736389],
    val_mask=[736389],
    test_mask=[736389],
  },
  author={ num_nodes=1134649 },
  institution={ num_nodes=8740 },
  field_of_study={ num_nodes=59965 },
  (author, affiliated_with, institution)={ edge_index=[2, 1043998] },
  (author, writes, paper)={ edge_index=[2, 7145660] },
  (paper, cites, paper)={ edge_index=[2, 5416271] },
  (paper, has_topic, field_of_study)={ edge_index=[2, 7505078] }
)

In [103]:
# Boolean mask selecting every 'net' node of `h_data`.
# torch.ones avoids materializing a Python list with one element per net, and
# stays a bool tensor even for zero nets (torch.tensor([]) would be float).
mask = torch.ones(len(h_data['net'].x), dtype=torch.bool)