In [1]:
import gzip
import json
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from sklearn.preprocessing import OneHotEncoder
import networkx as nx
from sklearn.cluster import KMeans
import math
from torch.profiler import profile, record_function, ProfilerActivity

In [None]:
with gzip.open('../data/xbar/1/xbar.json.gz','rb') as f:
    design = json.loads(f.read().decode('utf-8'))
    
instances = pd.DataFrame(design['instances'])
nets = pd.DataFrame(design['nets'])

conn=np.load('../data/xbar/1/xbar_connectivity.npz')
A = coo_matrix((conn['data'], (conn['row'], conn['col'])), shape=conn['shape'])
A = A.__mul__(A.T)

def buildBST(array,start=0,finish=-1):
    if finish<0:
        finish = len(array)
    mid = (start + finish) // 2
    if mid-start==1:
        ltl=start
    else:
        ltl=buildBST(array,start,mid)
    
    if finish-mid==1:
        gtl=mid
    else:
        gtl=buildBST(array,mid,finish)
        
    return((array[mid],ltl,gtl))

congestion_data = np.load('../data/xbar/1/xbar_congestion.npz')
xbst=buildBST(congestion_data['xBoundaryList'])
ybst=buildBST(congestion_data['yBoundaryList'])

def getGRCIndex(x, y, xbst, ybst):
    xi, yi = None, None
    while isinstance(xbst, tuple):
        if x is not None and x < xbst[0]:
            xbst = xbst[1]
        else:
            xbst = xbst[2]
        xi = xbst if not isinstance(xbst, tuple) else xi
    
    while isinstance(ybst, tuple):
        if y is not None and y < ybst[0]:
            ybst = ybst[1]
        else:
            ybst = ybst[2]
        yi = ybst if not isinstance(ybst, tuple) else yi
    
    return xi, yi

demand = np.zeros(len(instances))
capacity = np.zeros(len(instances))
demand_variance = np.zeros(len(instances))
neighbor_demand = np.zeros(len(instances))

layer_indices = {l: idx for idx, l in enumerate(congestion_data['layerList'])}
iloc_jloc = instances.apply(lambda row: getGRCIndex(row['xloc'], row['yloc'], xbst, ybst), axis=1)
iloc, jloc = zip(*iloc_jloc)
for l in congestion_data['layerList']:
    lyr = layer_indices[l]
    layer_demand = congestion_data['demand'][lyr]
    layer_capacity = congestion_data['capacity'][lyr]  # Assuming similar structure for capacity
    layer_shape = layer_demand.shape

    for k in range(len(instances)):
        i, j = iloc[k], jloc[k]

        demand_val = 0
        capacity_val = 0  # Initialize capacity value for each instance
        neighbor_vals = np.array([])

        if 0 <= i < layer_shape[0] and 0 <= j < layer_shape[1]:
            demand_val = layer_demand[i, j]
            capacity_val = layer_capacity[i, j]  # Calculate capacity value

            i_min, i_max = max(0, i-1), min(i+2, layer_shape[0])
            j_min, j_max = max(0, j-1), min(j+2, layer_shape[1])
            neighbor_vals = layer_demand[i_min:i_max, j_min:j_max].flatten()

        demand[k] += demand_val
        capacity[k] += capacity_val  # Accumulate capacity
        demand_variance[k] = np.var(neighbor_vals) if neighbor_vals.size else 0
        neighbor_demand[k] = np.mean(neighbor_vals) if neighbor_vals.size else 0



def find_optimal_clusters(data, max_k):
    inertia = []
    for k in range(1, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42).fit(data)
        inertia.append(kmeans.inertia_)
    elbow_point = np.argmax(np.diff(inertia)) + 1
    return elbow_point
max_k = 10
optimal_k = find_optimal_clusters(instances[['xloc', 'yloc']], max_k)
kmeans = KMeans(n_clusters=optimal_k, random_state=42).fit(instances[['xloc', 'yloc']])
centroids = kmeans.cluster_centers_
x_grid_size = (centroids[:, 0].max() - centroids[:, 0].min()) / optimal_k
y_grid_size = (centroids[:, 1].max() - centroids[:, 1].min()) / optimal_k
instances['x_grid'] = instances['xloc'] // x_grid_size
instances['y_grid'] = instances['yloc'] // y_grid_size
spatial_features = instances.groupby(['x_grid', 'y_grid']).size().reset_index(name='grid_density')
instances = instances.merge(spatial_features, how='left', on=['x_grid', 'y_grid'])

encoder = OneHotEncoder()
instances_encoded = pd.DataFrame(encoder.fit_transform(instances[['cell', 'orient']]).toarray())
instances = instances.join(instances_encoded)

G = nx.Graph(A)
betweenness = nx.betweenness_centrality(G)
clustering_coeff = nx.clustering(G)
pagerank = nx.pagerank(G)
eigenvector_centrality = nx.eigenvector_centrality_numpy(G)
    
degree = np.array(A.sum(axis=1)).flatten()

instances['betweenness'] = instances.index.map(betweenness)
instances['clustering_coeff'] = instances.index.map(clustering_coeff)
instances['pagerank'] = instances.index.map(pagerank)
instances['eigenvector_centrality'] = instances.index.map(eigenvector_centrality)
instances['degree'] = degree
instances['demand'] = demand
instances['capacity'] = capacity
instances['demand_variance'] = demand_variance
instances['neighbor_demand'] = neighbor_demand
instances['overflow'] = instances['demand'] - instances['capacity']

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


In [4]:
print(instances.columns)

Index([                  'name',                     'id',
                         'xloc',                   'yloc',
                         'cell',                 'orient',
                       'x_grid',                 'y_grid',
                 'grid_density',                        0,
                              1,                        2,
                              3,                        4,
                              5,                        6,
                              7,                        8,
                              9,                       10,
                             11,                       12,
                             13,                       14,
                             15,                       16,
                  'betweenness',       'clustering_coeff',
                     'pagerank', 'eigenvector_centrality',
                       'degree',                 'demand',
                     'capacity',        'demand_variance

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
one_hot_feature_columns = list(range(17))  # Assuming these are the indices of your one-hot encoded features
original_feature_columns = [
    'betweenness', 'clustering_coeff', 'pagerank', 'eigenvector_centrality',
    'degree', 'demand', 'demand_variance', 'neighbor_demand'
]
feature_columns = original_feature_columns + one_hot_feature_columns
label_column = 'overflow'
features = torch.tensor(instances[feature_columns].values, dtype=torch.float).to(device)
labels = torch.tensor(instances[label_column].values, dtype=torch.float).unsqueeze(1).to(device)
A_coo = A.tocoo()
edge_index = torch.tensor([A_coo.row, A_coo.col], dtype=torch.long).to(device)
indices = np.arange(len(instances))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=23)
train_mask = torch.zeros(len(instances), dtype=torch.bool)
test_mask = torch.zeros(len(instances), dtype=torch.bool)
train_mask[train_indices] = True
test_mask[test_indices] = True
data = Data(x=features, edge_index=edge_index, y=labels, train_mask=train_mask, test_mask=test_mask)

  edge_index = torch.tensor([A_coo.row, A_coo.col], dtype=torch.long).to(device)


In [None]:
class GCN(torch.nn.Module):
    def __init__(self, num_features, hidden_channels, num_layers):
        super(GCN, self).__init__()
        self.layers = torch.nn.ModuleList()
        self.layers.append(GCNConv(num_features, hidden_channels))
        for _ in range(num_layers - 2):
            self.layers.append(GCNConv(hidden_channels, hidden_channels))
        self.layers.append(GCNConv(hidden_channels, 1))

    def forward(self, x, edge_index):
        for i, layer in enumerate(self.layers):
            x = layer(x, edge_index)
            if i < len(self.layers) - 1:
                x = F.relu(x)
                x = F.dropout(x, training=self.training)
        return x

In [None]:
model = GCN(num_features=len(feature_columns), hidden_channels=16, num_layers=3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True) as prof:
    with record_function("model_training"):
        model.train()
        for epoch in range(15000):
            optimizer.zero_grad()
            out = model(data.x, data.edge_index)[data.train_mask]
            loss = F.mse_loss(out, data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 100 == 0:
                print(f'Epoch {epoch}: Training Loss = {loss.item()}')
            if epoch == 14999:
                train_pred_mean_congestion = out.mean().item()
                train_true_mean_congestion = data.y[data.train_mask].mean().item()
                print(f'Last Epoch {epoch}: Training True Mean Congestion = {train_true_mean_congestion}, Training Predicted Mean Congestion = {train_pred_mean_congestion}')
                print(abs(train_true_mean_congestion - train_pred_mean_congestion))
print(prof.key_averages().table(sort_by="cuda_memory_usage", row_limit=10))

In [None]:
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)[data.test_mask]
    test_loss = F.mse_loss(out, data.y[data.test_mask]).item()
    test_pred_mean_congestion = out.mean().item()
    test_true_mean_congestion = data.y[data.test_mask].mean().item()
print(f'Test Loss: {test_loss}')
print(f'Test True Mean Congestion = {test_true_mean_congestion}, Test Predicted Mean Congestion = {test_pred_mean_congestion}')
print(abs(test_true_mean_congestion - test_pred_mean_congestion))

# All xbar datasets

In [None]:
def load_and_process_xbar(xbar_path):
    def buildBST(array,start=0,finish=-1):
        if finish<0:
            finish = len(array)
        mid = (start + finish) // 2
        if mid-start==1:
            ltl=start
        else:
            ltl=buildBST(array,start,mid)
        
        if finish-mid==1:
            gtl=mid
        else:
            gtl=buildBST(array,mid,finish)
            
        return((array[mid],ltl,gtl))

    def getGRCIndex(x,y,xbst,ybst):
        while (type(xbst)==tuple):
            if x < xbst[0]:
                xbst=xbst[1]
            else:
                xbst=xbst[2]
                
        while (type(ybst)==tuple):
            if y < ybst[0]:
                ybst=ybst[1]
            else:
                ybst=ybst[2]
                
        return ybst, xbst
    
    with gzip.open(f'{xbar_path}/xbar.json.gz', 'rb') as f:
        design = json.loads(f.read().decode('utf-8'))
    instances = pd.DataFrame(design['instances'])
    conn = np.load(f'{xbar_path}/xbar_connectivity.npz')
    A = coo_matrix((conn['data'], (conn['row'], conn['col'])), shape=conn['shape'])
    A = A.__mul__(A.T).tocoo()

    congestion_data = np.load(f'{xbar_path}/xbar_congestion.npz')
    xbst=buildBST(congestion_data['xBoundaryList'])
    ybst=buildBST(congestion_data['yBoundaryList'])
    demand = np.zeros(shape = [instances.shape[0],])
    
    for k in range(instances.shape[0]):
        xloc = instances.iloc[k]['xloc']; yloc = instances.iloc[k]['yloc']
        i,j=getGRCIndex(xloc,yloc,xbst,ybst)
        d = 0 
        for l in list(congestion_data['layerList']): 
            lyr=list(congestion_data['layerList']).index(l)
            d += congestion_data['demand'][lyr][i][j]
        demand[k] = d
        
    instances['routing_demand'] = demand
    
    capacity = congestion_data['capacity']
    supply = np.zeros(shape=[instances.shape[0],])
    
    for k in range(instances.shape[0]):
        xloc = instances.iloc[k]['xloc']
        yloc = instances.iloc[k]['yloc']
        i, j = getGRCIndex(xloc, yloc, xbst, ybst)
        s = 0
        for l in list(congestion_data['layerList']):
            lyr = list(congestion_data['layerList']).index(l)
            s += capacity[lyr][i][j]
        supply[k] = s
    instances['routing_supply'] = supply
    instances['overflow'] = instances['routing_demand'] - instances['routing_supply']
    instances['features'] = instances[['routing_demand', 'routing_supply']].values.tolist()
    node_features = torch.tensor(instances['features'].values.tolist(), dtype=torch.float)
    node_labels = torch.tensor(instances['overflow'].values, dtype=torch.float).unsqueeze(1)
    edge_index = torch.tensor([A.row, A.col], dtype=torch.long)
    return node_features, edge_index, node_labels

In [None]:
class GCN(torch.nn.Module):
    def __init__(self, num_features, hidden_channels, num_layers):
        super(GCN, self).__init__()
        self.layers = torch.nn.ModuleList()
        self.layers.append(GCNConv(num_features, hidden_channels))
        for _ in range(num_layers - 2):
            self.layers.append(GCNConv(hidden_channels, hidden_channels))
        self.layers.append(GCNConv(hidden_channels, 1))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        for i, layer in enumerate(self.layers):
            x = layer(x, edge_index)
            if i < len(self.layers) - 1:
                x = F.relu(x)
                x = F.dropout(x, training=self.training)
        return x

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
results = []
all_xbars = ['xbar/' + str(i) for i in range(1, 14)]
for test_xbar_path in all_xbars:
    print(f"Testing on {test_xbar_path}")
    test_data, test_edge_index, test_labels = load_and_process_xbar(test_xbar_path)
    test_data = Data(x=test_data, edge_index=test_edge_index, y=test_labels).to(device)
    train_data = [load_and_process_xbar(xbar_path) for xbar_path in all_xbars if xbar_path != test_xbar_path]
    train_features = torch.cat([data[0] for data in train_data], dim=0)
    train_labels = torch.cat([data[2] for data in train_data], dim=0)
    train_edge_index = torch.cat([data[1] for data in train_data], dim=1)
    train_data = Data(x=train_features, edge_index=train_edge_index, y=train_labels).to(device)
    model = GCN(num_features=2, hidden_channels=16, num_layers=3).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    model.train()
    for epoch in range(10000):
        optimizer.zero_grad()
        out = model(train_data)
        loss = F.mse_loss(out, train_data.y)
        loss.backward()
        optimizer.step()
    model.eval()
    with torch.no_grad():
        out = model(test_data)
        test_loss = F.mse_loss(out, test_data.y)
        print(f"Test Loss on {test_xbar_path}: {test_loss.item()}")
        results.append(test_loss.item())
average_loss = sum(results) / len(results)
print(f'Average Loss in Leave-One-Out Experiment: {average_loss}')