#### Initialization

In [1]:
from feature_extract_func import *

In [2]:
# Designs whose samples form the training table; each zero-riscy variant is
# kept separate and used as its own test table further down.
train_design = ["RISCY-a", "RISCY-b", "RISCY-FPU-a", "RISCY-FPU-b"]
test_design_a = ["zero-riscy-a"]
test_design_b = ["zero-riscy-b"]

In [3]:
import warnings
# Silences every warning for the rest of the session: no category, module or
# message filter is given, so library deprecation/performance warnings are
# hidden as well.
warnings.filterwarnings("ignore")

In [4]:
def get_drc_violations(image, threshold=0.1):
    """
    Count how many entries of ``image`` are strictly greater than ``threshold``.

    Parameters
    ----------
    image : numpy.ndarray
        Label map to inspect.
    threshold : float, optional
        Entries above this value are counted as violations (default 0.1).

    Returns
    -------
    numpy integer
        Number of entries exceeding the threshold.
    """
    over_limit = image > threshold
    # Binarise first, then add up the ones.
    hit_map = np.where(over_limit, 1, 0)
    return hit_map.sum()

#### Preprocessing

In [5]:
import numpy as np
from PIL import Image
import requests, base64
import json
import argparse
from io import BytesIO
import cv2
import heapq
import re



# Load a single feature array and its matching label map as a quick check of
# the pipeline before processing whole designs.
file_path = '/data2/NVIDIA/CircuitNet-N28/Dataset/DRC/feature/zero-riscy-a/7228-zero-riscy-a-1-c2-u0.9-m2-p4-f0.npy'
label_path = '/data2/NVIDIA/CircuitNet-N28/Dataset/DRC/label/zero-riscy-a/7228-zero-riscy-a-1-c2-u0.9-m2-p4-f0.npy'
numpy_image = np.load(file_path)
label_image = np.load(label_path).squeeze()
# Move the last axis to the front so that iterating yields one 2-D map per step.
# assumes the stored array is laid out as (H, W, C) — TODO confirm
batch_image = numpy_image.transpose(2,0,1)
image_features = []
image_inferences = []

for i, image in enumerate(batch_image):
    image_features.append(image)
    # 8-bit PIL copy of the same map; assumes values lie in [0, 1] — TODO confirm
    image_inferences.append(Image.fromarray(np.uint8(image * 255)))
    
# Cell output: number of label entries above the default threshold.
get_drc_violations(label_image)

np.int64(11095)

In [6]:
def get_all_features(images):
    """
    Run every extractor in ``feat_func_list`` on ``images`` and merge the results.

    Each extractor is called with ``images`` and its return value is folded
    into one dictionary via ``dict.update``; when two extractors emit the same
    key, the one that runs later wins.

    Returns
    -------
    dict
        Combined feature-name -> value mapping.
    """
    merged = {}
    for extractor in feat_func_list:
        merged.update(extractor(images))
    return merged

#### Fitness Scoring

In [7]:
# Spot-check: run all extractors on the single sample loaded earlier and show
# the resulting feature dictionary.
get_all_features(image_features)

{'max_congestion_ripple': 90.0,
 'macro_interference_zone': 225078.75,
 'macro_compactness_index': 0.010917102899855733,
 'cell_density_variance_gradient': np.float64(0.002547227626207137),
 'mean_macro_proximity': np.float64(0.3278047219932913),
 'congestion_gradient': np.float64(0.38317922651810027),
 'cell_density_anisotropy': np.float64(12.659571833871114),
 'mean_eGR_local_variability': np.float64(2.0692643376092803e-05),
 'diagonal_cell_density_gradient': np.float64(0.055367297852235625),
 'mean_cell_density_fluctuation': np.float64(0.40313851594176975),
 'macro_transition_band': np.float64(842.7158679246136),
 'cell_density_skewness': np.float64(1.5351419454983046),
 'cell_density_skewness_gradient': np.float64(10.836382926838187),
 'macro_interaction_perimeter': 1730.2712259292603,
 'cell_density_fluctuation_balance': np.float64(25.466824632629187),
 'congestion_pressure_fluctuation': np.float64(28.412934256099604),
 'congestion_variability_throughout_hierarchy': np.float64(1.5

In [8]:
import pandas as pd
from tqdm import tqdm
import os
import torch
import multiprocessing


def single_extractor(design, dataset_root="/data2/NVIDIA/CircuitNet-N28/Dataset/DRC"):
    """
    Build the feature/label table for one design.

    Every file in ``<dataset_root>/label/<design>/`` becomes one row: its file
    name is stored in ``id`` and ``get_drc_violations`` of the squeezed array
    in ``label``. Feature files with the same name found in
    ``<dataset_root>/feature/<design>/`` are run through ``get_all_features``
    and joined onto their row; rows without a feature file keep NaN features.

    Parameters
    ----------
    design : str
        Sub-directory name of the design to process.
    dataset_root : str, optional
        Directory holding the ``feature`` and ``label`` trees. Defaults to the
        location the notebook was written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, one column per extracted feature, then ``label``.
    """
    feature_dir = os.path.join(dataset_root, "feature", design)
    label_dir = os.path.join(dataset_root, "label", design)

    ids = []
    labels = []
    for filename in tqdm(os.listdir(label_dir)):
        label_image = np.load(os.path.join(label_dir, filename)).squeeze()
        ids.append(filename)
        labels.append(get_drc_violations(label_image))

    df = pd.DataFrame({"id": ids})

    # Collect one record per sample and join once at the end. Writing each
    # value through a boolean mask rescans the whole frame per file and
    # fragments it column by column.
    labelled = set(ids)
    records = []
    for filename in tqdm(os.listdir(feature_dir)):
        if filename not in labelled:
            # No row to attach the features to, so skip the expensive extraction.
            continue
        numpy_image = np.load(os.path.join(feature_dir, filename))
        # Last axis moved to the front: one 2-D map per list entry.
        channel_maps = list(numpy_image.transpose(2, 0, 1))
        records.append({"id": filename, **get_all_features(channel_maps)})

    if records:
        # Left join keeps the label ordering, so `labels` still lines up below.
        df = df.merge(pd.DataFrame(records), on="id", how="left")

    df["label"] = labels
    return df


def dataset_setting(designs):
    """
    Extract the feature/label tables of several designs in parallel.

    Parameters
    ----------
    designs : iterable of str
        Design names; each is handed to ``single_extractor`` in a worker
        process.

    Returns
    -------
    pandas.DataFrame
        The per-design tables concatenated in the order of ``designs`` (row
        indices are not renumbered). An empty frame when ``designs`` is empty.
    """
    designs = list(designs)
    if not designs:
        # pd.concat raises on an empty list; nothing to extract anyway.
        return pd.DataFrame()

    # The context manager shuts the workers down once map() has returned, so
    # repeated calls do not leave idle processes behind.
    with multiprocessing.Pool() as pool:
        df_list = pool.map(single_extractor, designs)
    return pd.concat(df_list)

In [9]:
# Build one feature/label table per split. Each call hands the listed designs
# to a multiprocessing pool (see dataset_setting), one design per task.
train_df = dataset_setting(train_design)
test_df_a = dataset_setting(test_design_a)
test_df_b = dataset_setting(test_design_b)

100%|██████████| 1858/1858 [00:40<00:00, 45.41it/s] 
100%|██████████| 1248/1248 [00:45<00:00, 27.17it/s]
100%|██████████| 1969/1969 [00:47<00:00, 41.13it/s]
100%|██████████| 2003/2003 [00:56<00:00, 35.26it/s]
100%|██████████| 1248/1248 [02:19<00:00,  8.95it/s]
100%|██████████| 1858/1858 [04:56<00:00,  6.26it/s]
100%|██████████| 1969/1969 [05:01<00:00,  6.53it/s]
100%|██████████| 2003/2003 [05:53<00:00,  5.67it/s]
100%|██████████| 2042/2042 [00:19<00:00, 102.17it/s]
100%|██████████| 2042/2042 [05:28<00:00,  6.22it/s]
100%|██████████| 1122/1122 [00:27<00:00, 40.94it/s] 
100%|██████████| 1122/1122 [02:16<00:00,  8.21it/s]


In [10]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
feature_cols = list(feat_pool.keys())
ordered_cols = ["id"] + feature_cols + ["label"]

# Take explicit copies so the assignments below write into independent frames
# rather than into column-sliced views of the extracted tables.
train_df = train_df[ordered_cols].copy()
test_df_a = test_df_a[ordered_cols].copy()
test_df_b = test_df_b[ordered_cols].copy()

# Fit the min/max range on the training designs only, then apply that same
# mapping to both test sets. Refitting on each test set would put train and
# test features on different scales. Test values may therefore fall outside
# [0, 1] when a test design exceeds the range seen in training.
train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])
test_df_a[feature_cols] = scaler.transform(test_df_a[feature_cols])
test_df_b[feature_cols] = scaler.transform(test_df_b[feature_cols])

In [11]:
# Replace the per-design row indices left by the concatenation with a fresh
# 0..n-1 range on each table.
train_df = train_df.reset_index(drop=True)
test_df_a = test_df_a.reset_index(drop=True)
test_df_b = test_df_b.reset_index(drop=True)

In [12]:
# Write the three tables to CSV; index=False keeps the row index out of the files.
train_df.to_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/train_df.csv", index=False)
test_df_a.to_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/test_df_a.csv", index=False)
test_df_b.to_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/test_df_b.csv", index=False)

In [13]:
# Read the tables back from the CSV files written above.
# NOTE(review): presumably a restart point so later cells can run without
# repeating the extraction — confirm.
train_df = pd.read_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/train_df.csv")
test_df_a = pd.read_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/test_df_a.csv")
test_df_b = pd.read_csv("/home/felixchaotw/mllm-physical-design/DRV/dataset/test_df_b.csv")

In [14]:
# Keep only the rows that actually carry a label.
train_df = train_df.dropna(subset=["label"])
test_df_a = test_df_a.dropna(subset=["label"])
test_df_b = test_df_b.dropna(subset=["label"])

In [15]:
# Renumber the rows 0..n-1 after the label filter.
train_df = train_df.reset_index(drop=True)
test_df_a = test_df_a.reset_index(drop=True)
test_df_b = test_df_b.reset_index(drop=True)

In [16]:
# Display the training table (pandas truncates the rendering of large frames).
train_df

Unnamed: 0,id,max_congestion_ripple,congestion_gradient,mean_macro_proximity,diagonal_cell_density_gradient,mean_cell_density_fluctuation,congestion_transition_amplitude,cell_density_variance_gradient,congestion_variability_throughout_hierarchy,cell_density_skewness,...,macro_interaction_perimeter,macro_interference_zone,cell_density_dipole,macro_compactness_index,cell_density_anisotropy,congestion_pressure_fluctuation,mean_eGR_local_adjacent_cohesion,mean_eGR_local_variability,cell_density_fluctuation_balance,label
0,766-RISCY-a-2-c2-u0.7-m4-p8-f0.npy,0.09,0.500747,0.554939,0.693408,0.699878,0.396056,0.429032,0.026267,0.077399,...,0.121494,0.269221,0.710987,0.113466,0.037357,0.212004,0.012833,0.000196,2.622517e-12,0
1,872-RISCY-a-2-c2-u0.75-m2-p1-f1.npy,0.06,0.386463,0.625622,0.718997,0.723030,0.316797,0.353144,0.038012,0.066449,...,0.316806,0.219616,0.636559,0.503112,0.036032,0.149910,0.005340,0.000101,4.190182e-12,0
2,1710-RISCY-a-3-c5-u0.7-m1-p2-f1.npy,0.05,0.545110,0.628466,0.644529,0.621444,0.433735,0.151127,0.023385,0.063249,...,0.403505,0.271178,0.782676,0.606688,0.042323,0.225370,0.011573,0.000192,3.759506e-12,0
3,1452-RISCY-a-3-c2-u0.8-m3-p5-f0.npy,0.37,0.298792,0.512746,0.550197,0.587003,0.264949,0.456422,0.025996,0.157494,...,0.434012,0.472075,0.681935,0.355195,0.067206,0.275394,0.077750,0.004236,1.425771e-10,3987
4,216-RISCY-a-1-c2-u0.85-m3-p1-f1.npy,0.15,0.694171,0.305279,0.517270,0.552522,0.608646,0.220766,0.055464,0.101628,...,0.223905,0.166985,0.941199,0.381088,0.068761,0.372731,0.043432,0.001401,5.371920e-12,1447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7073,6063-RISCY-FPU-b-1-c5-u0.9-m2-p5-f0.npy,0.10,0.146695,0.488279,0.587810,0.668402,0.133919,0.596737,0.010120,0.219328,...,0.586643,0.706980,0.690581,0.609446,0.057316,0.086449,0.205930,0.005972,9.618711e-11,1819
7074,6882-RISCY-FPU-b-3-c5-u0.85-m2-p4-f0.npy,0.20,0.195378,0.476659,0.501910,0.544452,0.162884,0.341128,0.014344,0.215717,...,0.687445,0.633623,0.870100,0.607488,0.085759,0.157986,0.324823,0.024070,5.077866e-12,90
7075,5971-RISCY-FPU-b-1-c5-u0.7-m2-p1-f0.npy,0.21,0.088070,0.523973,0.633972,0.655260,0.070733,0.246751,0.007793,0.148955,...,0.547125,0.542486,0.782354,0.795739,0.036723,0.084483,0.188660,0.011862,2.558291e-12,8
7076,6435-RISCY-FPU-b-2-c5-u0.75-m2-p6-f0.npy,0.08,0.041184,0.468086,0.494656,0.504945,0.037131,0.252933,0.006357,0.146433,...,0.644006,0.549055,0.928096,0.780294,0.073309,0.051748,0.103379,0.003637,3.561536e-12,43


In [17]:
def id_to_design(name, designs=None):
    """
    Return the design whose name occurs inside the sample id ``name``.

    Parameters
    ----------
    name : str
        Sample id (file name) to classify.
    designs : sequence of str, optional
        Candidate design names. Defaults to the notebook-level
        ``train_design`` list.

    Returns
    -------
    str or None
        The matching design, or ``None`` when no candidate occurs in ``name``.
        When several candidates match (one design name contained in another),
        the longest one is returned so the result does not depend on the
        order of the list; ties keep the earlier entry.
    """
    candidates = train_design if designs is None else designs
    matches = [d for d in candidates if d in name]
    if not matches:
        return None
    return max(matches, key=len)

In [18]:
# Tag every row with the training design found in its id (None when no
# training design name occurs in the id).
train_df["design"] = train_df["id"].apply(id_to_design)

In [21]:
preference_df_list = []
num_pairs = 10000
pair_columns = ["design", "chosen", "rejected", "chosen_score", "rejected_score"]
# Seeded generator so a Restart & Run All reproduces the same pairs.
pair_rng = np.random.default_rng(0)

# For every design, draw up to `num_pairs` distinct pairs of samples whose
# labels differ. Within a pair, "chosen" is the sample with the larger label
# and "rejected" the one with the smaller label.
for design, group in train_df.groupby("design"):
    group = group.reset_index(drop=True)
    sample_ids = group["id"].to_numpy()
    sample_labels = group["label"].to_numpy()
    num_samples = len(group)

    # Number of unordered pairs with different labels = all pairs minus pairs
    # inside each group of equal labels. Capping the target at this value
    # guarantees the rejection-sampling loop below terminates.
    tie_sizes = group["label"].value_counts().to_numpy()
    all_pairs = num_samples * (num_samples - 1) // 2
    tied_pairs = int((tie_sizes * (tie_sizes - 1) // 2).sum())
    target_pairs = min(num_pairs, all_pairs - tied_pairs)

    seen_pairs = set()  # every unordered (low_idx, high_idx) already emitted
    rows = []
    pbar = tqdm(total=target_pairs, desc=f"Processing design: {design}")
    while len(rows) < target_pairs:
        i, j = (int(k) for k in pair_rng.choice(num_samples, 2, replace=False))
        if sample_labels[i] == sample_labels[j]:
            continue
        pair_key = (i, j) if i < j else (j, i)
        if pair_key in seen_pairs:
            continue
        seen_pairs.add(pair_key)

        if sample_labels[i] > sample_labels[j]:
            chosen_idx, rejected_idx = i, j
        else:
            chosen_idx, rejected_idx = j, i

        rows.append({
            "design": design,
            "chosen": sample_ids[chosen_idx],
            "rejected": sample_ids[rejected_idx],
            "chosen_score": sample_labels[chosen_idx],
            "rejected_score": sample_labels[rejected_idx],
        })
        pbar.update(1)

    pbar.close()

    # Build the frame once from the collected rows, then shuffle it.
    preference_df = pd.DataFrame(rows, columns=pair_columns)
    preference_df = preference_df.sample(frac=1, random_state=pair_rng).reset_index(drop=True)
    preference_df_list.append(preference_df)
            

Processing design: RISCY-FPU-b: 2670it [00:40, 66.71it/s] 
Processing design: RISCY-FPU-a: 100%|██████████| 10000/10000 [00:13<00:00, 759.95it/s]
Processing design: RISCY-FPU-b: 100%|██████████| 10000/10000 [00:11<00:00, 859.32it/s]
Processing design: RISCY-a: 100%|██████████| 10000/10000 [00:12<00:00, 771.49it/s]
Processing design: RISCY-b: 100%|██████████| 10000/10000 [00:11<00:00, 864.10it/s]


In [22]:
# Stack the per-design pair tables under one fresh 0..n-1 index and store the
# result as CSV without the index column.
preference_df = pd.concat(preference_df_list, ignore_index=True)
preference_df.to_csv(
    "/home/felixchaotw/mllm-physical-design/DRV/dataset/preference_df.csv",
    index=False,
)