#### Initailization

In [1]:
from feature_extract_func import *

In [2]:
train_design = ["RISCY-a", "RISCY-b", "RISCY-FPU-a", "RISCY-FPU-b"]
test_design_a = ["zero-riscy-a"]
test_design_b = ["zero-riscy-b"]

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [17]:
def get_ir_drop(image, threshold=0.1):
    """
    Get IR drop violations from the image based on a threshold.
    """
    violations = np.where(image >= threshold, 1, 0).sum()
    return violations

In [18]:
selected_indices = [0, 1, 2, 3, 10, 17, 23]

#### Preprocessing

In [20]:
import numpy as np
from PIL import Image
import requests, base64
import json
import argparse
from io import BytesIO
import cv2
import heapq
import re



file_path = '/data2/NVIDIA/CircuitNet-N28/Dataset/IR_drop/feature/zero-riscy-a/7228-zero-riscy-a-1-c2-u0.9-m2-p4-f0.npy'
label_path = '/data2/NVIDIA/CircuitNet-N28/Dataset/IR_drop/label/zero-riscy-a/7228-zero-riscy-a-1-c2-u0.9-m2-p4-f0.npy'
numpy_image = np.load(file_path)
label_image = np.load(label_path).squeeze()
batch_image = numpy_image.transpose(2,0,1)
image_features = []
image_inferences = []

for i, image in enumerate(batch_image):
    if i in selected_indices:
        image_features.append(image)
        image_inferences.append(Image.fromarray(np.uint8(image * 255)))
    
get_ir_drop(label_image)

np.int64(20737)

In [21]:
def get_all_features(images):
    final_features = {}
    for feat_func in feat_func_list:
        feat = feat_func(images)
        final_features.update(feat)
        
    return final_features

#### Fitness Scoring

In [22]:
get_all_features(image_features)

{'horizontal_power_distribution_symmetry': np.float64(3640.707629912144),
 'mean_power_sca': np.float64(0.024479065244095266),
 'heat_intensity_correlation': np.float64(0.833917146373941),
 'central_power_saturation': np.float64(0.9600339582489555),
 'vertical_power_distribution_symmetry': np.float64(0.03819601014988627),
 'proximity_power_pattern_asymmetry': np.float64(0.03576034272477501),
 'macro_power_proximity': np.float64(0.04193359833685848),
 'mean_power_density_deviation': np.float64(0.04100932143797503),
 'edge_power_intensity': np.float64(0.20807330415630096),
 'power_sink_effect': np.float64(0.04626065653418358),
 'mean_power_all': np.float64(0.05989422188693993),
 'mean_power_i': np.float64(0.04322940174227234),
 'power_balance_ratio': np.float64(0.8995970177916303),
 'power_gradient_variation': np.float64(0.05348568985167185),
 'localized_coupling_variability': np.float64(0.10037388992415908),
 'power_intensity_anomaly_detection': np.float64(18810.0),
 'localized_gradient

In [24]:
import pandas as pd
from tqdm import tqdm
import os
import torch
import multiprocessing


def single_extractor(design):
    feature_path = f"/data2/NVIDIA/CircuitNet-N28/Dataset/IR_drop/feature/{design}/" 
    label_path = f"/data2/NVIDIA/CircuitNet-N28/Dataset/IR_drop/label/{design}/"

    labels = []
    ids = []

    for filename in tqdm(os.listdir(label_path)):
            file_path = os.path.join(label_path, filename)
            label_image = np.load(file_path).squeeze()
            label = get_ir_drop(label_image)
            ids.append(filename)
            labels.append(label)
            
    df = pd.DataFrame({"id": ids,})

    for filename in tqdm(os.listdir(feature_path)):
            file_path = os.path.join(feature_path, filename)
            numpy_image = np.load(file_path)
            batch_image = numpy_image.transpose(2,0,1)
            image_features = []
            for i, image in enumerate(batch_image):
                if i in selected_indices:
                    image_features.append(image)
            
            index = (df["id"] == filename)
            
            all_features = get_all_features(image_features)
            for key, value in all_features.items():
                df.loc[index, key] = value
        
    df['label'] = labels
    return df


def dataset_setting(designs):
    pool = multiprocessing.Pool()
    df_list = pool.map(single_extractor, designs)
    return pd.concat(df_list)

In [25]:
train_df = dataset_setting(train_design)
test_df_a = dataset_setting(test_design_a)
test_df_b = dataset_setting(test_design_b)

100%|██████████| 1248/1248 [00:39<00:00, 31.79it/s]
100%|██████████| 1858/1858 [01:06<00:00, 27.74it/s]
100%|██████████| 1969/1969 [01:10<00:00, 27.94it/s]
100%|██████████| 2003/2003 [01:12<00:00, 27.80it/s]
100%|██████████| 1248/1248 [08:43<00:00,  2.38it/s]
100%|██████████| 1858/1858 [08:33<00:00,  3.62it/s]
100%|██████████| 2003/2003 [09:10<00:00,  3.64it/s]
100%|██████████| 1969/1969 [09:27<00:00,  3.47it/s]
100%|██████████| 2042/2042 [00:17<00:00, 119.50it/s]
100%|██████████| 2042/2042 [04:46<00:00,  7.12it/s]
100%|██████████| 1122/1122 [00:09<00:00, 120.48it/s]
100%|██████████| 1122/1122 [02:37<00:00,  7.10it/s]


In [26]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
train_df = train_df[["id"] + list(feat_pool.keys()) + ["label"]]
test_df_a = test_df_a[["id"] + list(feat_pool.keys()) + ["label"]]
test_df_b = test_df_b[["id"] + list(feat_pool.keys()) + ["label"]]
train_df[list(feat_pool.keys())] = scaler.fit_transform(train_df[list(feat_pool.keys())])
test_df_a[list(feat_pool.keys())] = scaler.fit_transform(test_df_a[list(feat_pool.keys())]) 
test_df_b[list(feat_pool.keys())] = scaler.fit_transform(test_df_b[list(feat_pool.keys())])

In [27]:
train_df.reset_index(drop=True, inplace=True)
test_df_a.reset_index(drop=True, inplace=True)
test_df_b.reset_index(drop=True, inplace=True)

In [28]:
train_df.to_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/train_df.csv", index=False)
test_df_a.to_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/test_df_a.csv", index=False)
test_df_b.to_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/test_df_b.csv", index=False)

In [29]:
train_df = pd.read_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/train_df.csv")
test_df_a = pd.read_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/test_df_a.csv")
test_df_b = pd.read_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/test_df_b.csv")

In [30]:
train_df = train_df[train_df['label'].notna()]
test_df_a = test_df_a[test_df_a['label'].notna()]
test_df_b = test_df_b[test_df_b['label'].notna()]

In [31]:
train_df.reset_index(drop=True, inplace=True)
test_df_a.reset_index(drop=True, inplace=True)
test_df_b.reset_index(drop=True, inplace=True)

In [32]:
train_df

Unnamed: 0,id,horizontal_power_distribution_symmetry,mean_power_sca,heat_intensity_correlation,central_power_saturation,vertical_power_distribution_symmetry,proximity_power_pattern_asymmetry,macro_power_proximity,mean_power_density_deviation,edge_power_intensity,...,mean_power_i,power_balance_ratio,power_gradient_variation,localized_coupling_variability,power_intensity_anomaly_detection,localized_gradient_intensity,spatial_correlation_power_i,uniformity_index_power_i,spatial_density_power_i,label
0,766-RISCY-a-2-c2-u0.7-m4-p8-f0.npy,0.508588,0.435359,0.271461,0.199127,0.255520,0.617066,0.489490,0.539859,0.422143,...,0.330414,0.809963,0.468610,0.527969,0.633534,0.570946,0.433980,0.469583,0.543570,25674
1,872-RISCY-a-2-c2-u0.75-m2-p1-f1.npy,0.590106,0.497857,0.428727,0.265539,0.413455,0.826042,0.656772,0.716247,0.549821,...,0.527138,0.845248,0.659294,0.765975,0.733168,0.782165,0.667928,0.690865,0.637514,28281
2,1710-RISCY-a-3-c5-u0.7-m1-p2-f1.npy,0.328630,0.232944,0.243848,0.641162,0.378158,,0.503784,0.396047,0.438597,...,0.312930,0.766948,0.577228,0.574699,0.572595,0.531686,0.353142,0.390370,0.448038,26122
3,1452-RISCY-a-3-c2-u0.8-m3-p5-f0.npy,0.301340,0.361863,0.391074,0.748108,0.558682,0.378880,0.325973,0.491948,0.467309,...,0.250089,0.866403,0.259170,0.362714,0.481442,0.406515,0.270830,0.307219,0.322885,19968
4,216-RISCY-a-1-c2-u0.85-m3-p1-f1.npy,0.660510,0.502298,0.365179,0.351134,0.362139,0.718433,0.662205,0.734384,0.436395,...,0.579994,0.899803,0.528266,0.658242,0.636414,0.669106,0.682938,0.704839,0.633840,27095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7073,6063-RISCY-FPU-b-1-c5-u0.9-m2-p5-f0.npy,0.375032,0.413517,0.849089,0.165259,0.155053,0.394941,0.266831,0.383433,0.312185,...,0.286251,0.542381,0.284778,0.399762,0.222064,0.400597,0.262676,0.298812,0.354638,20050
7074,6882-RISCY-FPU-b-3-c5-u0.85-m2-p4-f0.npy,0.210784,0.362105,0.822641,0.163866,0.372206,0.430516,0.243548,0.290035,0.196460,...,0.211524,0.533498,0.257814,0.355010,0.145821,0.369566,0.183506,0.215160,0.235836,18751
7075,5971-RISCY-FPU-b-1-c5-u0.7-m2-p1-f0.npy,0.242809,0.355320,0.833745,0.287279,0.379571,0.432152,0.332306,0.286103,0.266116,...,0.238771,0.482081,0.268137,0.401423,0.166529,0.373366,0.231109,0.265918,0.395736,22235
7076,6435-RISCY-FPU-b-2-c5-u0.75-m2-p6-f0.npy,0.269718,0.354334,0.843833,0.167582,0.316266,0.461444,0.275977,0.297959,0.243024,...,0.256188,0.476817,0.289852,0.399557,0.171735,0.397980,0.238907,0.274097,0.404808,23465


In [33]:
def id_to_design(name):
    for d in train_design:
        if d in name:
            return d
    return None

In [34]:
train_df["design"] = train_df["id"].apply(id_to_design)

In [35]:
preference_df_list = []
num_pairs = 25000

for design, group in train_df.groupby("design"):
    preference_df = pd.DataFrame(columns=["design", "chosen", "rejected", "chosen_score", "rejected_score"])
    group = group.reset_index(drop=True)
    num_samples = len(group)
    records = {}
    pbar = tqdm(total=num_pairs, desc=f"Processing design: {design}")
    while(len(preference_df) < num_pairs):
            i, j = np.random.choice(num_samples, 2, replace=False)
            if i == j or records.get(i, -1) == j or records.get(j, -1) == i:
                continue
            sample_a = group.iloc[i]
            sample_b = group.iloc[j]
            if sample_a["label"] > sample_b["label"]:
                chosen = sample_a["id"]
                rejected = sample_b["id"]
                chosen_score = sample_a["label"]
                rejected_score = sample_b["label"]
            elif sample_a["label"] == sample_b["label"]:
                continue
            else:
                chosen = sample_b["id"]
                rejected = sample_a["id"]
                chosen_score = sample_b["label"]
                rejected_score = sample_a["label"]
            
            records[i] = j
            preference_df = preference_df._append({"design": design, "chosen": chosen, "rejected": rejected, "chosen_score": chosen_score, "rejected_score": rejected_score}, ignore_index=True)
            pbar.update(1)
            
    pbar.close()
            
    preference_df = preference_df.sample(frac=1).reset_index(drop=True)
    preference_df = preference_df.sample(n=num_pairs)
    preference_df_list.append(preference_df)
            

Processing design: RISCY-FPU-a: 100%|██████████| 25000/25000 [00:31<00:00, 798.08it/s]
Processing design: RISCY-FPU-b: 100%|██████████| 25000/25000 [00:30<00:00, 807.30it/s]
Processing design: RISCY-a: 100%|██████████| 25000/25000 [00:31<00:00, 792.84it/s]
Processing design: RISCY-b: 100%|██████████| 25000/25000 [00:31<00:00, 792.34it/s]


In [36]:
preference_df = pd.concat(preference_df_list)
preference_df.reset_index(drop=True, inplace=True)
preference_df.to_csv("/home/felixchaotw/mllm-physical-design/IR_Drop/dataset/preference_df.csv", index=False)