In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score
import os
import warnings

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.get_logger().setLevel('ERROR')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
warnings.filterwarnings('ignore', category=FutureWarning)

DATA_FILENAME = '/kaggle/input/gem5-results/gem5_results.csv'  # <--- !! RENAME THIS TO YOUR CSV FILE !!

INPUT_FEATURES = [
    'cpu_clock_GHz', 'l1i_kb', 'l1d_kb', 'l1_assoc', 'l2_kb',
    'l2_assoc', 'fetchWidth', 'decodeWidth', 'issueWidth', 'commitWidth',
    'numROBEntries', 'numIQEntries', 'LQEntries', 'SQEntries', 'branch_predictor'
]

OUTPUT_METRICS = [
    'Area', 'Peak Power', 'Total Leakage', 'Peak Dynamic', 
    'Subthreshold Leakage', 'Gate Leakage', 'Runtime Dynamic', 'ipc',
    'branch_misprediction_rate', 'icache_miss_rate', 
    'dcache_read_miss_rate', 'dcache_write_miss_rate'
]

CATEGORICAL_FEATURES = ['branch_predictor']

2025-10-27 19:05:42.880513: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761591943.067110      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761591943.139499      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# ============================================================================
# PART 1: DATA LOADING AND PREPROCESSING
# ============================================================================
print("--- [Part 1] Loading and Preprocessing Data ---")

if not os.path.exists(DATA_FILENAME):
    print(f"Error: Could not find data file '{DATA_FILENAME}'.")
    print("Please make sure the file is in the same directory and the name is correct.")
    exit()

try:
    df = pd.read_csv(DATA_FILENAME)
except Exception as e:
    print(f"Error reading CSV file: {e}")
    exit()

try:
    X_df = df[INPUT_FEATURES]
    y_df = df[OUTPUT_METRICS]
except KeyError as e:
    print(f"\nError: A column name in your lists is not in the CSV file.")
    print(f"Column not found: {e}")
    print("Please check your 'INPUT_FEATURES' and 'OUTPUT_METRICS' lists.")
    exit()

--- [Part 1] Loading and Preprocessing Data ---


In [4]:
X_df.head()

Unnamed: 0,cpu_clock_GHz,l1i_kb,l1d_kb,l1_assoc,l2_kb,l2_assoc,fetchWidth,decodeWidth,issueWidth,commitWidth,numROBEntries,numIQEntries,LQEntries,SQEntries,branch_predictor
0,3.4,16,32,1,128,16,2,8,2,4,32,32,64,8,TAGE
1,1.4,128,64,2,512,4,8,2,12,8,256,128,32,32,LocalBP
2,3.2,64,16,8,512,4,8,2,2,2,128,96,32,32,TournamentBP
3,2.8,64,16,1,2048,8,12,4,2,4,128,96,64,16,MultiperspectivePerceptron64KB
4,4.0,128,16,2,1024,2,12,8,8,12,192,96,64,64,MultiperspectivePerceptron64KB


In [5]:
y_df.head()

Unnamed: 0,Area,Peak Power,Total Leakage,Peak Dynamic,Subthreshold Leakage,Gate Leakage,Runtime Dynamic,ipc,branch_misprediction_rate,icache_miss_rate,dcache_read_miss_rate,dcache_write_miss_rate
0,104.1838,84.1828,16.289,63.8925,15.3246,1.1498,8.0597,0.6265,0.0563,0.0112,0.0185,0.0104
1,114.8473,76.3575,18.2882,54.4357,16.9324,1.3149,10.3666,0.8389,0.0812,0.0086,0.0173,0.0102
2,79.3346,65.2464,16.7377,41.6266,15.6093,1.1444,7.6594,0.7168,0.0675,0.009,0.1015,0.0106
3,110.7148,66.0129,18.2379,41.8925,17.206,1.171,7.5061,0.6753,0.1243,0.0066,0.0931,0.0106
4,140.6207,155.988,18.3157,132.4366,17.3131,1.2794,24.2688,0.7964,0.1292,0.0075,0.1036,0.0106


In [6]:
print("\n--- [Part 2] Model Training and Testing ---")

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import numpy as np

X_train, X_test, y_train, y_test = train_test_split(
    X_df, y_df, test_size=0.1, random_state=42
)
print(f"Training with {len(X_train)} samples, testing with {len(X_test)} samples.")

numerical_features = [col for col in INPUT_FEATURES if col not in CATEGORICAL_FEATURES]

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),                      # Scale numeric
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), CATEGORICAL_FEATURES)  # One-hot encode categorical
    ],
    remainder='passthrough'
)

X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

n_features = X_train_processed.shape[1]

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

n_outputs = y_train_scaled.shape[1]

print(f"Original input features: {len(INPUT_FEATURES)}")
print(f"Processed & scaled input features: {n_features}")
print(f"Output metrics: {n_outputs}")

surrogate_model = Sequential([
    Dense(64, kernel_regularizer=l2(1e-4), input_shape=(n_features,)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.1),

    Dense(32, kernel_regularizer=l2(1e-4)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.1),

    Dense(16, kernel_regularizer=l2(1e-4)),
    LeakyReLU(alpha=0.1),

    Dense(n_outputs, activation='linear')  # Output layer for regression
])

surrogate_model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_percentage_error']
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

lr_reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1
)

history = surrogate_model.fit(
    X_train_processed, y_train_scaled,
    validation_split=0.2,
    epochs=500,
    batch_size=16,
    callbacks=[early_stop, lr_reduce],
    verbose=0
)

print("Model training complete. ✅")

y_pred_scaled = surrogate_model.predict(X_test_processed)
y_pred = y_scaler.inverse_transform(y_pred_scaled)

mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f"Test MAPE (all outputs avg): {mape:.2f}%")



--- [Part 2] Model Training and Testing ---
Training with 2250 samples, testing with 250 samples.
Original input features: 15
Processed & scaled input features: 20
Output metrics: 12


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1761591955.531290      37 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
I0000 00:00:1761591959.886553      98 service.cc:148] XLA service 0x79fe84014010 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1761591959.887013      98 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1761591960.236306      98 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1761591962.219584      98 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Epoch 88: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 103: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 115: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 127: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 139: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 144: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 144: early stopping
Restoring model weights from the end of the best epoch: 134.
Model training complete. ✅
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step
Test MAPE (all outputs avg): 3.91%


In [7]:
from sklearn.metrics import r2_score

print("\n--- Evaluating Model on Test Set ---")

loss, mape = surrogate_model.evaluate(X_test_processed, y_test_scaled, verbose=0)
print(f"Test Set Loss (MSE, scaled): {loss:.4f}")
print(f"Test Set Mean Absolute Percentage Error (MAPE, scaled): {mape:.2f}%")

y_pred_scaled = surrogate_model.predict(X_test_processed, verbose=0)
y_pred_test = y_scaler.inverse_transform(y_pred_scaled)

r2 = r2_score(y_test, y_pred_test)
print(f"Test Set R-squared (R², original scale): {r2:.4f} (Closer to 1.0 is better)")


--- Evaluating Model on Test Set ---
Test Set Loss (MSE, scaled): 0.0424
Test Set Mean Absolute Percentage Error (MAPE, scaled): 45.75%
Test Set R-squared (R², original scale): 0.9635 (Closer to 1.0 is better)


In [8]:
print("\n--- ML-Powered Random Search (Find New Configs) ---")

DISCRETE_PARAMS = {
    "l1i_kb": [16, 32, 64, 128],
    "l1d_kb": [16, 32, 64, 128],
    "l1_assoc": [1, 2, 4, 8],
    "l2_kb": [128, 256, 512, 1024, 2048],
    "l2_assoc": [2, 4, 8, 16],
    # Add other integer parameters here if needed
    # "num_cores": [1, 2, 4, 8, 16],
}

search_space = {}

for col in numerical_features:
    if col in DISCRETE_PARAMS:
        # Special discrete parameter
        search_space[col] = ('discrete', DISCRETE_PARAMS[col])
    else:
        # Continuous int or float parameter
        is_int = pd.api.types.is_integer_dtype(X_df[col])
        search_space[col] = ('int' if is_int else 'float', X_df[col].min(), X_df[col].max())

for col in CATEGORICAL_FEATURES:
    search_space[col] = ('categorical', X_df[col].unique())

print("\n--- Generated Search Space ---")
for param, (dtype, *bounds) in search_space.items():
    if dtype == 'discrete':
        print(f" - {param} (discrete): {bounds[0]}")
    elif dtype == 'categorical':
        print(f" - {param} (categorical): {bounds[0]}")
    else:
        print(f" - {param} ({dtype}): min={bounds[0]}, max={bounds[1]}")
print("---------------------------------")

def generate_random_configs(n, space):
    """Generates n random configurations as a DataFrame."""
    configs = {}
    for col, (dtype, *bounds) in space.items():
        if dtype == 'int':
            configs[col] = np.random.randint(bounds[0], bounds[1] + 1, size=n)
        elif dtype == 'float':
            configs[col] = np.random.uniform(bounds[0], bounds[1], size=n)
        elif dtype == 'discrete':
            configs[col] = np.random.choice(bounds[0], size=n)
        elif dtype == 'categorical':
            configs[col] = np.random.choice(bounds[0], size=n)
    return pd.DataFrame(configs)


def find_topk_from_model(model, preproc, space, output_constraints, input_constraints=None, 
                         objective=('pca','min'), n_iter=100000, top_k=5):

    print(f"Running ML-powered random search for {n_iter} iterations...")

    random_X_df = generate_random_configs(n_iter, space)

    if input_constraints:
        feasible_mask = pd.Series(True, index=random_X_df.index)
        for key, cond_list in input_constraints.items():
            if not isinstance(cond_list, list):
                cond_list = [cond_list]
            for op, val in cond_list:
                if op == '<=':
                    feasible_mask &= (random_X_df[key] <= val)
                elif op == '>=':
                    feasible_mask &= (random_X_df[key] >= val)
                elif op == '<':
                    feasible_mask &= (random_X_df[key] < val)
                elif op == '>':
                    feasible_mask &= (random_X_df[key] > val)
                elif op == '==':
                    feasible_mask &= np.isclose(random_X_df[key], val)
                else:
                    raise ValueError(f"Unsupported operator {op}")
        random_X_df = random_X_df.loc[feasible_mask]
        if random_X_df.empty:
            print("No configurations satisfy input constraints!")
            return None, None

    random_X_processed = preproc.transform(random_X_df)
    y_pred_scaled = model.predict(random_X_processed, batch_size=1024, verbose=0)
    y_pred_unscaled = y_scaler.inverse_transform(y_pred_scaled)
    y_pred_df = pd.DataFrame(y_pred_unscaled, columns=OUTPUT_METRICS, index=random_X_df.index)

    y_pred_df['pca'] = y_pred_df['Peak Dynamic'] * y_pred_df['Area'] / y_pred_df['ipc']
    y_pred_df['cpi'] = 1.0 / y_pred_df['ipc'].replace(0, np.nan)

    feasible_mask = pd.Series(True, index=y_pred_df.index)
    for key, cond_list in output_constraints.items():
        if key not in y_pred_df.columns:
            print(f"Warning: {key} not in predicted outputs.")
            feasible_mask &= False
            continue
        if not isinstance(cond_list, list):
            cond_list = [cond_list]
        for op, val in cond_list:
            if op == '<=':
                feasible_mask &= (y_pred_df[key] <= val)
            elif op == '>=':
                feasible_mask &= (y_pred_df[key] >= val)
            elif op == '<':
                feasible_mask &= (y_pred_df[key] < val)
            elif op == '>':
                feasible_mask &= (y_pred_df[key] > val)
            elif op == '==':
                feasible_mask &= np.isclose(y_pred_df[key], val)
            else:
                raise ValueError(f"Unsupported operator {op}")

    feasible_X = random_X_df.loc[feasible_mask]
    feasible_y = y_pred_df.loc[feasible_mask]

    print(f"Found {len(feasible_y)} *predicted* feasible configurations.")
    if feasible_y.empty:
        return None, None

    obj_col, obj_mode = objective
    ascending = True if obj_mode == 'min' else False
    top_indices = feasible_y.sort_values(by=obj_col, ascending=ascending).head(top_k).index

    print(feasible_X.shape, feasible_y.shape)

    return feasible_X.loc[top_indices], feasible_y.loc[top_indices]



--- ML-Powered Random Search (Find New Configs) ---

--- Generated Search Space ---
 - cpu_clock_GHz (float): min=1.0, max=4.0
 - l1i_kb (discrete): [16, 32, 64, 128]
 - l1d_kb (discrete): [16, 32, 64, 128]
 - l1_assoc (discrete): [1, 2, 4, 8]
 - l2_kb (discrete): [128, 256, 512, 1024, 2048]
 - l2_assoc (discrete): [2, 4, 8, 16]
 - fetchWidth (int): min=2, max=12
 - decodeWidth (int): min=2, max=12
 - issueWidth (int): min=2, max=12
 - commitWidth (int): min=2, max=12
 - numROBEntries (int): min=32, max=256
 - numIQEntries (int): min=16, max=128
 - LQEntries (int): min=8, max=64
 - SQEntries (int): min=8, max=64
 - branch_predictor (categorical): ['TAGE' 'LocalBP' 'TournamentBP' 'MultiperspectivePerceptron64KB'
 'BiModeBP' 'TAGE_SC_L_64KB']
---------------------------------


In [9]:
print("\n" + "="*50)
print(" [Part 3] Inference / Optimization (Top-K Designs)")
print("="*50)

OUTPUT_CONSTRAINTS = {
    'Area': [('<=', 150)],
    'Peak Power': [('<=', 120)],
    'ipc': [('>=', 1.0)],
    'branch_misprediction_rate': [('<=', 0.06), ('>=', 0.0)],
    'dcache_write_miss_rate': [('>=', 0.0)],
    'dcache_read_miss_rate': [('>=', 0.0)],
    'icache_miss_rate': [('>=', 0.0)]
}

INPUT_CONSTRAINTS = {
    'cpu_clock_GHz': [('>=', 0.8), ('<=', 1.5)]
}

OBJECTIVE = ('pca', 'min')

# top_k = int(input("Enter number of top designs to retrieve: "))
top_k = 3

top_X, top_y = find_topk_from_model(
    surrogate_model,
    preprocessor,
    search_space,
    output_constraints=OUTPUT_CONSTRAINTS,
    input_constraints=INPUT_CONSTRAINTS,
    objective=OBJECTIVE,
    n_iter=1000000,
    top_k=top_k
)

if top_X is not None:
    for i, idx in enumerate(top_X.index, 1):
        print(f"\nTop {i} Predicted Feasible Configuration:")
        config = top_X.loc[idx]
        metrics = top_y.loc[idx]

        print("Configuration (xi):")
        config_copy = config.copy()
        if 'branch_predictor' in config_copy.index:
            config_copy['BP'] = f'"{config_copy["branch_predictor"]}"'
            config_copy = config_copy.drop('branch_predictor')

        for k, v in config_copy.items():
            if isinstance(v, float):
                print(f"{k}={v:.6f}")
            else:
                print(f"{k}={v}")

        print("\nPredicted Metrics:")
        print(metrics.to_string())
else:
    print("\nNo feasible configurations found for the given constraints.")

print("\n--- End of Script ---")


 [Part 3] Inference / Optimization (Top-K Designs)
Running ML-powered random search for 1000000 iterations...
Found 6900 *predicted* feasible configurations.
(6900, 15) (6900, 14)

Top 1 Predicted Feasible Configuration:
Configuration (xi):
cpu_clock_GHz=1.036969
l1i_kb=128
l1d_kb=64
l1_assoc=8
l2_kb=256
l2_assoc=2
fetchWidth=11
decodeWidth=4
issueWidth=4
commitWidth=10
numROBEntries=115
numIQEntries=91
LQEntries=51
SQEntries=51
BP="TAGE_SC_L_64KB"

Predicted Metrics:
Area                           95.346718
Peak Power                     32.597397
Total Leakage                  16.941883
Peak Dynamic                   14.184817
Subthreshold Leakage           15.854826
Gate Leakage                    1.196789
Runtime Dynamic                 5.487137
ipc                             1.001851
branch_misprediction_rate       0.054943
icache_miss_rate                0.008823
dcache_read_miss_rate           0.015719
dcache_write_miss_rate          0.010199
pca                          1349.