In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("/kaggle/input/status-output/status_output.csv")

In [3]:
df.head()

Unnamed: 0,PIs,POs,AND_Gates_Before,Levels_Before,Step_1,Step_2,Step_3,Step_4,Step_5,Step_6,...,Step_17,Step_18,Step_19,Step_20,ND,Edge,Area,Delay,Levels_After,Power
0,24,25,5416,225,resub -z,drw,resyn,resub -z,resyn3,drwsat2,...,compress2,balance,resub -z,share,3295,8733,3295.0,81.0,81,3219.53
1,207,107,2198,30,share,compress2,share,compress,resyn2a,balance,...,choice,strash,balance,drwsat2,800,1805,800.0,22.0,22,934.57
2,26,11,1592,31,compress,rwsat,compress2,refactor,drf,choice,...,rewrite,refactor -z,compress2,refactor,551,1505,551.0,10.0,10,700.42
3,24,25,5416,225,drw,share,share,choice2,strash,resub -z,...,strash,resyn,compress2,dc2,3309,8676,3309.0,81.0,81,3208.0
4,207,107,2198,30,dc2,choice,strash,resub -z,rewrite,drf,...,resyn,rwsat,resyn2,drf,803,1782,803.0,24.0,24,920.3


In [4]:
# Columns
step_cols = [f'Step_{i}' for i in range(1, 21)]
input_cols = ['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before'] + step_cols
target_col = 'Power'

# Encode the steps to integer tokens
all_steps = sorted(set(step for row in df[step_cols].values for step in row))
step_encoder = {step: idx + 100 for idx, step in enumerate(all_steps)}  # reserve 0-99 for scalar values

# Encode steps
encoded_steps = df[step_cols].applymap(lambda s: step_encoder[s])

# Normalize and scale scalar inputs to avoid huge embedding indices
scalar_inputs = df[['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before']].copy()
scalar_inputs = scalar_inputs.apply(lambda col: col - col.min() + 1)

# Concatenate scalar inputs and steps
df_encoded = pd.concat([scalar_inputs, encoded_steps], axis=1)

# Dataset class
class PowerDataset(Dataset):
    def __init__(self, features, targets):
        self.X = features
        self.y = targets

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x_seq = torch.tensor(self.X[idx], dtype=torch.long)
        y_val = torch.tensor(self.y[idx], dtype=torch.float32)
        return x_seq, y_val

# Normalize QoR target to 0–1 range
qor_min = df[target_col].min()
qor_max = df[target_col].max()
df[target_col + "_scaled"] = (df[target_col] - qor_min) / (qor_max - qor_min)

# Prepare data
X_raw = df_encoded.values.tolist()
y_raw = df[target_col + "_scaled"].values

X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, test_size=0.2, random_state=42)

train_dataset = PowerDataset(X_train, y_train)
test_dataset = PowerDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# === Load and preprocess NEW dataset the same way ===
new_df = pd.read_csv("/kaggle/input/status-small/status_output_small.csv")  # Update to your actual file path

# Step encoding using the existing encoder, with handling for unknowns
step_encoder = {step: idx + 100 for idx, step in enumerate(all_steps)}  # Ensure it's outside the scalar range
step_encoder["unknown"] = len(step_encoder) + 100  # Reserve an index for unknown steps
new_encoded_steps = new_df[step_cols].applymap(lambda s: step_encoder.get(s, step_encoder["unknown"]))

# Normalize scalar inputs
new_scalar_inputs = new_df[['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before']].copy()
new_scalar_inputs = new_scalar_inputs.apply(lambda col: col - col.min() + 1)

# Concatenate
new_df_encoded = pd.concat([new_scalar_inputs, new_encoded_steps], axis=1)

# Normalize power using original scaling
new_df[target_col + "_scaled"] = (new_df[target_col] - qor_min) / (qor_max - qor_min)

# Prepare new data
X_new_raw = new_df_encoded.values.tolist()
y_new_raw = new_df[target_col + "_scaled"].values

# Determine the maximum step index in the new dataset
max_step_value_new = new_encoded_steps.values.max()

# Train-test split
X_new_train, X_new_test, y_new_train, y_new_test = train_test_split(X_new_raw, y_new_raw, test_size=0.2, random_state=42)

# Dataset and loaders
train_new_dataset = PowerDataset(X_new_train, y_new_train)
test_new_dataset = PowerDataset(X_new_test, y_new_test)

train_new_loader = DataLoader(train_new_dataset, batch_size=32, shuffle=True)
test_new_loader = DataLoader(test_new_dataset, batch_size=32)



  encoded_steps = df[step_cols].applymap(lambda s: step_encoder[s])
  new_encoded_steps = new_df[step_cols].applymap(lambda s: step_encoder.get(s, step_encoder["unknown"]))


In [5]:
# RNN model
class PowerRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128):
        super(PowerRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        embedded = self.embedding(x)
        _, hidden = self.rnn(embedded)
        return self.fc(hidden.squeeze(0))


# Ensure vocab_size is large enough to cover all indices
vocab_size = max(max(df_encoded.values.flatten()) + 1, max(new_df_encoded.values.flatten()) + 1, max_step_value_new + 1, 1000)
model = PowerRNN(vocab_size)

# Training
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# === Early Stopping Setup ===
best_val_loss = float('inf')
patience = 20
counter = 0

# === Training Loop with Early Stopping ===
for epoch in range(300):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x).squeeze()
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # === Validation Loss ===
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            output = model(batch_x).squeeze()
            loss = criterion(output, batch_y)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(test_loader)
    print(f"Epoch {epoch+1}, Train Loss: {total_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    # === Check for Early Stopping ===
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        best_model_state = model.state_dict()
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            model.load_state_dict(best_model_state)
            break

# === Final Evaluation (MAPA) ===
model.eval()
actuals = []
predictions = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output = model(batch_x).squeeze()
        actuals.extend(batch_y.cpu().numpy())
        predictions.extend(output.cpu().numpy())

actuals = np.array(actuals)
predictions = np.array(predictions)

nonzero_mask = actuals != 0
mape = np.mean(np.abs((actuals[nonzero_mask] - predictions[nonzero_mask]) / actuals[nonzero_mask])) * 100
mapa = 100 - mape

print(f"Mean Absolute Percentage Accuracy (MAPA): {mapa:.2f}%")

Epoch 1, Train Loss: 6.6868, Val Loss: 0.0808
Epoch 2, Train Loss: 6.1698, Val Loss: 0.0795
Epoch 3, Train Loss: 6.1039, Val Loss: 0.0819
Epoch 4, Train Loss: 5.9178, Val Loss: 0.0816
Epoch 5, Train Loss: 4.7080, Val Loss: 0.0153
Epoch 6, Train Loss: 0.5674, Val Loss: 0.0048
Epoch 7, Train Loss: 0.2469, Val Loss: 0.0040
Epoch 8, Train Loss: 0.1972, Val Loss: 0.0030
Epoch 9, Train Loss: 0.1826, Val Loss: 0.0030
Epoch 10, Train Loss: 0.1564, Val Loss: 0.0025
Epoch 11, Train Loss: 0.1370, Val Loss: 0.0024
Epoch 12, Train Loss: 0.1257, Val Loss: 0.0025
Epoch 13, Train Loss: 0.1176, Val Loss: 0.0025
Epoch 14, Train Loss: 0.1078, Val Loss: 0.0021
Epoch 15, Train Loss: 0.1010, Val Loss: 0.0022
Epoch 16, Train Loss: 0.1061, Val Loss: 0.0021
Epoch 17, Train Loss: 0.0832, Val Loss: 0.0021
Epoch 18, Train Loss: 0.0778, Val Loss: 0.0017
Epoch 19, Train Loss: 0.0666, Val Loss: 0.0023
Epoch 20, Train Loss: 0.0641, Val Loss: 0.0015
Epoch 21, Train Loss: 0.0583, Val Loss: 0.0014
Epoch 22, Train Loss: 

In [6]:

model.eval()

# Fine-tuning the model with the new dataset
fine_tune_epochs = 100
for epoch in range(fine_tune_epochs):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_new_loader:  # Use train_new_loader
        batch_x = batch_x.long()  # Ensure the batch is in the correct type (long tensor for indices)
        optimizer.zero_grad()
        output = model(batch_x).squeeze()  # Forward pass
        loss = criterion(output, batch_y)  # Compute loss
        loss.backward()  # Backpropagate
        optimizer.step()  # Update weights
        total_loss += loss.item()  # Accumulate the loss
    print(f"Fine-tune Epoch {epoch+1}/{fine_tune_epochs}, Loss: {total_loss:.4f}")

# Evaluation on new test set
model.eval()
actuals = []
predictions = []

with torch.no_grad():
    for batch_x, batch_y in test_new_loader:  # Evaluate on test_new_loader
        batch_x = batch_x.long()  # Ensure the batch is in the correct type (long tensor for indices)
        output = model(batch_x).squeeze()
        actuals.extend(batch_y.cpu().numpy())
        predictions.extend(output.cpu().numpy())

actuals = np.array(actuals)
predictions = np.array(predictions)

# Avoid division by zero
nonzero_mask = actuals != 0
mape = np.mean(np.abs((actuals[nonzero_mask] - predictions[nonzero_mask]) / actuals[nonzero_mask])) * 100
mapa = 100 - mape

print(f"Mean Absolute Percentage Accuracy (MAPA): {mapa:.2f}%")

Fine-tune Epoch 1/100, Loss: 50.1976
Fine-tune Epoch 2/100, Loss: 11.0782
Fine-tune Epoch 3/100, Loss: 0.8229
Fine-tune Epoch 4/100, Loss: 0.3238
Fine-tune Epoch 5/100, Loss: 0.2361
Fine-tune Epoch 6/100, Loss: 0.1464
Fine-tune Epoch 7/100, Loss: 0.1179
Fine-tune Epoch 8/100, Loss: 0.1073
Fine-tune Epoch 9/100, Loss: 0.0916
Fine-tune Epoch 10/100, Loss: 0.0819
Fine-tune Epoch 11/100, Loss: 0.0791
Fine-tune Epoch 12/100, Loss: 0.0731
Fine-tune Epoch 13/100, Loss: 0.0691
Fine-tune Epoch 14/100, Loss: 0.0667
Fine-tune Epoch 15/100, Loss: 0.0630
Fine-tune Epoch 16/100, Loss: 0.0607
Fine-tune Epoch 17/100, Loss: 0.0562
Fine-tune Epoch 18/100, Loss: 0.0556
Fine-tune Epoch 19/100, Loss: 0.0507
Fine-tune Epoch 20/100, Loss: 0.0525
Fine-tune Epoch 21/100, Loss: 0.0467
Fine-tune Epoch 22/100, Loss: 0.0437
Fine-tune Epoch 23/100, Loss: 0.0441
Fine-tune Epoch 24/100, Loss: 0.0401
Fine-tune Epoch 25/100, Loss: 0.0424
Fine-tune Epoch 26/100, Loss: 0.0385
Fine-tune Epoch 27/100, Loss: 0.0370
Fine-tun