In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/status-output-1/status_output_small.csv
/kaggle/input/status-output-1/status_output.csv


In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("/kaggle/input/status-output-1/status_output.csv")

# Columns
step_cols = [f'Step_{i}' for i in range(1, 21)]
input_cols = ['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before'] + step_cols
target_col = 'Power'

# Encode the steps to integer tokens
all_steps = sorted(set(step for row in df[step_cols].values for step in row))
step_encoder = {step: idx + 100 for idx, step in enumerate(all_steps)}  # reserve 0-99 for scalar values

# Encode steps
encoded_steps = df[step_cols].applymap(lambda s: step_encoder[s])

# Normalize and scale scalar inputs to avoid huge embedding indices
scalar_inputs = df[['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before']].copy()
scalar_inputs = scalar_inputs.apply(lambda col: col - col.min() + 1)

# Concatenate scalar inputs and steps
df_encoded = pd.concat([scalar_inputs, encoded_steps], axis=1)

# Dataset class
class PowerDataset(Dataset):
    def __init__(self, features, targets):
        self.X = features
        self.y = targets

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x_seq = torch.tensor(self.X[idx], dtype=torch.long)
        y_val = torch.tensor(self.y[idx], dtype=torch.float32)
        return x_seq, y_val

# Normalize QoR target to 0–1 range
qor_min = df[target_col].min()
qor_max = df[target_col].max()
df[target_col + "_scaled"] = (df[target_col] - qor_min) / (qor_max - qor_min)

# Prepare data
X_raw = df_encoded.values.tolist()
y_raw = df[target_col + "_scaled"].values

X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, test_size=0.2, random_state=42)

train_dataset = PowerDataset(X_train, y_train)
test_dataset = PowerDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# === Load and preprocess NEW dataset the same way ===
new_df = pd.read_csv("/kaggle/input/status-output-1/status_output_small.csv")  # Update to your actual file path

# Step encoding using the existing encoder, with handling for unknowns
step_encoder = {step: idx + 100 for idx, step in enumerate(all_steps)}  # Ensure it's outside the scalar range
step_encoder["unknown"] = len(step_encoder) + 100  # Reserve an index for unknown steps
new_encoded_steps = new_df[step_cols].applymap(lambda s: step_encoder.get(s, step_encoder["unknown"]))

# Normalize scalar inputs
new_scalar_inputs = new_df[['PIs', 'POs', 'AND_Gates_Before', 'Levels_Before']].copy()
new_scalar_inputs = new_scalar_inputs.apply(lambda col: col - col.min() + 1)

# Concatenate
new_df_encoded = pd.concat([new_scalar_inputs, new_encoded_steps], axis=1)

# Normalize power using original scaling
new_df[target_col + "_scaled"] = (new_df[target_col] - qor_min) / (qor_max - qor_min)

# Prepare new data
X_new_raw = new_df_encoded.values.tolist()
y_new_raw = new_df[target_col + "_scaled"].values

# Determine the maximum step index in the new dataset
max_step_value_new = new_encoded_steps.values.max()

# Train-test split
X_new_train, X_new_test, y_new_train, y_new_test = train_test_split(X_new_raw, y_new_raw, test_size=0.2, random_state=42)

# Dataset and loaders
train_new_dataset = PowerDataset(X_new_train, y_new_train)
test_new_dataset = PowerDataset(X_new_test, y_new_test)

train_new_loader = DataLoader(train_new_dataset, batch_size=32, shuffle=True)
test_new_loader = DataLoader(test_new_dataset, batch_size=32)



  encoded_steps = df[step_cols].applymap(lambda s: step_encoder[s])
  new_encoded_steps = new_df[step_cols].applymap(lambda s: step_encoder.get(s, step_encoder["unknown"]))


In [8]:
#Changes made:
#Switched to a bidirectional GRU to capture context from both directions.
#Stacked 2 GRU layers for more modeling power.
#Added dropout for regularization.
#Used a Linear → ReLU → Linear head for better learning capacity.

class PowerCustomRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128, num_layers=2, dropout=0.3):
        super(PowerCustomRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
            bidirectional=True
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        embedded = self.embedding(x)  # [batch_size, seq_len, embedding_dim]
        _, hidden = self.rnn(embedded)  # hidden shape: [num_layers*2, batch, hidden_dim]
        hidden_cat = torch.cat((hidden[-2], hidden[-1]), dim=1)  # concatenate last layers of both directions
        return self.fc(hidden_cat)

# Ensure vocab_size is large enough to cover all indices
vocab_size = max(max(df_encoded.values.flatten()) + 1, max(new_df_encoded.values.flatten()) + 1, max_step_value_new + 1, 1000)
model = PowerCustomRNN(vocab_size)

# Training
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# === Training with Early Stopping ===
best_loss = float('inf')
patience = 10
counter = 0

for epoch in range(300):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x).squeeze()
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/300, Loss: {total_loss:.4f}")

    # Early stopping
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            output = model(batch_x).squeeze()
            val_loss += criterion(output, batch_y).item()
    val_loss /= len(test_loader)

    if val_loss < best_loss:
        best_loss = val_loss
        counter = 0
        best_model_state = model.state_dict()
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            model.load_state_dict(best_model_state)
            break

# === Evaluation ===
model.eval()
actuals = []
predictions = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        output = model(batch_x).squeeze()
        actuals.extend(batch_y.cpu().numpy())
        predictions.extend(output.cpu().numpy())

actuals = np.array(actuals)
predictions = np.array(predictions)
nonzero_mask = actuals != 0
mape = np.mean(np.abs((actuals[nonzero_mask] - predictions[nonzero_mask]) / actuals[nonzero_mask])) * 100
mapa = 100 - mape
print(f"Mean Absolute Percentage Accuracy (MAPA): {mapa:.2f}%")

Epoch 1/300, Loss: 0.8040
Epoch 2/300, Loss: 0.1811
Epoch 3/300, Loss: 0.1050
Epoch 4/300, Loss: 0.0805
Epoch 5/300, Loss: 0.0743
Epoch 6/300, Loss: 0.0564
Epoch 7/300, Loss: 0.0542
Epoch 8/300, Loss: 0.0712
Epoch 9/300, Loss: 0.0429
Epoch 10/300, Loss: 0.0383
Epoch 11/300, Loss: 0.0450
Epoch 12/300, Loss: 0.0370
Epoch 13/300, Loss: 0.0344
Epoch 14/300, Loss: 0.0290
Epoch 15/300, Loss: 0.0272
Epoch 16/300, Loss: 0.0306
Epoch 17/300, Loss: 0.0298
Epoch 18/300, Loss: 0.0275
Epoch 19/300, Loss: 0.0231
Epoch 20/300, Loss: 0.0225
Epoch 21/300, Loss: 0.0168
Epoch 22/300, Loss: 0.0188
Epoch 23/300, Loss: 0.0196
Epoch 24/300, Loss: 0.0176
Epoch 25/300, Loss: 0.0175
Epoch 26/300, Loss: 0.0145
Epoch 27/300, Loss: 0.0141
Epoch 28/300, Loss: 0.0115
Epoch 29/300, Loss: 0.0136
Epoch 30/300, Loss: 0.0130
Epoch 31/300, Loss: 0.0133
Epoch 32/300, Loss: 0.0132
Epoch 33/300, Loss: 0.0137
Epoch 34/300, Loss: 0.0118
Epoch 35/300, Loss: 0.0116
Epoch 36/300, Loss: 0.0129
Epoch 37/300, Loss: 0.0105
Epoch 38/3

In [9]:

model.eval()

# Fine-tuning the model with the new dataset
fine_tune_epochs = 100
for epoch in range(fine_tune_epochs):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_new_loader:  # Use train_new_loader
        batch_x = batch_x.long()  # Ensure the batch is in the correct type (long tensor for indices)
        optimizer.zero_grad()
        output = model(batch_x).squeeze()  # Forward pass
        loss = criterion(output, batch_y)  # Compute loss
        loss.backward()  # Backpropagate
        optimizer.step()  # Update weights
        total_loss += loss.item()  # Accumulate the loss
    print(f"Fine-tune Epoch {epoch+1}/{fine_tune_epochs}, Loss: {total_loss:.4f}")

# Evaluation on new test set
model.eval()
actuals = []
predictions = []

with torch.no_grad():
    for batch_x, batch_y in test_new_loader:  # Evaluate on test_new_loader
        batch_x = batch_x.long()  # Ensure the batch is in the correct type (long tensor for indices)
        output = model(batch_x).squeeze()
        actuals.extend(batch_y.cpu().numpy())
        predictions.extend(output.cpu().numpy())

actuals = np.array(actuals)
predictions = np.array(predictions)

# Avoid division by zero
nonzero_mask = actuals != 0
mape = np.mean(np.abs((actuals[nonzero_mask] - predictions[nonzero_mask]) / actuals[nonzero_mask])) * 100
mapa = 100 - mape

print(f"Mean Absolute Percentage Accuracy (MAPA): {mapa:.2f}%")

Fine-tune Epoch 1/100, Loss: 31.8651
Fine-tune Epoch 2/100, Loss: 4.8504
Fine-tune Epoch 3/100, Loss: 0.7470
Fine-tune Epoch 4/100, Loss: 0.2224
Fine-tune Epoch 5/100, Loss: 0.1334
Fine-tune Epoch 6/100, Loss: 0.0990
Fine-tune Epoch 7/100, Loss: 0.0974
Fine-tune Epoch 8/100, Loss: 0.0902
Fine-tune Epoch 9/100, Loss: 0.0778
Fine-tune Epoch 10/100, Loss: 0.0728
Fine-tune Epoch 11/100, Loss: 0.0730
Fine-tune Epoch 12/100, Loss: 0.0719
Fine-tune Epoch 13/100, Loss: 0.0600
Fine-tune Epoch 14/100, Loss: 0.0441
Fine-tune Epoch 15/100, Loss: 0.0380
Fine-tune Epoch 16/100, Loss: 0.0427
Fine-tune Epoch 17/100, Loss: 0.0372
Fine-tune Epoch 18/100, Loss: 0.0341
Fine-tune Epoch 19/100, Loss: 0.0346
Fine-tune Epoch 20/100, Loss: 0.0331
Fine-tune Epoch 21/100, Loss: 0.0290
Fine-tune Epoch 22/100, Loss: 0.0368
Fine-tune Epoch 23/100, Loss: 0.0320
Fine-tune Epoch 24/100, Loss: 0.0267
Fine-tune Epoch 25/100, Loss: 0.0232
Fine-tune Epoch 26/100, Loss: 0.0228
Fine-tune Epoch 27/100, Loss: 0.0231
Fine-tune