In [None]:
# 📦 Install Required Packages
# In this cell, we're installing essential Python packages using pip.
# We are adding 'torch-geometric' and 'torch-scatter' packages to the environment.
# These packages are crucial for graph-based machine learning tasks.

!pip install torch-geometric torch-scatter


In [None]:
# 📚 Import Necessary Libraries
# In this cell, we import essential Python libraries to support various tasks in our notebook.
# We use libraries like NumPy, Pandas, tqdm, scikit-learn, PyTorch, and others.
# The 'device' variable is set to 'cuda' if a GPU is available; otherwise, it defaults to 'cpu'.

import numpy as np  # 🧮 NumPy for numerical computations
import pandas as pd  # 🐼 Pandas for data manipulation
import os  # 📂 Operating system-related functions
from tqdm import tqdm  # 🔄 tqdm for progress bar visualization

import sklearn  # 🧬 scikit-learn for machine learning utilities
import sklearn.model_selection  # 📊 scikit-learn's model selection module
import torch  # 🔥 PyTorch for deep learning
from torch import nn  # 🧠 PyTorch's neural network module
from torch import Tensor  # 🚀 PyTorch's Tensor data type
from torch_geometric.nn import GCNConv  # 📊 Graph Convolutional Network layer
from torch_geometric.datasets import Planetoid  # 🌍 PyTorch Geometric dataset for graph data
from torch.utils.data import DataLoader, Dataset  # 📦 PyTorch data loading utilities
from timm.scheduler import CosineLRScheduler  # 📈 Learning rate scheduler
import matplotlib.pyplot as plt  # 📊 Matplotlib for plotting

# ⚙️ Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


In [None]:
# 📁 Define a Function to Load DataFrames
# This function loads data stored in different splits (train, valid, test) from a specified directory.
# It reads files in the directory, extracts data using NumPy, and organizes it into DataFrames.

def load_df(directory):
    """Load every archive of the train/valid/test splits into DataFrames.

    ``directory`` must contain the sub-directories ``train``, ``valid`` and
    ``test``; every file inside them is opened with ``np.load``.

    Args:
        directory: Path of the folder holding the three split sub-directories.

    Returns:
        dict mapping each split name to a DataFrame with one row per file.
        The columns are the array names stored in the archives plus a
        ``'file'`` column holding the file name.
    """
    splits = ["train", "valid", "test"]  # 🔄 List of data splits
    dfs = dict()  # 📊 Dictionary to store DataFrames for each split

    for split in splits:
        path = os.path.join(directory, split)  # 📂 Path to the split's directory
        records = []  # 📄 One dictionary of arrays per file

        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and therefore any seeded split computed on it) reproducible.
        for file in sorted(os.listdir(path)):
            # The archive object keeps its file handle open until it is closed,
            # so materialise all arrays inside the with-block and release it.
            with np.load(os.path.join(path, file)) as archive:
                d = dict(archive)
            d['file'] = file  # 📄 Remember which file the sample came from
            records.append(d)
        dfs[split] = pd.DataFrame(records)  # 🐼 One row per file
    return dfs

# 📄 Read the three tile:xla splits; 'tile_xla' maps split name -> DataFrame.
tile_xla_dir = "/kaggle/input/predict-ai-model-runtime/npz_all/npz/tile/xla/"
tile_xla = load_df(tile_xla_dir)


# 📦 Define Dataset and Model

In [None]:
# 📦 Define Custom Dataset Class
# This class, 'TileDataset', is a custom dataset class for our machine learning task.
# It inherits from the PyTorch 'Dataset' class and implements the necessary methods (__init__, __len__, and __getitem__).

class TileDataset(Dataset):
    """Map-style dataset that yields one DataFrame row as a tuple of tensors.

    Every row of the wrapped DataFrame must provide the array-valued columns
    'config_feat', 'node_feat', 'node_opcode', 'edge_index',
    'config_runtime' and 'config_runtime_normalizers'.
    """

    def __init__(self, df):
        self.df = df  # 💼 DataFrame holding one sample per row

    def __len__(self):
        return len(self.df)  # 🔢 Number of samples == number of rows

    def __getitem__(self, idx):
        """Return ``(config_feat, node_feat, node_opcode, edge_index, target)``.

        ``idx`` is positional (``iloc``). ``target`` is the normalised runtime
        of every config, standardised within the sample (zero mean, unit
        standard deviation).
        """
        row = self.df.iloc[idx]
        config_feat = torch.tensor(row['config_feat'].astype(np.float32))
        node_feat = torch.tensor(row['node_feat'].astype(np.float32))
        node_opcode = torch.tensor(row['node_opcode'].astype(np.int32))
        # Swap the two axes so the pair dimension comes first (presumably the
        # data is stored as (num_edges, 2) - confirm against the dataset).
        # PyTorch Geometric documents edge_index as a torch.long tensor, hence
        # the cast to int64 rather than int32.
        edge_index = torch.tensor(np.swapaxes(row['edge_index'], 0, 1).astype(np.int64))
        # Runtime relative to its normaliser; the epsilon avoids division by zero.
        target = (row['config_runtime'] / (row['config_runtime_normalizers'] + 1e-5)).astype(np.float32)
        # 📊 Z-score standardisation across the configs of this sample
        # (this is not min-max scaling).
        target = (target - np.mean(target)) / (np.std(target) + 1e-5)
        target = torch.tensor(target)
        return config_feat, node_feat, node_opcode, edge_index, target

# This class defines the structure of our custom dataset, converting and preprocessing data as necessary for training and evaluation.
# The relevant emojis provide a visual context for each part of the code.


In [None]:
# 🧠 Define Simple Neural Network Model
# In this cell, we define a simple neural network model named 'SimpleModel'.
# This model takes input data with specified dimensions and passes it through convolutional and dense layers.

class SimpleModel(torch.nn.Module):
    """GCN encoder over the op graph followed by an MLP that scores each config.

    Node features are concatenated with a learned opcode embedding, passed
    through a stack of GCNConv layers (each followed by ReLU), mean-pooled
    into one graph vector, joined with every config feature row and scored
    by a three-layer MLP.

    Args:
        hidden_channels: Non-empty sequence with the output width of each
            hidden GCN layer.
        graph_feats: Output width of the last GCN layer (graph embedding size).
        hidden_dim: Width of the two hidden MLP layers. It used to be accepted
            but ignored (the width was fixed to 64); passing 64 reproduces the
            previous architecture exactly.
        num_opcodes: Size of the opcode vocabulary (previously hard-coded 120).
        op_embedding_dim: Opcode embedding size (previously hard-coded 4).
        node_feat_dim: Number of features per node (previously hard-coded 140).
        config_feat_dim: Number of features per config (previously hard-coded 24).
    """

    def __init__(self, hidden_channels, graph_feats, hidden_dim,
                 num_opcodes=120, op_embedding_dim=4,
                 node_feat_dim=140, config_feat_dim=24):
        super().__init__()  # 🧬 Initialize the parent class 'torch.nn.Module'
        assert len(hidden_channels) > 0, "hidden_channels must not be empty"

        self.embedding = torch.nn.Embedding(num_opcodes, op_embedding_dim)

        # Layer widths: input -> hidden_channels... -> graph_feats
        conv_dims = [node_feat_dim + op_embedding_dim, *hidden_channels, graph_feats]
        self.convs = torch.nn.ModuleList(
            [GCNConv(d_in, d_out) for d_in, d_out in zip(conv_dims[:-1], conv_dims[1:])]
        )

        # MLP head scoring one (graph embedding, config) pair per row
        self.dense = torch.nn.Sequential(nn.Linear(graph_feats + config_feat_dim, hidden_dim),
                                         nn.ReLU(),
                                         nn.Linear(hidden_dim, hidden_dim),
                                         nn.ReLU(),
                                         nn.Linear(hidden_dim, 1),
                                        )

    def forward(self, x_cfg: Tensor, x_feat: Tensor, x_op: Tensor, edge_index: Tensor) -> Tensor:
        """Score every config of one graph.

        Args:
            x_cfg: Config features, one row per config.
            x_feat: Node features, one row per node.
            x_op: Integer opcode per node (indices into the embedding table).
            edge_index: Graph connectivity passed to every GCNConv layer.

        Returns:
            1-D tensor with one standardised score per config (zero mean, unit
            standard deviation). With fewer than two configs the standard
            deviation is undefined, so only the mean is removed.
        """
        # 📊 Concatenate node features with opcode embeddings
        x = torch.cat([x_feat, self.embedding(x_op)], dim=1)

        for conv in self.convs:
            x = conv(x, edge_index).relu()

        # Mean over the node axis gives one embedding vector for the graph
        x_graph = torch.mean(x, dim=0)

        # 🔄 Attach the same graph embedding to every config row
        x = torch.cat([x_cfg, x_graph.expand(len(x_cfg), -1)], dim=1)

        x = torch.flatten(self.dense(x))

        # torch.std of a single element is NaN, which would poison the loss and
        # the gradients; centre only in that case.
        if x.numel() < 2:
            return x - torch.mean(x)
        return (x - torch.mean(x)) / (torch.std(x) + 1e-5)

# Build the network first, then place its parameters on the selected device (CPU or GPU)
model = SimpleModel(
    hidden_channels=[16, 32, 16, 48],
    graph_feats=64,
    hidden_dim=64,
)
model = model.to(device)


# 🚂 Training Epochs

In [None]:
# 📊 Concatenate DataFrames
# In this cell, we concatenate DataFrames 'train' and 'valid' from the 'tile_xla' dictionary along the row axis.
# We then reset the index of the resulting DataFrame for consistent indexing.

# Stack the 'train' and 'valid' rows into one frame with a fresh 0..n-1 index
df = pd.concat([tile_xla["train"], tile_xla["valid"]], axis=0, ignore_index=True)

# This operation combines the training and validation data for further processing, ensuring a unified DataFrame.


In [None]:
# 🔄 Cross-Validation Training Loop (Enhanced)

# Define the score_tile_mean function
def score_tile_mean(predictions, df):
    """Average, over all rows, of ``2 - mean(chosen runtimes) / mean(5 fastest runtimes)``.

    ``predictions[i]`` holds the config indices chosen for the i-th row
    (positional) of ``df``; runtimes are read from ``df['config_runtime']``.
    """
    total = 0
    for pos, runtimes in enumerate(df['config_runtime']):
        chosen_mean = np.mean(runtimes[predictions[pos]])
        fastest_five_mean = np.mean(np.sort(runtimes)[:5])
        total += 2 - chosen_mean / fastest_five_mean
    return total / len(df)

# Define the score_tile_max function
def score_tile_max(predictions, df):
    """Average, over all rows, of ``2 - min(chosen runtimes) / min(all runtimes)``.

    ``predictions[i]`` holds the config indices chosen for the i-th row
    (positional) of ``df``; runtimes are read from ``df['config_runtime']``.
    """
    total = 0
    for pos, runtimes in enumerate(df['config_runtime']):
        fastest_chosen = np.min(runtimes[predictions[pos]])
        fastest_overall = np.min(runtimes)
        total += 2 - fastest_chosen / fastest_overall
    return total / len(df)

# Create a shuffled K-Fold cross-validator with 10 splits (fixed seed for reproducible folds)
kfold = sklearn.model_selection.KFold(n_splits=10, shuffle=True, random_state=0)

# Best mean / max score reached in each fold (selected on the mean score)
score_means = []
score_maxs = []

# Define hyperparameters
learning_rate = 5e-5
weight_decay = 1e-6
num_epochs = 100


# Iterate through each fold
for fold, (tr_idx, va_idx) in enumerate(kfold.split(df)):
    # Start every fold from freshly initialised weights. Without this the same
    # network keeps training across folds, so each fold's validation rows have
    # already been seen during earlier folds and the scores are inflated.
    for module in model.modules():
        reset = getattr(module, 'reset_parameters', None)
        if callable(reset):
            reset()

    train_dataset = TileDataset(df.iloc[tr_idx])
    val_dataset = TileDataset(df.iloc[va_idx])
    criterion = torch.nn.MSELoss()
    steps = len(train_dataset) * num_epochs  # One scheduler step per training sample
    warmup_steps = int(steps * 0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = CosineLRScheduler(optimizer, t_initial=steps, warmup_t=warmup_steps, warmup_lr_init=1e-6, lr_min=2e-8)

    # -inf (not 0) so the first epoch always produces a checkpoint, even when
    # every score of the fold is zero or negative.
    best_score = float('-inf')
    best_score_max = float('-inf')

    for epoch in range(num_epochs):
        # ---- training: one optimizer step per sample (graph) ----
        model.train()
        pbar = tqdm(range(len(train_dataset)), leave=False)
        loss_sum = 0
        n = 0

        for i in pbar:
            cfg_ft, nd_ft, nd_op, ind, target = train_dataset[i]
            cfg_ft, nd_ft, nd_op, ind, target = cfg_ft.to(device), nd_ft.to(device), nd_op.to(device), ind.to(device), target.to(device)

            # Clear gradients left by the previous step; backward() accumulates
            # into .grad, so omitting this sums gradients over all past samples.
            optimizer.zero_grad()
            out = model(cfg_ft, nd_ft, nd_op, ind)
            loss = criterion(out, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1e-2)
            # The scheduler is indexed by the global step count, not by epoch
            scheduler.step(i + len(train_dataset) * epoch)
            optimizer.step()
            loss_sum += loss.item()
            n += 1
            pbar.set_description(f'running loss: {(loss_sum/n):.2f}, current loss: {(loss.item()):.2f}')
        pbar.close()

        # ---- validation: keep the indices of the 5 lowest predicted scores ----
        model.eval()
        tile_xla_predictions = []
        pbar = tqdm(range(len(val_dataset)), leave=False)

        # No gradients are needed for scoring; skipping autograd saves memory and time
        with torch.no_grad():
            for i in pbar:
                cfg_ft, nd_ft, nd_op, ind, _ = val_dataset[i]
                cfg_ft, nd_ft, nd_op, ind = cfg_ft.to(device), nd_ft.to(device), nd_op.to(device), ind.to(device)

                out = model(cfg_ft, nd_ft, nd_op, ind)
                tile_xla_predictions.append(np.argsort(out.cpu().numpy())[:5])
        pbar.close()

        # Calculate and display scores for the current fold and epoch
        score_mean = score_tile_mean(tile_xla_predictions, val_dataset.df)
        score_max = score_tile_max(tile_xla_predictions, val_dataset.df)
        print(f'fold {fold} epoch {epoch}, comp_score = {score_max:.3f}, mean_score = {score_mean:.3f},')

        # Update best scores and save the model if the mean score improves
        if score_mean > best_score:
            best_score = score_mean
            best_score_max = score_max
            torch.save(model.state_dict(), f'best_model_{fold}.pth')

    # Append the best scores for this fold to the respective lists
    score_means.append(best_score)
    score_maxs.append(best_score_max)

# Calculate and display the mean scores across all folds
print(f'comp_score = {np.mean(score_maxs)}, mean_score = {np.mean(score_means)},')


# 📊 Evaluate on Validation Dataset

# 🚀 Predict and Submit (only tile:xla predictions)

In [None]:
# 📊 Predict on Test Dataset (tile:xla)
# In this section, we use the trained model to make predictions on the test dataset ('tile:xla').

# Create a TileDataset for the 'tile:xla' test dataset
dataset = TileDataset(tile_xla["test"])

# For every test sample, collect one score vector per fold model
tile_xla_predictions = [[] for _ in range(len(dataset))]

# Ensemble every fold trained by the cross-validation cell. The fold count is
# read from the cross-validator rather than hard-coded, so no checkpoint is
# silently left out.
n_folds = kfold.get_n_splits()
for fold in range(n_folds):
    # Load from the same relative path the training cell saves to;
    # map_location lets checkpoints written on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(f'best_model_{fold}.pth', map_location=device))
    model.eval()  # 🕵️ Set the model to evaluation mode
    pbar = tqdm(range(len(dataset)))  # Progress bar for test data prediction

    # Inference only: disable autograd to save memory and time
    with torch.no_grad():
        for i in pbar:
            cfg_ft, nd_ft, nd_op, ind, _ = dataset[i]
            cfg_ft, nd_ft, nd_op, ind = cfg_ft.to(device), nd_ft.to(device), nd_op.to(device), ind.to(device)

            out = model(cfg_ft, nd_ft, nd_op, ind)
            tile_xla_predictions[i].append(out.cpu().numpy())

# Average the fold scores and keep the indices of the 5 lowest averaged scores
tile_xla_predictions = [np.argsort(np.mean(pred, axis=0))[:5] for pred in tile_xla_predictions]

# Preview the first few entries instead of dumping the whole list
tile_xla_predictions[:5]


In [None]:
# 📊 Generate and Save Submission File
# In this section, we generate a submission file based on the model predictions and save it.

# Read the sample submission file
sub = pd.read_csv('/kaggle/input/predict-ai-model-runtime/sample_submission.csv')

# Fill in the 'TopConfigs' cell of every tile:xla test file
for i, filename in enumerate(tile_xla["test"]['file'].values):
    # Strip the extension with splitext rather than a fixed 4-character slice,
    # and avoid shadowing the builtin `id`.
    sub_id = 'tile:xla:' + os.path.splitext(filename)[0]
    sub.loc[sub.ID == sub_id, 'TopConfigs'] = ';'.join(tile_xla_predictions[i].astype(str))

# Save the updated submission file as 'submission.csv' without the index
sub.to_csv('submission.csv', index=False)

# Preview the updated submission file
sub.head()
