In [1]:
from llvm_ml.data import load_pyg_dataset
# Sample ids forwarded to the loader through its `banned_ids` argument.
# NOTE(review): presumably these samples are excluded from the dataset —
# confirm in llvm_ml.data. Swap in an empty list to pass no banned ids.
banned_ids = [
    "clang_347660",
    "x264_29245",
    "clang_777523",
]

# Load the graph dataset from the local .cbuf file.
dataset = load_pyg_dataset(
    "./data/ryzen3600_v8.cbuf",
    use_binary_opcode=False,
    banned_ids=banned_ids,
)

print(f"Training with {len(dataset)} samples")
# Optional, disabled by default:
# dataset.print_summary()

Training with 166372 samples


In [10]:
import pytorch_lightning as pl
from torch_geometric.loader import DataLoader
from llvm_ml.utils import plot_histogram
import torch.utils.data
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import PIL.Image
from torchvision.transforms import ToTensor
import torch.nn.functional as F
from model.utils import plot_lift_chart
from torch_geometric.nn import GCNConv, DenseGraphConv
from torch_geometric.utils import to_dense_batch, to_dense_adj
import torchmetrics

import warnings
# Silence every UserWarning whose message begins with 'TypedStorage is deprecated'
# (the `message` argument is a regex matched against the start of the warning text).
# NOTE(review): presumably this warning is raised repeatedly by torch internals and
# would otherwise clutter the training output — confirm it is still needed.
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')

class GNN(pl.LightningModule):
    """Graph network that predicts one non-negative scalar per input graph.

    Pipeline implemented by ``forward``: opcode embedding -> sparse ``GCNConv``
    -> ReLU -> conversion to a padded dense batch -> linear -> ``DenseGraphConv``
    -> ReLU -> linear decode to one value per node -> ReLU -> read-out of the
    value at each graph's last real (non-padded) node.

    Args:
        num_opcodes: Number of rows in the embedding table; node ids in
            ``data.x`` must be smaller than this.
        hidden_size: Embedding width. The two graph convolutions output
            ``hidden_size // 2`` features.
        batch_size: Nominal loader batch size. Stored on ``self.batch_size``
            for callers that read it; the model itself works with any number
            of graphs per batch.
        learning_rate: Initial learning rate handed to Adam.
        debug: Accepted for backward compatibility; not used.
    """

    def __init__(self, num_opcodes, hidden_size, batch_size, learning_rate=0.002, debug=False):
        super().__init__()

        self.lr = learning_rate
        self.batch_size = batch_size

        self.embedding = torch.nn.Embedding(num_opcodes, hidden_size)
        self.conv1 = GCNConv(hidden_size, hidden_size // 2)
        self.fc = torch.nn.Linear(hidden_size // 2, hidden_size)
        self.conv2 = DenseGraphConv(hidden_size, hidden_size // 2, aggr="add")
        self.decode = torch.nn.Linear(hidden_size // 2, 1)
        # A 1x1 kernel with default stride: this layer returns its input unchanged.
        # It is kept so the module layout (and its printed summary) stays the same.
        self.pool = torch.nn.AvgPool2d((1, 1))

        # Separate metric objects so train and validation statistics never mix.
        self.train_mae = torchmetrics.MeanAbsoluteError()
        self.val_mae = torchmetrics.MeanAbsoluteError()

    def forward(self, data):
        """Run the network on a (batched) graph object.

        Args:
            data: Object exposing ``x`` (node ids), ``edge_index`` and ``batch``
                (node -> graph assignment), as produced by the PyG ``DataLoader``.

        Returns:
            1-D tensor holding one prediction per graph in ``data``.
        """
        edge_index = data.edge_index
        batch = data.batch

        x = self.embedding(data.x)
        x = F.relu(self.conv1(x, edge_index))

        # Switch to the padded dense layout: x is (graphs, max_nodes, features)
        # and `mask` marks which node slots hold real nodes rather than padding.
        x, mask = to_dense_batch(x, batch)
        dense_adj = to_dense_adj(edge_index, batch)

        x = self.fc(x)
        x = F.relu(self.conv2(x, dense_adj, mask))
        x = F.relu(self.decode(x))
        x = self.pool(x)

        # Read the prediction from each graph's last *real* node. Indexing the
        # last slot of the padded tensor (x[:, -1, 0]) would hit padding for
        # every graph smaller than the largest one in the batch, where the
        # masked convolution output is zero and the result no longer depends
        # on the graph at all. The clamp keeps the index valid for a graph
        # that has no nodes.
        last_node = mask.sum(dim=1).clamp(min=1) - 1
        graph_idx = torch.arange(x.size(0), device=x.device)

        # Already shaped (graphs,); no reshape to a fixed batch size, so a
        # final partial batch is handled as well.
        return x[graph_idx, last_node, 0]

    def _shared_step(self, batch, stage, metric):
        """Compute the MSE loss for one batch and log ``{stage}_loss`` / ``{stage}_mae``."""
        # The loader yields a pair; only the first element (the graph batch) is used.
        bb, _raw = batch
        y_hat = self(bb)
        loss = F.mse_loss(y_hat, bb.y)

        metric(y_hat, bb.y)
        # Weight epoch-level aggregation by the real number of graphs in this batch.
        num_graphs = y_hat.size(0)
        self.log(f"{stage}_loss", loss, on_epoch=True, batch_size=num_graphs)
        self.log(f"{stage}_mae", metric, on_epoch=True, batch_size=num_graphs)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the MSE training loss for ``batch`` and log train metrics."""
        return self._shared_step(batch, "train", self.train_mae)

    def validation_step(self, batch, batch_idx):
        """Log validation loss and MAE for ``batch``; returns nothing."""
        self._shared_step(batch, "val", self.val_mae)

    def configure_optimizers(self):
        """Adam with weight decay, plus a plateau scheduler driven by ``val_loss``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=1e-5)
        # Halve the learning rate after 10 epochs without improvement in the
        # monitored value, wait 5 epochs before counting again, never go below 1e-6.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            'min',
            patience=10,
            factor=0.5,
            verbose=True,
            min_lr=1e-6,
            cooldown=5,
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss',
            }
        }

In [11]:
# --- Run configuration -------------------------------------------------------
SEED = 42
# Size of the embedding table handed to the model.
# NOTE(review): must be larger than the biggest node id in the dataset —
# confirm that 21000 covers the opcode vocabulary.
NUM_OPCODES = 21000
batch_size = 512
hidden_size = 256

# Seed Python, NumPy and torch (and the loader workers) so that weight
# initialisation and batch shuffling are repeatable across runs.
pl.seed_everything(SEED, workers=True)

# --- 70 / 30 train / validation split ----------------------------------------
num_training = int(0.7 * len(dataset))
num_val = len(dataset) - num_training

# A dedicated, seeded generator pins the partition itself: without it every
# execution of this cell produces a different validation set, so metrics from
# different runs are not comparable.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [num_training, num_val],
    generator=torch.Generator().manual_seed(SEED),
)

# drop_last=True keeps every batch at exactly `batch_size` graphs; as a result
# up to batch_size - 1 samples per loader are skipped each epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=6, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=6, drop_last=True)

# --- Model, logging, training ------------------------------------------------
model = GNN(NUM_OPCODES, hidden_size, batch_size)

logger = TensorBoardLogger("runs", name="gcn")
logger.log_graph(model)
trainer = pl.Trainer(max_epochs=100, logger=logger)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params
------------------------------------------------
0 | embedding | Embedding         | 5.4 M 
1 | conv1     | GCNConv           | 32.9 K
2 | fc        | Linear            | 33.0 K
3 | conv2     | DenseGraphConv    | 65.7 K
4 | decode    | Linear            | 129   
5 | pool      | AvgPool2d         | 0     
6 | train_mae | MeanAbsoluteError | 0     
7 | val_mae   | MeanAbsoluteError | 0     
------------------------------------------------
5.5 M     Trainable params
0         Non-trainable params
5.5 M     Total params
22.031    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
