In [6]:
import torch.nn as nn
import torch_geometric.nn as gnn
from torch.nn import Module
from llvm_ml.torch.nn import MCNNConfig
from lightning import pytorch as pl
from torch_geometric.utils import to_dense_batch, to_dense_adj
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim import lr_scheduler


class MCEmbedding(Module):
    def __init__(self, num_opcodes, emb_size):
        super().__init__()

        self.embedding = nn.Embedding(num_opcodes, emb_size)
        self.gcn_embedding = nn.Embedding(num_opcodes, emb_size)
        self.pos_encoding = gnn.PositionalEncoding(emb_size)
        self.gcn = gnn.GCNConv(emb_size, emb_size)
        self.norm = gnn.LayerNorm(emb_size)

    def forward(self, input_tensor, edge_index):
        pos_tensor = self.pos_encoding(input_tensor)

        gcn_tensor = self.gcn(self.gcn_embedding(input_tensor), edge_index)

        output = self.embedding(input_tensor) + pos_tensor + gcn_tensor

        return self.norm(output), pos_tensor, gcn_tensor


class MCBERT(pl.LightningModule):
    def __init__(self, config: MCNNConfig):
        super().__init__()

        self.config = config

        self.embedding = MCEmbedding(self.config.num_opcodes, self.config.embedding_size)
        
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.config.embedding_size, nhead=self.config.num_heads_encoder, dropout=self.config.dropout, activation="relu", batch_first=True)
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.config.num_encoders)
        
        self.token_prediction = nn.Linear(self.config.embedding_size, self.config.num_opcodes)


    def forward(self, nodes, edge_index, batch):
        embedded, _, _ = self.embedding(nodes, edge_index)

        encoded, mask = to_dense_batch(embedded, batch)

        # TODO this does not do what I expected
        #dense_edges = to_dense_adj(edge_index, batch)
        #dense_edges = dense_edges.view(encoded.shape[0], encoded.shape[1], encoded.shape[1])

        encoded = self.encoder(encoded)

        token_predictions = self.token_prediction(encoded)
        
        return encoded, F.softmax(token_predictions)

    def _step(self, batch, stage: str):

        bb, raw, mask_id, original_token = batch

        _, masked_token = self.forward(bb.x, bb.edge_index, bb.batch)

        dense_x, _ = to_dense_batch(bb.x, bb.batch)

        log_prefix = "train" if stage == 'train' else "val"
        target_token = dense_x.clone()

        for i in range(self.config.batch_size):
            if mask_id[i] != 0:
                target_token[i, mask_id[i]] = original_token[i]

        loss = F.cross_entropy(masked_token.view(-1, self.config.num_opcodes), target_token.view(-1).long())

        self.log(f"{log_prefix}_loss", loss, on_epoch=True, batch_size=self.config.batch_size)

        return loss, bb, raw

    def training_step(self, batch, batch_idx):
        loss, _, _ = self._step(batch, 'train')
        return loss

    def validation_step(self, batch, batch_idx):
        loss, _, _ = self._step(batch, 'val')
        return loss

    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.config.learning_rate, weight_decay=1e-3)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.1)
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss',
            }
        }

In [2]:
from llvm_ml.torch import BasicBlockDataset
banned_ids = []
dataset = BasicBlockDataset("./data/ryzen3600_v16.cbuf", masked=False, banned_ids=banned_ids, prefilter=True)
print(f"Training with {len(dataset)} samples")

Training with 347988 samples


In [7]:
#from model.GraphBERT import ThroughputEstimator
from torch_geometric.loader import DataLoader
import torch
from lightning.pytorch.loggers import TensorBoardLogger
import lightning.pytorch as pl
from lightning.pytorch.callbacks import ModelSummary, LearningRateMonitor

import warnings
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')

config = MCNNConfig(dataset.num_opcodes)
config.learning_rate = 1e-4
config.batch_size = 256
config.hidden_size = 128
config.embedding_size = 256
# config.reg_forward_expansion = 16
config.forward_expansion = 2

num_training = int(0.7 * len(dataset))
num_val = len(dataset) - num_training

train_dataset, val_dataset = torch.utils.data.random_split(dataset, [num_training, num_val])
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=6, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=6, drop_last=True)

model = MCBERT(config)

logger = TensorBoardLogger("runs", name="bert")
logger.log_graph(model)
callbacks = [
    ModelSummary(max_depth=-1),
    LearningRateMonitor(),
]
trainer = pl.Trainer(max_epochs=25,
                     logger=logger,
                     precision='16-mixed',
                     callbacks=callbacks,
                     # fast_dev_run=True,
                     #overfit_batches=1,
                     #log_every_n_steps=1,
                     )
trainer.fit(model, train_loader, val_loader)

Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                                | Type                            | Params
-----------------------------------------------------------------------------------------
0  | embedding                           | MCEmbedding                     | 10.8 M
1  | embedding.embedding                 | Embedding                       | 5.4 M 
2  | embedding.gcn_embedding             | Embedding                       | 5.4 M 
3  | embedding.pos_encoding              | PositionalEncoding              | 0     
4  | embedding.gcn                       | GCNConv                        

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  return encoded, F.softmax(token_predictions)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=25` reached.


In [8]:
torch.save(model, "data/bert_ryzen3600.pt")

In [None]:
# torch.save({'state_dict': model.state_dict()}, "data/ryzen3600.ckpt")