In [5]:
from llvm_ml.data import load_pyg_dataset
# banned_ids = ["clang_347660", "x264_29245", "clang_777523", "clang_1563767", "clang_1201822", "clang_2304591", "clang_307223", "clang_2536327", "clang_600001", "clang_930652", "x264_173431", "clang_61216", "clang_2288557", "clang_2287086", "x264_297519", "clang_1128560", "clang_2088021", "clang_467960", "clang_2300269", "clang_24730", "clang_2617009", "clang_770736"]
# Sample ids to exclude from the dataset; empty means nothing is filtered out.
# A previously used exclusion list is kept below for reference:
# banned_ids = ["x264_297519", "x264_310679", "x264_224705", "x264_227241", "x264_221993"]
banned_ids = list()

# Load the dataset file, forwarding the (currently empty) exclusion list.
dataset = load_pyg_dataset(
    "./data/ryzen3600_v16.cbuf",
    banned_ids=banned_ids,
    use_binary_opcode=False,
)

# Report how many samples were loaded.
print(f"Training with {len(dataset)} samples")
# dataset.print_summary()

Training with 347988 samples


In [6]:
import pytorch_lightning as pl
from torch_geometric.loader import DataLoader
from llvm_ml.utils import plot_histogram
import torch.utils.data
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import PIL.Image
from torchvision.transforms import ToTensor
from model.estimation import GNNEstimation, LSTMEstimation
import torch.nn.functional as F
import torchmetrics
from model.utils import plot_lift_chart

import warnings
# Ignore UserWarnings whose message begins with 'TypedStorage is deprecated'
# (presumably emitted by the torch stack on every batch — keeps cell output readable).
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')

class GNN(pl.LightningModule):
    """Lightning module that trains an estimation model with an MSE objective.

    When ``conv == 'None'`` the wrapped model is ``LSTMEstimation``; for any
    other value it is ``GNNEstimation`` built with ``conv`` as its first
    argument.

    Every batch is unpacked as ``(bb, raw)``: ``bb`` is passed to the model and
    provides the regression target as ``bb.y``; ``raw`` is indexed as
    ``raw['source'][i]`` and ``raw['id'][i]`` for per-sample reporting.

    Args:
        conv: Convolution selector, or the string ``'None'`` for the LSTM model.
        input_dim: Forwarded to the wrapped model.
        hidden_dim: Forwarded to the wrapped model.
        output_dim: Forwarded to the wrapped model.
        batch_size: Forwarded to the wrapped model and used as the
            ``batch_size`` hint for ``self.log``.
        learning_rate: Initial learning rate for Adam.
        debug: When true, per-sample predictions are captured during
            ``debug_epoch`` and the worst batch is printed at the end of that
            epoch; the validation lift chart is skipped.
        debug_epoch: Epoch index at which the debug capture happens.
    """

    def __init__(self, conv, input_dim, hidden_dim, output_dim, batch_size, learning_rate=0.005, debug=False,
                 debug_epoch=15):
        super().__init__()
        if conv == 'None':
            self.model = LSTMEstimation(input_dim, hidden_dim, output_dim, batch_size)
        else:
            self.model = GNNEstimation(conv, input_dim, hidden_dim, output_dim, batch_size)

        self.lr = learning_rate
        self.batch_size = batch_size
        self.debug = debug
        self.debug_epoch = debug_epoch
        # [targets, predictions] accumulated over one validation epoch.
        self.val_measurements = [[], []]

        # Metrics
        self.train_mae = torchmetrics.MeanAbsoluteError()
        self.val_mae = torchmetrics.MeanAbsoluteError()
        self.train_cosinesim = torchmetrics.CosineSimilarity()
        self.val_cosinesim = torchmetrics.CosineSimilarity()
        self.train_evar = torchmetrics.ExplainedVariance()
        self.val_evar = torchmetrics.ExplainedVariance()

        # One entry per training batch captured during the debug epoch.
        self.loss_debug_data = []

    def forward(self, data):
        """Run the wrapped estimation model on ``data`` and return its output."""
        return self.model(data)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one batch, update and log the training metrics.

        Returns:
            The loss tensor used by Lightning for the backward pass.
        """
        bb, raw = batch
        y_hat = self(bb)
        loss = F.mse_loss(y_hat, bb.y)

        # L1 regularization
        # l1_lambda = 1e-5  # Regularization coefficient
        # l1_norm = sum(p.abs().sum() for p in self.parameters())
        # loss = loss + l1_lambda * l1_norm

        self.train_mae(y_hat, bb.y)
        self.train_cosinesim(y_hat, bb.y)
        self.train_evar(y_hat, bb.y)
        self.log("train_loss", loss, on_epoch=True, batch_size=self.batch_size)
        self.log("train_mae", self.train_mae, on_epoch=True, batch_size=self.batch_size)
        self.log("train_cosine_similarity", self.train_cosinesim, on_epoch=True, batch_size=self.batch_size)
        self.log("train_explained_variance", self.train_evar, on_epoch=True, batch_size=self.batch_size)

        # if self.global_step == 0:
        #     self.logger.experiment.add_graph(self, bb)

        if self.debug and self.current_epoch == self.debug_epoch:
            # Detach and move to CPU before storing: keeping the live tensors
            # would retain the autograd graph of every captured batch.
            predicted = y_hat.detach().cpu()
            expected = bb.y.detach().cpu()
            self.loss_debug_data.append({
                'loss': loss.detach().cpu(),
                # Iterate over the real batch length, not the configured
                # batch_size, so a short batch cannot raise IndexError.
                'data': [
                    {
                        'predicted': predicted[i],
                        'expected': expected[i],
                        'source': raw['source'][i],
                        'id': raw['id'][i],
                    }
                    for i in range(predicted.shape[0])
                ],
            })

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the MSE loss for one validation batch and record its results.

        Besides logging loss and metrics, every (target, prediction) pair is
        appended to ``self.val_measurements``, and for the first batch of each
        validation run up to five samples are written to the logger.
        """
        bb, raw = batch
        y_hat = self(bb)
        loss = F.mse_loss(y_hat, bb.y)
        self.val_mae(y_hat, bb.y)
        self.val_cosinesim(y_hat, bb.y)
        self.val_evar(y_hat, bb.y)
        self.log("val_loss", loss, on_epoch=True, batch_size=self.batch_size)
        self.log("val_mae", self.val_mae, on_epoch=True, batch_size=self.batch_size)
        self.log("val_cosine_similarity", self.val_cosinesim, on_epoch=True, batch_size=self.batch_size)
        self.log("val_explained_variance", self.val_evar, on_epoch=True, batch_size=self.batch_size)

        # One bulk transfer per tensor instead of one .item() call per sample.
        self.val_measurements[1].extend(y_hat.detach().reshape(-1).tolist())
        self.val_measurements[0].extend(bb.y.detach().reshape(-1).tolist())

        if batch_idx == 0:
            # Bound by the actual batch length so a short batch is safe.
            num_samples_to_log = min(y_hat.shape[0], 5)
            for i in range(num_samples_to_log):
                self.logger.experiment.add_scalar(f"val/sample_{i}/true", bb.y[i].item(), self.current_epoch)
                self.logger.experiment.add_scalar(f"val/sample_{i}/predicted", y_hat[i].item(), self.current_epoch)
                if self.global_step == 0:
                    self.logger.experiment.add_text(f"val/sample_{i}/source", raw['source'][i], self.global_step)

    def on_train_epoch_end(self) -> None:
        """At the end of the debug epoch, print the captured batch with the highest loss."""
        if self.debug and self.current_epoch == self.debug_epoch:
            # Nothing captured (e.g. zero training batches): max() would raise.
            if self.loss_debug_data:
                max_sample = max(self.loss_debug_data, key=lambda d: float(d['loss']))
                print(f"Max loss is {max_sample['loss']}")

                for bb in max_sample['data']:
                    print(f"Predicted {bb['predicted']}, expected: {bb['expected']}, id: {bb['id']}\nsource:\n{bb['source']}")

            # Release the captured samples once they have been reported.
            self.loss_debug_data = []

    def on_validation_epoch_end(self):
        """Log a lift chart of the collected measurements (non-debug only), then reset them."""
        # Skip plotting when no validation sample was collected.
        if not self.debug and self.val_measurements[0]:
            # x = np.asarray(self.val_measurements[0])
            # y = np.asarray(self.val_measurements[1])
            # if np.any(x) and np.any(y):
            #    plot = plot_histogram(x, y, percentile=0.95)
            #    image = PIL.Image.open(plot)
            #    image = ToTensor()(image).unsqueeze(0)
            #    self.logger.experiment.add_image("val_histogram", image[0], self.current_epoch)

            lift_chart = plot_lift_chart(self.val_measurements)
            # Convert while the image is open, then let the context manager close it.
            with PIL.Image.open(lift_chart) as image:
                chart = ToTensor()(image)
            self.logger.experiment.add_image("val_lift_chart", chart, self.current_epoch)
        self.val_measurements = [[], []]

    def on_train_start(self) -> None:
        """Ask the attached logger to record the model graph."""
        self.logger.log_graph(self)

    def configure_optimizers(self):
        """Build the Adam optimizer and a step-wise learning-rate schedule.

        Returns:
            A Lightning optimizer configuration dict with the optimizer and a
            ``MultiStepLR`` scheduler that multiplies the learning rate by 0.1
            at each listed milestone.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=1e-5)
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5, verbose=True, min_lr=1e-6, cooldown=5)
        # `verbose` is deprecated in recent PyTorch releases; False was already the default.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 7, 10, 15, 25, 30], gamma=0.1)
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                # NOTE(review): MultiStepLR does not consume a metric; 'monitor' is
                # presumably only relevant for the ReduceLROnPlateau alternative above.
                'monitor': 'val_loss',
            }
        }

In [7]:
# --- Run configuration -------------------------------------------------------
SEED = 42                # fixes the train/val split and the model initialisation
TRAIN_FRACTION = 0.7     # share of the dataset used for training
NUM_WORKERS = 6          # DataLoader worker processes per loader
MAX_EPOCHS = 40
CHECKPOINT_PATH = "./data/mcmlm.chkpt"

batch_size = 512
input_dim = 256
hidden_dim = 256
output_dim = 1

# Seed every RNG Lightning knows about so repeated runs are comparable.
pl.seed_everything(SEED, workers=True)

num_training = int(TRAIN_FRACTION * len(dataset))
num_val = len(dataset) - num_training

# A dedicated generator keeps the split identical regardless of how much
# randomness earlier cells consumed.
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [num_training, num_val], generator=split_generator
)

# NOTE(review): drop_last=True discards the final partial batch in both loaders;
# presumably the model, which is constructed with a fixed batch_size, needs
# full batches — confirm before changing.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS, drop_last=True)

model = GNN("None", input_dim, hidden_dim, output_dim, batch_size, debug=False)

# strict=False tolerates key mismatches, so report them instead of loading
# silently: a checkpoint that matches nothing would otherwise go unnoticed.
# map_location="cpu" lets the file load regardless of the device it was saved from.
pretrained_state = torch.load(CHECKPOINT_PATH, map_location="cpu")
load_result = model.load_state_dict(pretrained_state, strict=False)
print(
    f"Loaded {CHECKPOINT_PATH}: "
    f"{len(load_result.missing_keys)} model keys missing from checkpoint, "
    f"{len(load_result.unexpected_keys)} checkpoint keys not used by the model"
)

logger = TensorBoardLogger("runs", name="3600_estimate_embedding")
logger.log_graph(model)
trainer = pl.Trainer(max_epochs=MAX_EPOCHS, logger=logger)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | LSTMEstimation    | 6.6 M 
1 | train_mae       | MeanAbsoluteError | 0     
2 | val_mae         | MeanAbsoluteError | 0     
3 | train_cosinesim | CosineSimilarity  | 0     
4 | val_cosinesim   | CosineSimilarity  | 0     
5 | train_evar      | ExplainedVariance | 0     
6 | val_evar        | ExplainedVariance | 0     
------------------------------------------------------
6.6 M     Trainable params
0         Non-trainable params
6.6 M     Total params
26.245    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
