In [1]:
from python.llvm_ml.data import load_pyg_dataset
# Location of the serialized sample file, kept in one place so it is easy to swap.
DATASET_PATH = "./data/ryzen3600_v1.pb"

# NOTE(review): presumably returns an indexable PyG dataset (len() is taken below
# and later cells split it) -- confirm against python.llvm_ml.data.
dataset = load_pyg_dataset(
    DATASET_PATH,
    use_binary_opcode=False,
)
print(f"Training with {len(dataset)} samples")

Training with 57586 samples


In [2]:
import pytorch_lightning as pl
from torch_geometric.loader import DataLoader
from python.llvm_ml.utils import plot_histogram
import torch.utils.data
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import PIL.Image
from torchvision.transforms import ToTensor
from model.estimation import GNNEstimation, LSTMEstimation
import torch.nn.functional as F
import torchmetrics

import warnings
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')

class GNN(pl.LightningModule):
    """Lightning wrapper that trains an estimation network with an MSE objective.

    The wrapped network is ``LSTMEstimation`` when ``conv == 'None'`` and
    ``GNNEstimation`` otherwise. Every batch is unpacked as a ``(bb, raw)``
    pair: ``bb`` is fed to the network and carries the target in ``bb.y``;
    ``raw['source'][i]`` is logged as text for the first few validation samples.

    Args:
        conv: Selector forwarded to ``GNNEstimation``; the string ``'None'``
            selects ``LSTMEstimation`` instead.
        input_dim: Forwarded unchanged to the wrapped network.
        hidden_dim: Forwarded unchanged to the wrapped network.
        output_dim: Forwarded unchanged to the wrapped network.
        batch_size: Forwarded to the wrapped network and passed to ``self.log``
            as the batch size of every logged value.
        learning_rate: Learning rate handed to Adam in ``configure_optimizers``.
    """

    # How many samples of the first validation batch are tracked individually.
    num_samples_to_log = 5

    def __init__(self, conv, input_dim, hidden_dim, output_dim, batch_size, learning_rate=0.01):
        super().__init__()
        if conv == 'None':
            self.model = LSTMEstimation(input_dim, hidden_dim, output_dim, batch_size)
        else:
            self.model = GNNEstimation(conv, input_dim, hidden_dim, output_dim, batch_size)

        self.lr = learning_rate
        self.batch_size = batch_size
        # [predictions, targets] gathered over one validation epoch; cleared in
        # on_validation_epoch_end.
        self.val_measurements = [[], []]

        # Metrics (separate instances so train and validation state never mix)
        self.train_mape = torchmetrics.MeanAbsolutePercentageError()
        self.train_mae = torchmetrics.MeanAbsoluteError()
        self.val_mape = torchmetrics.MeanAbsolutePercentageError()
        self.val_mae = torchmetrics.MeanAbsoluteError()
        # self.r2 = torchmetrics.R2Score()
        # self.logcosh = torchmetrics.LogCoshError()
        # self.val_cosinesim = torchmetrics.CosineSimilarity()

    def forward(self, data):
        """Run the wrapped network on ``data`` and return its output."""
        return self.model(data)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one training batch and log loss, MAPE and MAE.

        Returns:
            The MSE loss tensor used for back-propagation.
        """
        bb, raw = batch
        y_hat = self(bb)
        # NOTE(review): mse_loss broadcasts silently if y_hat and bb.y differ in
        # shape (e.g. (N, 1) vs (N,)) -- confirm both have the same shape.
        loss = F.mse_loss(y_hat, bb.y)
        self.train_mape(y_hat, bb.y)
        self.train_mae(y_hat, bb.y)
        # self.r2(y_hat, bb.y)
        # self.cosinesim(y_hat, bb.y)
        # self.logcosh(y_hat, bb.y)
        self.log("train_loss", loss, on_epoch=True, batch_size=self.batch_size)
        self.log("train_mape", self.train_mape, on_epoch=True, batch_size=self.batch_size)
        self.log("train_mae", self.train_mae, on_epoch=True, batch_size=self.batch_size)
        # if loss > 200:
        #     for i in range(self.batch_size):
        #         self.logger.experiment.add_text(f"train_abnormal_{self.global_step}_{i}", raw['source'][i], self.global_step)
        # self.log("val_r2", self.r2, on_epoch=True, batch_size=self.batch_size)
        # self.log("val_cosine_similarity", self.cosinesim, on_epoch=True, batch_size=self.batch_size)
        # self.log("val_log_cosh", self.logcosh, on_epoch=True, batch_size=self.batch_size)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch.

        Logs ``val_loss`` / ``val_mape`` / ``val_mae``, appends every
        prediction/target pair to ``self.val_measurements`` and, for the first
        batch of the epoch, logs a handful of individual samples.
        """
        bb, raw = batch
        y_hat = self(bb)
        loss = F.mse_loss(y_hat, bb.y)
        self.val_mape(y_hat, bb.y)
        self.val_mae(y_hat, bb.y)
        # self.r2(y_hat, bb.y)
        # self.val_cosinesim(y_hat, bb.y)
        # self.logcosh(y_hat, bb.y)
        self.log("val_loss", loss, on_epoch=True, batch_size=self.batch_size)
        self.log("val_mape", self.val_mape, on_epoch=True, batch_size=self.batch_size)
        self.log("val_mae", self.val_mae, on_epoch=True, batch_size=self.batch_size)
        # self.log("val_r2", self.r2, on_epoch=True, batch_size=self.batch_size)
        # self.log("val_cosine_similarity", self.val_cosinesim, on_epoch=True, batch_size=self.batch_size)
        # self.log("val_log_cosh", self.logcosh, on_epoch=True, batch_size=self.batch_size)

        # Convert each tensor to a Python list once instead of calling .item()
        # per element (every .item() is a separate device-to-host read).
        predicted_values = y_hat.detach().flatten().tolist()
        target_values = bb.y.detach().flatten().tolist()
        for predicted, target in zip(predicted_values, target_values):
            self.val_measurements[0].append(predicted)
            self.val_measurements[1].append(target)

        if batch_idx == 0:
            self._log_sample_predictions(bb, raw, y_hat)

    def _log_sample_predictions(self, bb, raw, y_hat):
        """Log true/predicted scalars (and once, the source text) for a few samples."""
        # Nothing to write to when the Trainer was created with logger=False.
        if self.logger is None:
            return

        experiment = self.logger.experiment
        # Never index past the end of a batch shorter than num_samples_to_log.
        sample_count = min(self.num_samples_to_log, len(y_hat), len(bb.y))
        for i in range(sample_count):
            experiment.add_scalar(f"val/sample_{i}/true", bb.y[i].item(), self.current_epoch)
            experiment.add_scalar(f"val/sample_{i}/predicted", y_hat[i].item(), self.current_epoch)
            # The source text is only written while no optimizer step has run yet.
            if self.global_step == 0:
                experiment.add_text(f"val/sample_{i}/source", raw['source'][i], self.global_step)

    def on_validation_epoch_end(self):
        """Discard the measurements collected during the finished validation epoch."""
        # plot = plot_histogram(np.asarray(self.val_measurements[0]), np.asarray(self.val_measurements[1]), percentile=0.95)
        # image = PIL.Image.open(plot)
        # image = ToTensor()(image).unsqueeze(0)
        # self.logger.experiment.add_image("val_histogram", image[0], self.current_epoch)
        self.val_measurements = [[], []]

    def configure_optimizers(self):
        """Use Adam with a ReduceLROnPlateau schedule driven by ``val_loss``.

        Returns:
            A dict with the optimizer and an ``lr_scheduler`` config whose
            ``monitor`` is ``'val_loss'`` (the key logged in ``validation_step``).
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        # Cut the learning rate by 10x after 6 epochs without val_loss improvement.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=6, factor=0.1)
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss',
            }
        }

In [None]:
# --- Run configuration -------------------------------------------------------
SEED = 42          # fixes the train/val split and weight initialisation
batch_size = 64
input_dim = 256    # passed to GNN as its input_dim argument
hidden_dim = 512
output_dim = 1

# Seed the Python, NumPy and PyTorch RNGs (and DataLoader workers) so that
# Restart & Run All yields comparable runs.
pl.seed_everything(SEED, workers=True)

# --- 80/20 train/validation split --------------------------------------------
num_training = int(0.8 * len(dataset))
num_val = len(dataset) - num_training

# A dedicated generator keeps the split identical across runs regardless of how
# much global randomness was consumed before this cell.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [num_training, num_val],
    generator=torch.Generator().manual_seed(SEED),
)
# drop_last=True keeps every batch at exactly batch_size samples; the trailing
# partial batch of each split is therefore never seen.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=6, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=6, drop_last=True)

# --- Model, logger, trainer ---------------------------------------------------
model = GNN("GraphConv", input_dim, hidden_dim, output_dim, batch_size)

# NOTE(review): TensorBoardLogger is imported from `lightning.pytorch` while the
# Trainer comes from `pytorch_lightning`; consider importing both from one package.
logger = TensorBoardLogger("runs", name="3600_estimate_embedding")
# NOTE(review): the logger is created without log_graph=True and the model defines
# no example_input_array, so this call is likely a no-op -- confirm.
logger.log_graph(model)
trainer = pl.Trainer(max_epochs=100, logger=logger)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                        | Params
-----------------------------------------------------------
0 | model      | GNNEstimation               | 7.7 M 
1 | train_mape | MeanAbsolutePercentageError | 0     
2 | train_mae  | MeanAbsoluteError           | 0     
3 | val_mape   | MeanAbsolutePercentageError | 0     
4 | val_mae    | MeanAbsoluteError           | 0     
-----------------------------------------------------------
7.7 M     Trainable params
0         Non-trainable params
7.7 M     Total params
30.969    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]