In [3]:
embedding_size = 256
hidden_size = 128
batch_size = 32
output_size = 10 # It is known that Willow Cove features 10 "ports"
num_heads = 6

learning_rate = 0.02

In [4]:
import model.utils
import torch
from torch_geometric.loader import DataLoader
import torch.utils.data

dataset = model.utils.BasicBlockDataset("data/i5_1135g7.pb", dtype=torch.float32)
train_size = int(0.3 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=6)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=6)

In [5]:
from model.GraphEncoder import GATEncoder, GCNEncoder
from model.Predictor import Predictor
import torch_geometric

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")

encoder = GATEncoder(dataset.num_opcodes, embedding_size, hidden_size, num_heads).to(device)
#encoder = GCNEncoder(dataset.num_opcodes, embedding_size, hidden_size).to(device)
model = Predictor(encoder, hidden_size, output_size, batch_size).to(device)

In [6]:
from model.model import train
import torch

checkpoint_dir = "checkpoints/tgl"
checkpoint_freq = 10
num_epochs = 200

if torch.cuda.is_available():
    torch.cuda.empty_cache()

train(model, device, train_loader, val_loader, num_epochs, batch_size, learning_rate, checkpoint_dir, checkpoint_freq)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | GATEncoder | 113 K 
1 | fc1     | Linear     | 8.3 K 
2 | fc2     | Linear     | 650   
---------------------------------------
121 K     Trainable params
0         Non-trainable params
121 K     Total params
0.488     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [5]:
torch.save(model, "trained_models/tgl.pt")

In [6]:
choice = dataset[140]
bb, m, raw = choice

from model.utils import print_port_pressure_table, estimate_cycles

print(bb.x)

input_sequence = bb.x.to(device)
edge_index = bb.edge_index.to(device)

out = model(input_sequence, edge_index)
res = out.to("cpu").detach().numpy()
print_port_pressure_table(res[1:], raw["source"])
print(estimate_cycles(out))
print(m)

tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
Port  |    0   |    1   |    2   |    3   |    4   |    5   |    6   |    7   |    8   |    9   |
--------------------------------------------------------------------------------------
       | 0.04   | 0.00   | 0.00   | 0.00   | 0.00   | 0.00   | 0.03   | 0.03   | 0.01   | 0.00   | movq	38792(%rbx,%r13,8), %r12
       | 0.04   | 0.00   | 0.00   | 0.00   | 0.00   | 0.00   | 0.03   | 0.03   | 0.01   | 0.00   | xorl	%r15d, %r15d
       | 0.04   | 0.00   | 0.00   | 0.00   | 0.00   | 0.00   | 0.03   | 0.03   | 0.01   | 0.00   | leaq	7536(%r12), %r14
       | 0.04   | 0.00   | 0.00   | 0.00   | 0.00   | 0.00   | 0.03   | 0.03   | 0.01   | 0.00   | cmpl	%r15d, 4952(%rbx)
tensor(0.1763, device='cuda:0', grad_fn=<AddBackward0>)
13.535
