# Predicting Zen 2 micro-architecture

The Zen 2 microarchitecture was introduced in 2019. The core features a 19-stage pipeline with a 4-way decoder.
All tests were run on an AMD Ryzen 5 3600.

In [1]:
import model.utils
import torch
import sys
from torch_geometric.loader import DataLoader

# Dataset is built from two directories: the x86_64 basic blocks and the
# ryzen3600 data directory. Batches of 32 are drawn in shuffled order.
dataset = model.utils.BasicBlockDataset(
    "data/x86_64/basic_blocks",
    "data/x86_64/ryzen3600",
)
loader = DataLoader(dataset, shuffle=True, batch_size=32)

# Hyperparameters shared by the cells below.
learning_rate = 0.0001
nhead = 2
hidden_size = 128
# One input feature per value extracted with llvm-mc-embed.
input_size = 6
# It is known that Zen 2 features 12 "ports".
output_size = 12

In [2]:
from model.GraphEncoder import Encoder
from model.Decoder import Decoder
from model.Transformer import Transformer
from model.Predictor import Predictor

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the three sub-modules on the selected device.
encoder = Encoder(input_size, hidden_size, output_size)
encoder = encoder.to(device)

decoder = Decoder(input_size, hidden_size, output_size)
decoder = decoder.to(device)

transformer = Transformer(input_size, nhead, hidden_size)
transformer = transformer.to(device)

# The predictor wraps encoder, transformer and decoder (in that argument
# order) and is itself moved to the selected device.
agent = Predictor(encoder, transformer, decoder)
agent = agent.to(device)

In [None]:
from model.model import train

# Training-run settings.
num_epochs = 500
checkpoint_dir = "checkpoints/ryzen3600"
# NOTE(review): the unit of checkpoint_freq (presumably epochs) is defined by
# model.model.train -- confirm there.
checkpoint_freq = 5

# Arguments are passed positionally, in the order train() is called with
# throughout this notebook.
train(
    agent,
    device,
    loader,
    num_epochs,
    learning_rate,
    checkpoint_dir,
    checkpoint_freq,
)

  torch.tensor(measured_cycles, device=total_predicted_cycles.device))
  measured_cycles_term = F.mse_loss(total_predicted_cycles,
  2%|▏         | 1845/98050 [03:37<3:25:12,  7.81it/s] 

In [None]:
from pathlib import Path

# torch.save does not create missing parent directories, so make sure the
# output directory exists; otherwise the save would fail only after the
# long training run has already finished.
model_path = Path("trained_models/ryzen3600.pt")
model_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE: this pickles the whole Predictor object rather than a state_dict, so
# loading the file later requires the `model` package to be importable under
# the same module paths.
torch.save(agent, model_path)

In [None]:
# Run the predictor on one sample (index 150) and print its output next to
# the value stored with that sample.
choice = dataset[150]
bb, m, source = choice

input_sequence = bb.x.to(device)
edge_index = bb.edge_index.to(device)

# Inference only: switch to evaluation mode so that train-only layers (e.g.
# dropout, if the model has any) are disabled, and skip autograd bookkeeping.
# NOTE: the agent stays in eval mode afterwards; call agent.train() before
# resuming training unless train() already does so.
agent.eval()
with torch.no_grad():
    out, _ = agent(input_sequence, edge_index)

res = out.detach().cpu().numpy()
model.utils.print_port_pressure_table(res, source)
print(model.utils.estimate_cycles(out))
# NOTE(review): `m` is presumably the measured cycle count for this block --
# confirm against BasicBlockDataset.
print(m)