This notebook trains a small fully connected MNIST classifier and exports its first-layer weights in BF16 format for use by the hardware testbench. More complex model weights can be generated by defining a stronger neural network here.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define the model
class SimpleMNISTModel(nn.Module):
    """Two-layer fully connected classifier: 784 -> 128 -> ReLU -> 10.

    The attribute names (``fc1``, ``relu``, ``fc2``) determine the keys of
    the saved ``state_dict`` and must stay stable.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so parameter initialisation
        # consumes the RNG in a fixed sequence.
        self.fc1 = nn.Linear(28*28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return the 10 logits per row."""
        flat = x.view(-1, 28*28)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# Instantiate and train the model
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the DataLoader shuffle order are repeatable; otherwise
# every run exports different weights for the testbench.
SEED = 0
torch.manual_seed(SEED)
model = SimpleMNISTModel()

# Data loader
# ToTensor converts each image to a float tensor scaled to [0, 1].
transform = transforms.ToTensor()
train_dataset = datasets.MNIST('.', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (few epochs for quick training)
for epoch in range(3):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Report the loss every 100 batches to keep the output short.
        if batch_idx % 100 == 0:
            print(f"Epoch {epoch} Batch {batch_idx} Loss {loss.item():.4f}")

# Save model state_dict
torch.save(model.state_dict(), 'mnist_simple.pth')


100%|██████████| 9.91M/9.91M [00:00<00:00, 10.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 441kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.83MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 18.4MB/s]


Epoch 0 Batch 0 Loss 2.2894
Epoch 0 Batch 100 Loss 0.3710
Epoch 0 Batch 200 Loss 0.4208
Epoch 0 Batch 300 Loss 0.2952
Epoch 0 Batch 400 Loss 0.2047
Epoch 0 Batch 500 Loss 0.1324
Epoch 0 Batch 600 Loss 0.3514
Epoch 0 Batch 700 Loss 0.2612
Epoch 0 Batch 800 Loss 0.2384
Epoch 0 Batch 900 Loss 0.2234
Epoch 1 Batch 0 Loss 0.1190
Epoch 1 Batch 100 Loss 0.1110
Epoch 1 Batch 200 Loss 0.2998
Epoch 1 Batch 300 Loss 0.3945
Epoch 1 Batch 400 Loss 0.1045
Epoch 1 Batch 500 Loss 0.2540
Epoch 1 Batch 600 Loss 0.1675
Epoch 1 Batch 700 Loss 0.1472
Epoch 1 Batch 800 Loss 0.1943
Epoch 1 Batch 900 Loss 0.0729
Epoch 2 Batch 0 Loss 0.1100
Epoch 2 Batch 100 Loss 0.0309
Epoch 2 Batch 200 Loss 0.0585
Epoch 2 Batch 300 Loss 0.1433
Epoch 2 Batch 400 Loss 0.0490
Epoch 2 Batch 500 Loss 0.1095
Epoch 2 Batch 600 Loss 0.0264
Epoch 2 Batch 700 Loss 0.1790
Epoch 2 Batch 800 Loss 0.1250
Epoch 2 Batch 900 Loss 0.0581


In [6]:

import struct

def float_to_bf16(f):
    """Return the BF16 bit pattern of ``f`` as an integer in [0, 0xFFFF].

    ``f`` is encoded as a big-endian IEEE-754 single-precision float and only
    the upper 16 bits are kept (sign, 8 exponent bits, top 7 mantissa bits).
    The lower 16 mantissa bits are dropped without rounding.
    """
    packed = struct.pack('>f', f)
    # Big-endian layout: the first two bytes are exactly the top 16 bits.
    return int.from_bytes(packed[:2], 'big')

import os

# Load the trained model
model = SimpleMNISTModel()
model.load_state_dict(torch.load('mnist_simple.pth'))
model.eval()

# Extract weights (bias tensors are skipped) as NumPy arrays keyed by
# parameter name.
weights = {}
for name, param in model.named_parameters():
    if 'weight' in name:
        weights[name] = param.detach().cpu().numpy()

# For your matrix-vector multiplication, focus on the first layer weights
# Shape: (128, 28*28)
layer1_weights = weights['fc1.weight']  # shape (128, 784)

# The per-neuron files are written into 'weights/'; create it up front so
# open() does not fail with FileNotFoundError on a fresh checkout.
os.makedirs('weights', exist_ok=True)

# Save each neuron's weights as 784 BF16 values (one per input pixel),
# each stored as a big-endian unsigned 16-bit word.
for neuron_idx, neuron_weights in enumerate(layer1_weights):
    # Convert to BF16
    bf16_vals = [float_to_bf16(w) for w in neuron_weights]
    # Save to file, e.g., weights for neuron 0
    filename = f'weights/weights_neuron_{neuron_idx}.bin'
    with open(filename, 'wb') as f:
        for val in bf16_vals:
            f.write(struct.pack('>H', val))


In [9]:
# Concatenate the per-neuron files, in neuron order, into one flat binary.
with open('weights.bin', 'wb') as merged:
    for neuron_idx in range(128):  # number of neurons
        with open(f'weights/weights_neuron_{neuron_idx}.bin', 'rb') as part:
            chunk = part.read()
        merged.write(chunk)