In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# --------------------
# Config
# --------------------
# Task definition: the script learns x -> (x + offset) mod n, with x fed in
# as a one-hot vector of length n.
n, offset = 37, 10

# Width of each hidden layer, and how many samples to draw per split.
hidden = 64
train_size, val_size = 2000, 400

# Optimisation settings: mini-batch size, passes over the data, Adam step size.
batch_size = 128
epochs = 500
lr = 0.05

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# --------------------
# Data
# --------------------
# All residues 0..n-1, shuffled in place so the held-out subset is random.
domain = np.arange(n)
np.random.shuffle(domain)

# The first third of the shuffled residues is held out: those values are only
# ever sampled for validation, the remaining two thirds only for training.
held_out_count = n // 3
val_unique, train_unique = domain[:held_out_count], domain[held_out_count:]

# Sample with replacement (np.random.choice's default). The training draw
# happens before the validation draw, which fixes the order in which the
# global NumPy RNG is consumed.
train_inputs = np.random.choice(train_unique, train_size)
train_targets = (train_inputs + offset) % n
val_inputs = np.random.choice(val_unique, val_size)
val_targets = (val_inputs + offset) % n

# NOTE: x -> (x + offset) % n is a bijection on 0..n-1, so because the input
# sets are disjoint, no validation target ever occurs as a training target.

def one_hot(xs, num_classes):
    """Encode a sequence of class indices as float32 one-hot rows.

    Args:
        xs: 1-D sequence (list or integer array) of class indices.
        num_classes: width of each encoded row.

    Returns:
        A ``(len(xs), num_classes)`` float32 array whose row ``i`` is all
        zeros except for a 1.0 in column ``xs[i]``.
    """
    count = len(xs)
    encoded = np.zeros((count, num_classes), dtype=np.float32)
    # Pair each row number with its class index and set that single cell.
    row_ids = np.arange(count)
    encoded[row_ids, xs] = 1.0
    return encoded

# Inputs become float32 one-hot matrices (one_hot emits float32); labels are
# int64 class indices, which is the target form passed to the loss below.
# Each tensor is placed on `device` as it is created.
X_train = torch.tensor(one_hot(train_inputs, n), device=device)
y_train = torch.tensor(train_targets, dtype=torch.long, device=device)
X_val = torch.tensor(one_hot(val_inputs, n), device=device)
y_val = torch.tensor(val_targets, dtype=torch.long, device=device)

# --------------------
# Model
# --------------------
class MLP(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers.

    Maps an input of width ``n`` through two hidden layers of width ``h``
    and back to ``n`` output logits (no softmax is applied here).
    """

    def __init__(self, n, h):
        """Build the layer stack.

        Args:
            n: input width and number of output logits.
            h: width of both hidden layers.
        """
        super().__init__()
        # Layers are listed input-to-output; their positions in the
        # Sequential (0, 2, 4 for the Linear layers) define the
        # ``net.<index>.*`` parameter names.
        layers = [
            nn.Linear(n, h),
            nn.ReLU(),
            nn.Linear(h, h),
            nn.ReLU(),
            nn.Linear(h, n),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        logits = self.net(x)
        return logits

# Build the network, then move its parameters to the chosen device
# (nn.Module.to modifies the module in place).
model = MLP(n, hidden)
model.to(device)

# The optimizer is created after the move so it tracks the on-device parameters.
opt = optim.Adam(model.parameters(), lr=lr)

# Cross-entropy over the n output logits against integer class labels.
loss_fn = nn.CrossEntropyLoss()

# --------------------
# Training
# --------------------
# Each epoch visits the whole training set once, in a fresh random order,
# in consecutive mini-batches of `batch_size` rows (the last may be shorter).
num_train = len(X_train)
final_epoch = epochs - 1

for epoch in range(epochs):
    # Reshuffle inputs and labels with the same permutation so rows stay paired.
    idx = np.random.permutation(num_train)
    Xb, yb = X_train[idx], y_train[idx]

    model.train()
    for start in range(0, num_train, batch_size):
        rows = slice(start, start + batch_size)
        opt.zero_grad()
        logits = model(Xb[rows])
        loss = loss_fn(logits, yb[rows])
        loss.backward()
        opt.step()

    # Report validation accuracy every 10th epoch and on the final epoch only.
    is_report_epoch = epoch % 10 == 0 or epoch == final_epoch
    if not is_report_epoch:
        continue

    model.eval()
    with torch.no_grad():
        preds = model(X_val).argmax(1)
        acc = (preds == y_val).float().mean().item()
    print(f"Epoch {epoch:03d} | val acc = {acc*100:.2f}%")

# --------------------
# Show a few predictions
# --------------------
# Print a handful of (input, target, prediction) triples for each split.
model.eval()
with torch.no_grad():
    # Predicted class per validation row (row i corresponds to val_inputs[i]).
    preds = model(X_val).argmax(1).cpu().numpy()
    # Predicted class per training row (row i corresponds to train_inputs[i]).
    # Fix: the training samples below used to be printed next to `preds`,
    # i.e. the predictions of unrelated validation rows.
    train_preds = model(X_train).argmax(1).cpu().numpy()

# Slicing (rather than range(10)) keeps this safe when a split has < 10 rows.
num_shown = 10

print("\nSamples:")
for x, pred in zip(val_inputs[:num_shown], preds[:num_shown]):
    print(f"x={x:2d} → target={(x+offset)%n:2d}, pred={pred:2d}")
print("#######")
for x, pred in zip(train_inputs[:num_shown], train_preds[:num_shown]):
    print(f"x={x:2d} → target={(x+offset)%n:2d}, pred={pred:2d}")


Epoch 000 | val acc = 0.00%
Epoch 010 | val acc = 0.00%
Epoch 020 | val acc = 0.00%
Epoch 030 | val acc = 0.00%
Epoch 040 | val acc = 0.00%
Epoch 050 | val acc = 0.00%
Epoch 060 | val acc = 0.00%
Epoch 070 | val acc = 0.00%
Epoch 080 | val acc = 0.00%
Epoch 090 | val acc = 0.00%
Epoch 100 | val acc = 0.00%
Epoch 110 | val acc = 0.00%
Epoch 120 | val acc = 0.00%
Epoch 130 | val acc = 0.00%
Epoch 140 | val acc = 0.00%
Epoch 150 | val acc = 0.00%
Epoch 160 | val acc = 0.00%
Epoch 170 | val acc = 0.00%
Epoch 180 | val acc = 0.00%
Epoch 190 | val acc = 0.00%
Epoch 200 | val acc = 0.00%
Epoch 210 | val acc = 0.00%
Epoch 220 | val acc = 0.00%
Epoch 230 | val acc = 0.00%
Epoch 240 | val acc = 0.00%
Epoch 250 | val acc = 0.00%
Epoch 260 | val acc = 0.00%
Epoch 270 | val acc = 0.00%
Epoch 280 | val acc = 0.00%
Epoch 290 | val acc = 0.00%
Epoch 300 | val acc = 0.00%
Epoch 310 | val acc = 0.00%
Epoch 320 | val acc = 0.00%
Epoch 330 | val acc = 0.00%
Epoch 340 | val acc = 0.00%
Epoch 350 | val acc 