In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


# Data generator: a sample is labelled 1 when x1 > x2, otherwise 0.
def generate_data(n):
    """Draw ``n`` random 2-D points and label each by whether x1 exceeds x2.

    Args:
        n: Number of samples to generate.

    Returns:
        A ``(features, labels)`` tuple of float32 tensors. ``features`` has
        shape ``(n, 2)`` and comes from ``np.random.rand``; ``labels`` has
        shape ``(n, 1)`` and holds 1.0 where the first coordinate is strictly
        greater than the second, 0.0 otherwise.
    """
    points = np.random.rand(n, 2).astype(np.float32)
    first_coord = points[:, 0]
    second_coord = points[:, 1]
    # Strict comparison: ties (x1 == x2) fall into the 0 class.
    is_first_larger = first_coord > second_coord
    labels = is_first_larger.astype(np.float32).reshape((-1, 1))
    features = torch.from_numpy(points)
    targets = torch.from_numpy(labels)
    return features, targets


# Network architecture: 2 -> 8 -> 1, with a sigmoid on the hidden layer only.
class SimpleNet(nn.Module):
    """Two-layer perceptron mapping 2 input features to a single raw logit.

    The sigmoid is applied after the first linear layer only; the output of
    the second linear layer is returned without any activation.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=2, out_features=8)
        self.layer2 = nn.Linear(in_features=8, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the un-activated output of ``layer2`` for input ``x``."""
        hidden = self.layer1(x)
        activated = self.sigmoid(hidden)
        return self.layer2(activated)


# Reporting helper used by train().
def _print_parameters(model):
    """Print the weights and biases of ``model.layer1`` and ``model.layer2``."""
    print("[Layer1 Weight]\n", model.layer1.weight.data)
    print("[Layer1 Bias]\n", model.layer1.bias.data)
    print("[Layer2 Weight]\n", model.layer2.weight.data)
    print("[Layer2 Bias]\n", model.layer2.bias.data)
    print("----------------------------------------")


# Training function
def train(epochs=1000, batch_size=128, num_samples=1024):
    """Train a fresh ``SimpleNet`` with mini-batch SGD and print progress.

    A dataset of ``num_samples`` points is generated once and then iterated
    in order (no shuffling) in slices of ``batch_size`` every epoch. Every
    100th epoch and the final epoch, the sample-weighted mean loss, the
    accuracy and all layer parameters are printed.

    Args:
        epochs: Number of passes over the dataset.
        batch_size: Number of samples per optimizer step; the last batch may
            be smaller when ``num_samples`` is not a multiple of it.
        num_samples: Size of the generated dataset. Previously hard-coded to
            1024 in three separate places.

    Raises:
        ValueError: If ``num_samples`` is not positive (the reported averages
            would otherwise divide by zero).
    """
    if num_samples <= 0:
        raise ValueError("num_samples must be a positive integer")

    model = SimpleNet()
    # NOTE: BCEWithLogitsLoss combines sigmoid + BCE, so it is fed the
    # model's raw outputs rather than probabilities.
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    x_all, y_all = generate_data(num_samples)
    total = x_all.size(0)  # single source of truth for loop bounds and averages

    for epoch in range(epochs):

        epoch_loss = 0.0
        correct = 0

        for start in range(0, total, batch_size):
            x_batch = x_all[start:start + batch_size]
            y_batch = y_all[start:start + batch_size]

            logits = model(x_batch)
            loss = criterion(logits, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Metrics use the logits computed before this step's update;
            # no autograd graph is needed for the bookkeeping.
            with torch.no_grad():
                probs = torch.sigmoid(logits)
                pred = (probs > 0.5).float()
            correct += (pred == y_batch).sum().item()
            # Weight by batch length so a short final batch averages correctly.
            epoch_loss += loss.item() * x_batch.size(0)

        if epoch % 100 == 0 or epoch == epochs - 1:
            print(
                f"Epoch {epoch}, BCE Loss: {epoch_loss / total:.6f}, Accuracy: {correct / total:.6f}")
            _print_parameters(model)


# Run training with the default arguments.
train()

Epoch 0, BCE Loss: 0.684666, Accuracy: 0.755859
[Layer1 Weight]
 tensor([[-0.1924,  0.0560],
        [ 0.6549, -0.7021],
        [-0.3391,  0.1689],
        [-0.2954, -0.1210],
        [-0.3896, -0.3415],
        [ 0.6658, -0.2486],
        [-0.5041, -0.4486],
        [-0.6682,  0.3957]])
[Layer1 Bias]
 tensor([ 0.1698, -0.3643,  0.0516,  0.6626, -0.3195,  0.2740, -0.0156, -0.2606])
[Layer2 Weight]
 tensor([[ 0.0072,  0.0567,  0.0773, -0.3191,  0.3494,  0.1678,  0.0609, -0.1957]])
[Layer2 Bias]
 tensor([-0.0201])
----------------------------------------
Epoch 100, BCE Loss: 0.668994, Accuracy: 0.916992
[Layer1 Weight]
 tensor([[-0.1949,  0.0585],
        [ 0.6823, -0.7241],
        [-0.3337,  0.1632],
        [-0.3479, -0.0686],
        [-0.3401, -0.3932],
        [ 0.6969, -0.2896],
        [-0.4957, -0.4575],
        [-0.7061,  0.4452]])
[Layer1 Bias]
 tensor([ 0.1698, -0.3581,  0.0515,  0.6607, -0.3200,  0.2657, -0.0159, -0.2509])
[Layer2 Weight]
 tensor([[-0.0379,  0.2682, -0.0084,