In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import math
from ndlinear import NdLinear 

In [3]:
# Data loading & normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)


100.0%


In [4]:
# Define a CNN-NdLinear model. 
class NdCNN(nn.Module):
    def __init__(self, input_shape, hidden_size):
        super(NdCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.ndlinear = NdLinear(input_shape, hidden_size)
        final_dim = math.prod(hidden_size)
        self.fc_out = nn.Linear(final_dim, 100)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.ndlinear(x)
        x = x.view(x.shape[0], -1)
        x = self.fc_out(self.relu(x))
        return x

In [5]:
# Set the device. 
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
                "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
                "and/or you do not have an MPS-enabled device on this machine.")
    compute_device = torch.device("cpu")
else:
    compute_device = torch.device("mps")

In [6]:
# Instantiate model, loss, and optimizer
nd_cnn = NdCNN((64, 8, 8), (32, 8, 8)).to(compute_device)
# An example of incorrect usage. This will be equivalent to a naive nn.Linear layer. 
# nd_cnn = NdCNN((64,), (32,)).to(compute_device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nd_cnn.parameters(), lr=0.001)

In [7]:
# Training loop. Display loss and accuracy for each epoch. 
epochs = 20
ndcnn_loss = [] 
ndcnn_acc = [] 
params_ndcnn = sum(p.numel() for p in nd_cnn.parameters() if p.requires_grad)
for epoch in range(epochs):
    nd_cnn.train() 
    running_loss = 0.0 
    correct_ndcnn, total = 0, 0 
    for images, labels in trainloader:
        images, labels = images.to(compute_device), labels.to(compute_device)
        optimizer.zero_grad()
        outputs_hyper = nd_cnn(images)
        loss_hyper = criterion(outputs_hyper, labels)
        loss_hyper.backward()
        optimizer.step()
        running_loss += loss_hyper.item()
        ndcnn_loss.append(running_loss / len(trainloader))

    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(compute_device), labels.to(compute_device)
            outputs_hyper = nd_cnn(images)
            _, predicted_hyper = torch.max(outputs_hyper, 1)
            correct_ndcnn += (predicted_hyper == labels).sum().item()
            total += labels.size(0)
    ndcnn_acc.append(100 * correct_ndcnn / total)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {ndcnn_loss[-1]:.4f}, Acc: {ndcnn_acc[-1]:.2f}%")

Epoch 1/20 - Loss: 1.5389, Acc: 57.77%
Epoch 2/20 - Loss: 1.1114, Acc: 62.83%
Epoch 3/20 - Loss: 0.9761, Acc: 66.17%
Epoch 4/20 - Loss: 0.8860, Acc: 69.14%
Epoch 5/20 - Loss: 0.8177, Acc: 70.01%
Epoch 6/20 - Loss: 0.7648, Acc: 71.63%
Epoch 7/20 - Loss: 0.7198, Acc: 72.40%
Epoch 8/20 - Loss: 0.6848, Acc: 72.07%
Epoch 9/20 - Loss: 0.6551, Acc: 72.52%
Epoch 10/20 - Loss: 0.6253, Acc: 73.34%
Epoch 11/20 - Loss: 0.6017, Acc: 73.57%
Epoch 12/20 - Loss: 0.5767, Acc: 73.48%
Epoch 13/20 - Loss: 0.5557, Acc: 73.89%
Epoch 14/20 - Loss: 0.5392, Acc: 73.93%
Epoch 15/20 - Loss: 0.5210, Acc: 72.98%
Epoch 16/20 - Loss: 0.5066, Acc: 73.87%
Epoch 17/20 - Loss: 0.4908, Acc: 74.10%
Epoch 18/20 - Loss: 0.4757, Acc: 73.96%
Epoch 19/20 - Loss: 0.4583, Acc: 73.74%
Epoch 20/20 - Loss: 0.4467, Acc: 74.49%


In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision
# import torchvision.transforms as transforms
# import math
# import matplotlib.pyplot as plt
# from ndlinear import NdLinear

# # Set the device. 
# if not torch.backends.mps.is_available():
#     if not torch.backends.mps.is_built():
#         print("MPS not available because the current PyTorch install was not "
#                 "built with MPS enabled.")
#     else:
#         print("MPS not available because the current MacOS version is not 12.3+ "
#                 "and/or you do not have an MPS-enabled device on this machine.")
#     compute_device = torch.device("cpu")
# else:
#     compute_device = torch.device("mps")

# # 数据加载 & 预处理
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5,), (0.5,))
# ])
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,  download=True, transform=transform)
# testset  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# testloader  = torch.utils.data.DataLoader(testset,  batch_size=64, shuffle=False)

# —— Baseline 模型（用 nn.Linear） —— #
class BaselineCNN(nn.Module):
    def __init__(self, input_shape, hidden_size):
        super().__init__()
        self.conv1   = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2   = nn.Conv2d(32, 64, 3, padding=1)
        self.relu    = nn.ReLU()
        self.pool    = nn.MaxPool2d(2, 2)
        final_dim    = math.prod(input_shape)
        self.fc1     = nn.Linear(final_dim, hidden_size[0]*hidden_size[1]*hidden_size[2])
        self.fc_out  = nn.Linear(hidden_size[0]*hidden_size[1]*hidden_size[2], 10)
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        return self.fc_out(x)

# —— NdLinear 模型 —— #
class NdCNN(nn.Module):
    def __init__(self, input_shape, hidden_size):
        super().__init__()
        self.conv1    = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2    = nn.Conv2d(32, 64, 3, padding=1)
        self.relu     = nn.ReLU()
        self.pool     = nn.MaxPool2d(2, 2)
        self.ndlinear = NdLinear(input_shape, hidden_size)
        final_dim    = math.prod(hidden_size)
        self.fc_out   = nn.Linear(final_dim, 10)
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.ndlinear(x)
        x = x.view(x.size(0), -1)
        return self.fc_out(self.relu(x))

# —— 训练与评估函数 —— #
def train_epoch(model, loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def evaluate(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = model(imgs).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total * 100

# —— 实例化 & 训练对比 —— #
input_shape = (64, 8, 8)
hidden_size = (32, 8, 8)
baseline = BaselineCNN(input_shape, hidden_size).to(device)
ndmodel  = NdCNN(input_shape, hidden_size).to(device)

epochs = 10
criterion = nn.CrossEntropyLoss()
opt_base = optim.Adam(baseline.parameters(), lr=1e-3)
opt_nd   = optim.Adam(ndmodel.parameters(),   lr=1e-3)

history = {
    'base_loss': [], 'base_acc': [],
    'nd_loss':   [], 'nd_acc':   []
}

for epoch in range(1, epochs+1):
    loss_b = train_epoch(baseline, trainloader, criterion, opt_base)
    acc_b  = evaluate(baseline, testloader)
    history['base_loss'].append(loss_b)
    history['base_acc'].append(acc_b)
    
    loss_n = train_epoch(ndmodel, trainloader, criterion, opt_nd)
    acc_n  = evaluate(ndmodel, testloader)
    history['nd_loss'].append(loss_n)
    history['nd_acc'].append(acc_n)
    
    print(f"Epoch {epoch}: "
          f"Baseline→ Loss={loss_b:.4f}, Acc={acc_b:.2f}%  |  "
          f"NdLinear→ Loss={loss_n:.4f}, Acc={acc_n:.2f}%")

# —— 绘图对比 —— #
plt.figure()
plt.plot(range(1, epochs+1), history['base_acc'], label='BaselineCNN')
plt.plot(range(1, epochs+1), history['nd_acc'],   label='NdCNN')
plt.title('Test Accuracy Comparison')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.figure()
plt.plot(range(1, epochs+1), history['base_loss'], label='BaselineCNN')
plt.plot(range(1, epochs+1), history['nd_loss'],   label='NdCNN')
plt.title('Training Loss Comparison')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()
