In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets, transforms
transform = transforms.Compose([
    transforms.ToTensor(),  # 将图片转换为 Tensor
    transforms.Normalize((0.5,), (0.5,)),  # 对图片进行归一化
])

In [9]:
from torch.utils.data import DataLoader
from lightning.fabric import Fabric

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)

test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

fabric = Fabric(accelerator='cuda')  
all_batches = []
for batch in test_loader:
    batch_on_device = fabric.to_device(batch)
    all_batches.append(batch_on_device)

In [10]:
# 在数据加载之前查看图像形状
sample_image = datasets.MNIST(root='./data', train=True, download=True, transform=transform)[0][0]
print(train_dataset[0][0].shape)  # 查看输出的形状，确保它是 (3, 224, 224)

torch.Size([1, 28, 28])


In [11]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [12]:
class VGGMNIST(pl.LightningModule):
  def __init__(self):
        super(VGGMNIST, self).__init__()
        # 定义卷积块 1
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),  # 输入通道为 1，输出通道为 64
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 尺寸减半
        )
        # 定义卷积块 2
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),  # 输入 64，输出 128
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 再次减半
        )
        # 定义全连接层
        self.fc1 = nn.Linear(128 * 7 * 7, 256)  # 128 个通道，7x7 尺寸
        self.fc2 = nn.Linear(256, 10)  # 最终输出 10 类

        # 损失函数
        self.loss = nn.CrossEntropyLoss()

  def forward(self, x):
        # 前向传播
        x = self.conv_block1(x)  # 卷积块 1
        x = self.conv_block2(x)  # 卷积块 2
        x = x.view(x.size(0), -1)  # 展平
        x = F.relu(self.fc1(x))  # 全连接层 1
        x = self.fc2(x)  # 全连接层 2
        return x

  def training_step(self, batch, batch_no):
    # implement single training step
    x, y = batch
    logits = self(x)
    loss = self.loss(logits, y)
    accuracy = (logits.argmax(dim=1) == y).float().mean()
    self.log("train_loss", loss)
    self.log("train_accuracy", accuracy)
    return loss
  
  def configure_optimizers(self):
    # choose your optimizer
    return torch.optim.RMSprop(self.parameters(), lr=0.0008)

In [13]:
model = VGGMNIST()
logger = TensorBoardLogger("mnist_logs", name="vgg")
trainer = pl.Trainer(
    devices=1,           
    accelerator='gpu', 
    max_epochs=1, # set number of epochs
   enable_progress_bar=True,
   precision=16,
   logger=logger 
)
trainer.fit(model, train_loader)

d:\Anaconda\lib\site-packages\lightning_fabric\connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params | Mode 
---------------------------------------------------------
0 | conv_block1 | Sequential       | 640    | train
1 | conv_block2 | Sequential       | 73.9 K | train
2 | fc1        

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


In [14]:
trainer.save_checkpoint("VGG_mnist.pt")

In [15]:
from tqdm.autonotebook import tqdm

def get_prediction(x, model: pl.LightningModule):
  model.freeze() # prepares model for predicting
  probabilities = torch.softmax(model(x), dim=1)
  predicted_class = torch.argmax(probabilities, dim=1)
  return predicted_class, probabilities

inference_model = VGGMNIST.load_from_checkpoint("VGG_mnist.pt", map_location="cuda")

In [16]:
true_y, pred_y = [], []
for batch in tqdm(iter(all_batches), total=len(all_batches)):
  x, y = batch
  true_y.extend(y.cpu())
  preds, probs = get_prediction(x, inference_model)
  pred_y.extend(preds.cpu())

  0%|          | 0/313 [00:00<?, ?it/s]

In [17]:
from sklearn.metrics import classification_report
print(classification_report(true_y, pred_y, digits=3))

              precision    recall  f1-score   support

           0      0.995     0.991     0.993       980
           1      0.994     0.996     0.995      1135
           2      0.986     0.989     0.987      1032
           3      0.994     0.982     0.988      1010
           4      0.991     0.992     0.991       982
           5      0.991     0.978     0.984       892
           6      0.980     0.993     0.987       958
           7      0.988     0.974     0.981      1028
           8      0.980     0.993     0.986       974
           9      0.975     0.986     0.981      1009

    accuracy                          0.987     10000
   macro avg      0.987     0.987     0.987     10000
weighted avg      0.987     0.987     0.987     10000

