In [63]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from tqdm import tqdm

# Image preprocessing: bring every image to a common size, then convert and normalize it.
_preprocess_steps = [
    transforms.Resize((32, 32)),           # fixed 32x32 spatial size
    transforms.ToTensor(),                 # PIL image -> tensor
    transforms.Normalize((0.5,), (0.5,)),  # (value - 0.5) / 0.5, single-element mean/std
]
transform = transforms.Compose(_preprocess_steps)

# Custom dataset that loads images together with their integer labels.
class TinySegDataset(Dataset):
    """Image-classification dataset backed by a ``<image name>: <label>`` text file.

    Every non-blank line of the annotation file names one image (relative to
    ``img_dir``) and its integer class label, separated by ``": "``.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        """
        :param annotations_file: path to the label file, one ``name: label`` entry per line
        :param img_dir: directory that contains the images
        :param transform: optional callable applied to each loaded RGB image
        """
        self.annotations_file = annotations_file
        self.img_dir = img_dir
        self.transform = transform

        # Read the label file and drop blank lines (e.g. a trailing newline at
        # the end of the file): otherwise they would be counted by __len__ and
        # then fail to parse in __getitem__.
        with open(annotations_file, 'r') as f:
            stripped_lines = (raw_line.strip() for raw_line in f)
            self.annotations = [entry for entry in stripped_lines if entry]

    @staticmethod
    def _parse_line(line):
        """Split one annotation line into ``(image name, integer label)``.

        The split happens on the LAST ``": "`` so an image name that itself
        contains ``": "`` is still parsed correctly.

        :param line: one ``name: label`` entry (surrounding whitespace is ignored)
        :return: tuple ``(img_name, label)`` with ``label`` converted to ``int``
        :raises ValueError: if the separator is missing or the label is not an integer
        """
        img_name, separator, label = line.strip().rpartition(": ")
        if not separator:
            raise ValueError(
                f"Malformed annotation line (expected 'name: label'): {line!r}"
            )
        return img_name, int(label)

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, label)``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        img_name, label = self._parse_line(self.annotations[idx])
        img_path = os.path.join(self.img_dir, img_name)

        # Open inside a context manager so the file handle is released right
        # away; convert() hands back a separate image that outlives the block.
        with Image.open(img_path) as source_image:
            image = source_image.convert('RGB')

        # Apply the preprocessing pipeline when one was provided.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Instantiate the dataset.
annotations_file = r"C:\Users\Yet Young\Desktop\documents\ML\tiny_seg_exp\TinySeg\labels.txt"  # replace with the path to your label file
img_dir = r"C:\Users\Yet Young\Desktop\documents\ML\tiny_seg_exp\TinySeg\Annotations"  # directory that contains the images
dataset = TinySegDataset(annotations_file, img_dir, transform)

# Number of samples held out for evaluation; everything else is used for training.
test_size = 624
if not 0 < test_size < len(dataset):
    raise ValueError(
        f"test_size={test_size} must be between 1 and len(dataset) - 1 "
        f"(dataset has {len(dataset)} samples)"
    )
train_size = len(dataset) - test_size

# Split the dataset with a fixed seed. Without it, re-running this cell draws a
# different partition, so a model trained earlier could be evaluated on images
# it has already seen during training.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Build the data loaders: shuffle only the training data.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)


In [64]:
import torch
import torch.nn as nn
from torch.optim import SGD

# LeNet model definition.
class LeNet(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages followed by three linear layers.

    The shape comments below assume a 3x32x32 input, which is the input size
    for which the flattened feature vector has 16 * 5 * 5 = 400 elements.
    """

    def __init__(self, num_classes=10):
        """
        :param num_classes: number of output logits produced by the last layer
        """
        super().__init__()

        # Stage 1: 3 -> 6 channels with a 5x5 kernel, then 2x2 max pooling.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)   # -> (6, 28, 28)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)                     # -> (6, 14, 14)

        # Stage 2: 6 -> 16 channels with a 5x5 kernel, then 2x2 max pooling.
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)  # -> (16, 10, 10)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)                     # -> (16, 5, 5)

        # Classifier head: 400 -> 120 -> 84 -> num_classes.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        # Convolutional feature extractor: conv -> ReLU -> pool, twice.
        features = self.pool1(self.conv1(x).relu())
        features = self.pool2(self.conv2(features).relu())

        # Flatten everything except the batch dimension.
        batch_size = features.size(0)
        flat = features.view(batch_size, -1)

        # Fully connected layers; the last one has no activation (logits).
        hidden = self.fc1(flat).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)


In [65]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.optim import SGD
from torchvision import datasets

# LightningModule that wraps LeNet for training.
class LeNetLightning(pl.LightningModule):
    """Trains a ``LeNet`` classifier with cross-entropy loss and SGD."""

    def __init__(self, num_classes=5):
        """
        :param num_classes: number of classes predicted by the wrapped LeNet
        """
        super().__init__()
        # Store the constructor arguments in the checkpoint. Without this,
        # load_from_checkpoint() rebuilds the module with the default
        # num_classes and fails to load weights trained with another value.
        self.save_hyperparameters()
        self.model = LeNet(num_classes=num_classes)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the logits of the wrapped LeNet for the batch ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one ``(images, labels)`` batch and log metrics.

        Logs ``train_loss`` and ``train_accuracy`` and returns the loss.
        """
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        # Fraction of samples whose highest-scoring class equals the label.
        accuracy = (logits.argmax(dim=1) == y).float().mean()
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy)
        return loss

    def configure_optimizers(self):
        """Return the SGD optimizer (lr=0.02, momentum=0.9) over all parameters."""
        return SGD(self.parameters(), lr=0.02, momentum=0.9)


In [66]:
# Initialize the model.
model = LeNetLightning()

# TensorBoard logger.
logger = TensorBoardLogger("voc_logs", name="lenet")

# Build the PyTorch Lightning Trainer.
trainer = pl.Trainer(
    devices=1,            # use a single device
    accelerator="auto",   # let Lightning pick an available accelerator instead of requiring a GPU
    max_epochs=5,         # number of training epochs
    enable_progress_bar=True,
    logger=logger,
)

# Start training.
trainer.fit(model, train_loader)

# Save the trained model as a Lightning checkpoint.
trainer.save_checkpoint("lenet_voc_trained.pt")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | LeNet            | 61.6 K | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
61.6 K    Trainable params
0         Non-trainable params
61.6 K    Total params
0.246     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode
d:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [67]:
from sklearn.metrics import classification_report

# Pick the device at run time so the cell also works on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model onto that device and switch it to evaluation mode.
inference_model = LeNetLightning.load_from_checkpoint("lenet_voc_trained.pt", map_location=device)
inference_model.to(device)
inference_model.eval()


# Inference helper.
def get_prediction(x, model: pl.LightningModule):
    """Return ``(predicted_class, probabilities)`` for the batch ``x``.

    :param x: input batch, already on the same device as ``model``
    :param model: LightningModule whose forward pass returns class logits
    :return: the arg-max class index per sample and the softmax probabilities
    """
    model.freeze()  # prepare the model for prediction
    probabilities = torch.softmax(model(x), dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
    return predicted_class, probabilities


# Run inference over the test set, collecting true and predicted labels.
true_y, pred_y = [], []

for x, y in tqdm(test_loader, total=len(test_loader)):
    x = x.to(device)  # move the inputs to the model's device

    preds, _ = get_prediction(x, inference_model)

    true_y.extend(y.cpu().numpy())      # ground-truth labels
    pred_y.extend(preds.cpu().numpy())  # predicted labels

# Print the per-class classification report.
print(classification_report(true_y, pred_y, digits=3))


100%|██████████| 10/10 [00:00<00:00, 12.77it/s]

              precision    recall  f1-score   support

           0      0.981     0.883     0.930        60
           1      0.980     1.000     0.990        50
           2      0.968     0.983     0.975       343
           3      0.907     0.975     0.940        80
           4      0.953     0.890     0.920        91

    accuracy                          0.960       624
   macro avg      0.958     0.946     0.951       624
weighted avg      0.960     0.960     0.960       624




