In [91]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from tqdm import tqdm

# Image preprocessing: bring every image to the same size, turn it into a
# tensor, then normalise it.
_preprocess_steps = [
    transforms.Resize((32, 32)),           # fixed 32x32 input size
    transforms.ToTensor(),                 # PIL image -> tensor
    transforms.Normalize((0.5,), (0.5,)),  # mean 0.5, std 0.5
]
transform = transforms.Compose(_preprocess_steps)

# 自定义数据集类，用于加载图像和标签
class TinySegDataset(Dataset):
    """Dataset that pairs image files with integer class labels.

    The annotation file holds one sample per line, written as
    ``<image file name>: <integer label>``. Blank lines are ignored.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        """
        :param annotations_file: path to the label file
        :param img_dir: directory that contains the images
        :param transform: optional callable applied to every loaded image
        """
        self.annotations_file = annotations_file
        self.img_dir = img_dir
        self.transform = transform

        # Read the label file. Blank lines (for example a trailing empty line
        # at the end of the file) are dropped so they are never counted as
        # samples and never reach the parser.
        with open(annotations_file, 'r') as f:
            self.annotations = [line for line in f if line.strip()]

    @staticmethod
    def _parse_annotation(line):
        """Split one annotation line into ``(image_name, label)``.

        :param line: raw line from the label file
        :return: tuple of the image file name and the label as ``int``
        :raises ValueError: if the ``": "`` separator is missing or the label
            is not an integer
        """
        # Split on the LAST ": " so that a file name which itself contains
        # ": " is kept in one piece.
        img_name, separator, label = line.strip().rpartition(": ")
        if not separator:
            raise ValueError(f"Malformed annotation line: {line!r}")
        return img_name, int(label)

    def __len__(self):
        """Return the number of samples listed in the label file."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        :param idx: index into the list of annotation lines
        :return: ``(image, label)`` where ``image`` is the RGB image after the
            optional transform and ``label`` is an ``int``
        """
        img_name, label = self._parse_annotation(self.annotations[idx])
        img_path = os.path.join(self.img_dir, img_name)

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving the handle open.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply the preprocessing pipeline, if one was supplied.
        if self.transform:
            image = self.transform(image)

        return image, label

# Instantiate the dataset.
annotations_file = r"C:\Users\Yet Young\Desktop\documents\ML\tiny_seg_exp\TinySeg\labels.txt"  # replace with the path to your label file
img_dir = r"C:\Users\Yet Young\Desktop\documents\ML\tiny_seg_exp\TinySeg\Annotations"  # directory that contains the images
dataset = TinySegDataset(annotations_file, img_dir, transform)

# Number of samples held out for testing; everything else is used for training.
test_size = 624
train_size = len(dataset) - test_size
# Guard against a dataset that is not larger than the hold-out size, which
# would otherwise yield a zero or negative training-set length.
if train_size <= 0:
    raise ValueError(
        f"Dataset has {len(dataset)} samples, which is not enough to hold out "
        f"{test_size} test samples."
    )

# Split the dataset. A seeded generator makes the train/test partition the
# same on every run, so reported metrics are reproducible.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Create the data loaders (training data is shuffled, test data is not).
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)


In [92]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from torchvision.models import resnet18
import torch
import torch.nn as nn


class ResNetVOC(pl.LightningModule):
    """ResNet-18 image classifier wrapped as a LightningModule.

    :param num_classes: number of output classes of the network
    :param lr: learning rate handed to the RMSprop optimizer
    """

    def __init__(self, num_classes=5, lr=0.005):
        super().__init__()
        # Store the constructor arguments in ``self.hparams`` and in saved
        # checkpoints, so ``load_from_checkpoint`` can rebuild the module with
        # the same values instead of silently falling back to the defaults.
        self.save_hyperparameters()

        # Define the model and the loss.
        self.model = resnet18(num_classes=num_classes)
        # NOTE(review): this replaces the first convolution with a freshly
        # constructed 3->64 channel, 7x7, stride-2 layer. It looks like the
        # same shape as the stock ResNet-18 stem -- confirm whether the
        # replacement is intentional.
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the raw class scores (logits) of the wrapped network for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_no):
        """Run a single training step.

        :param batch: pair ``(x, y)`` of inputs and target class indices
        :param batch_no: index of the batch (unused)
        :return: the cross-entropy loss for this batch
        """
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        # Fraction of samples in the batch whose arg-max prediction equals the target.
        accuracy = (logits.argmax(dim=1) == y).float().mean()
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy)
        return loss

    def configure_optimizers(self):
        """Return an RMSprop optimizer over all parameters with the configured learning rate."""
        return torch.optim.RMSprop(self.parameters(), lr=self.hparams.lr)



In [95]:
model = ResNetVOC()
logger = TensorBoardLogger("voc_logs", name="resnet")
trainer = pl.Trainer(
    devices=1,
    # "auto" selects an available accelerator (a GPU when present) instead of
    # failing on machines that have none.
    accelerator="auto",
    max_epochs=5,  # number of training epochs
    enable_progress_bar=True,
    logger=logger,
)
trainer.fit(model, train_loader)
# Save the trained model.
trainer.save_checkpoint("resnet_voc_trained.pt")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | ResNet           | 11.2 M | train
1 | loss  | CrossEntropyLoss | 0      | train
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.716    Total estimated model params size (MB)
69        Modules in train mode
0         Modules in eval mode
d:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [96]:
# Load the trained model. Weights are mapped onto the GPU when CUDA is
# available and onto the CPU otherwise, so loading does not fail on a
# machine without a GPU.
inference_model = ResNetVOC.load_from_checkpoint(
    "resnet_voc_trained.pt",
    map_location="cuda" if torch.cuda.is_available() else "cpu",
)
inference_model.eval()  # switch to evaluation mode

# 推断函数
def get_prediction(x, model: torch.nn.Module):
    """Predict a class for every sample in a batch.

    The model is switched to evaluation mode and the forward pass runs with
    gradient tracking disabled. The model's parameters are left trainable
    (their ``requires_grad`` flags are not modified).

    :param x: input batch, already on the same device as ``model``
    :param model: network that maps ``x`` to per-class scores along dim 1
        (any ``torch.nn.Module``, including a LightningModule)
    :return: ``(predicted_class, probabilities)`` -- the arg-max class index
        per sample and the softmax probabilities over dim 1
    """
    model.eval()
    # No gradients are needed for inference; skipping them avoids building
    # the autograd graph.
    with torch.no_grad():
        probabilities = torch.softmax(model(x), dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
    return predicted_class, probabilities

from sklearn.metrics import classification_report

# Run inference over the test set, collecting the true and predicted labels.
true_y, pred_y = [], []

for x, y in tqdm(test_loader):
    # Move the batch to whichever device the model lives on, rather than
    # assuming a GPU is present.
    x = x.to(inference_model.device)

    # Get the predicted classes; the probabilities are not needed here.
    preds, _ = get_prediction(x, inference_model)

    true_y.extend(y.cpu().numpy())      # collect ground-truth labels
    pred_y.extend(preds.cpu().numpy())  # collect predicted labels

# Print the classification report.
print(classification_report(true_y, pred_y, digits=3))


100%|██████████| 10/10 [00:01<00:00,  9.35it/s]

              precision    recall  f1-score   support

           0      0.959     0.979     0.969        48
           1      1.000     1.000     1.000        41
           2      0.978     0.986     0.982       364
           3      1.000     1.000     1.000        97
           4      0.957     0.905     0.931        74

    accuracy                          0.979       624
   macro avg      0.979     0.974     0.976       624
weighted avg      0.979     0.979     0.979       624




