In [1]:
import os
import torch
import numpy as np
import torch.utils.data
from PIL import Image
# Set the KMP_DUPLICATE_LIB_OK environment variable to "TRUE" for this process.
# NOTE(review): presumably a workaround for the Intel OpenMP "duplicate runtime
# library" abort that can occur when several packages each ship their own
# OpenMP runtime -- confirm it is still needed in this environment.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:
# PyTorch 的视觉工具包，包含常用模型和数据增强方法。
import torchvision
# Faster R-CNN 的分类头，用于替换原有的分类器。
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# Mask R-CNN 的掩码预测头，用于替换原有的掩码预测器。
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 

def get_instance_segmentation_model(num_classes):
    """Build a pretrained Mask R-CNN whose prediction heads fit ``num_classes``.

    A ResNet-50 + FPN Mask R-CNN is created with ``pretrained=True`` and its
    two ROI heads are swapped for freshly initialised predictors, so the
    network can be fine-tuned on a dataset with a different label set.

    Args:
        num_classes: Number of output classes, background included.

    Returns:
        The model with new box and mask predictors installed.
    """
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    roi_heads = model.roi_heads

    # Box head: keep the input width of the existing classifier and only
    # change how many classes it scores.
    box_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same idea, reusing the channel count that feeds the
    # existing mask predictor and a 256-channel hidden layer.
    mask_in_channels = roi_heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    roi_heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels,
        mask_hidden_channels,
        num_classes,
    )

    return model

In [None]:
# 导入 transforms 模块，通常包含了一些数据增强和预处理的操作
import transforms as T
# 导入 evaluate 函数，用于模型评估
from engine import evaluate

def get_transform(train):
    """Return the composed preprocessing/augmentation pipeline.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion; otherwise only the tensor
            conversion is applied.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Tensor conversion is required for both training and evaluation.
    steps = [T.ToTensor()]
    if train:
        # Training-only augmentation.
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

In [None]:
# 自定义工具模块，通常包含数据整理、评估等函数
import utils
# 训练一个epoch的函数
from engine import train_one_epoch
# 自定义的数据集类，用于加载VOC格式的数据
from sbd_try import TryDataset
import torch

# Attempt to enable MKL acceleration for CPU operations.
# NOTE(review): torch.backends.mkl does not appear to expose a documented
# `enabled` switch (unlike torch.backends.mkldnn), so this assignment may be a
# no-op that just sets an attribute on the module -- confirm.
torch.backends.mkl.enabled = True

# Training dataset, built with the training-time transforms.
dataset = TryDataset('voc', get_transform(train=True))
# Test dataset over the same 'voc' root, built with the evaluation-time transforms.
dataset_test = TryDataset('voc', get_transform(train=False))

# Fix the random seed so the train/test split is the same on every run.
torch.manual_seed(1)
# Shuffled list of sample indices.
indices = torch.randperm(len(dataset)).tolist()
# Split by index: the last 20 shuffled samples form the test set, the rest are
# used for training. Both subsets use the same permutation, so they do not overlap.
dataset = torch.utils.data.Subset(dataset, indices[:-20])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-20:])

# Training loader:
# - one image per batch
# - samples are reshuffled
# - 4 worker subprocesses load the data
# - batches are assembled by the project's utils.collate_fn
# Each batch is presumably a tuple (images, targets) -- this depends on
# utils.collate_fn, which is defined outside this notebook.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

# Test loader: same settings, but the order is not shuffled.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

# Training is pinned to the CPU; the commented-out line below is the
# automatic CUDA/CPU selection that was disabled.
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device = torch.device('cpu')

In [None]:
# 3 foreground classes + 1 background class.
num_classes = 4

# Build the Mask R-CNN instance-segmentation model with the helper defined earlier.
model = get_instance_segmentation_model(num_classes)

# Move the model to the selected device; as the last expression of the cell,
# the returned module is also displayed.
model.to(device)



MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [None]:
# Build the optimizer.
# Only parameters with requires_grad=True are optimized.
params = [p for p in model.parameters() if p.requires_grad]
# SGD with learning rate 0.01, momentum 0.9 and weight decay 0.0005.
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=0.0005)

# Learning-rate schedule: multiply the learning rate by 0.5 every 5 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5, last_epoch=-1)

# Training loop: 10 epochs in this cell.
num_epochs = 10
for epoch in range(num_epochs):
    # Train for one epoch; print_freq=1 logs every iteration.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()

    # Evaluate the current model on the test loader.
    evaluate(model, data_loader_test, device)

# Save the whole model object (not only a state_dict) as 'model_1';
# later cells reload it with torch.load.
torch.save(model, 'model_1')

Epoch: [0]  [  0/100]  eta: 0:48:40  lr: 0.000111  loss: 8.6736 (8.6736)  loss_classifier: 1.0456 (1.0456)  loss_box_reg: 0.1225 (0.1225)  loss_mask: 6.4049 (6.4049)  loss_objectness: 0.9557 (0.9557)  loss_rpn_box_reg: 0.1449 (0.1449)  time: 29.2049  data: 8.7941
Epoch: [0]  [  1/100]  eta: 1:06:04  lr: 0.000212  loss: 6.4949 (7.5843)  loss_classifier: 1.0456 (1.0819)  loss_box_reg: 0.1225 (0.3591)  loss_mask: 2.6046 (4.5047)  loss_objectness: 0.9557 (1.4573)  loss_rpn_box_reg: 0.1449 (0.1812)  time: 40.0447  data: 8.2410
Epoch: [0]  [  2/100]  eta: 0:52:10  lr: 0.000313  loss: 6.4949 (6.7041)  loss_classifier: 1.0456 (1.0333)  loss_box_reg: 0.1225 (0.2663)  loss_mask: 3.9023 (4.3039)  loss_objectness: 0.9557 (0.9740)  loss_rpn_box_reg: 0.1449 (0.1266)  time: 31.9396  data: 5.4974
Epoch: [0]  [  3/100]  eta: 0:51:56  lr: 0.000414  loss: 4.9439 (5.9703)  loss_classifier: 0.9934 (1.0234)  loss_box_reg: 0.1225 (0.4045)  loss_mask: 2.6046 (3.4707)  loss_objectness: 0.5042 (0.8565)  loss_rp

### train_one_epoch:
- 模型设置为训练模式，启用 Dropout、BatchNorm 等层在训练阶段的行为（LayerNorm 在训练和评估模式下行为相同，不受此影响）。
    - 创建并初始化 `metric_logger` 用于指标记录，添加学习率监控。
- 学习率调度（warmup）
    - 第0轮启用预热策略，缓慢提升学习率，避免训练初期梯度震荡。
- 遍历数据加载器
- 将数据移到指定设备（如GPU）
- 前向计算损失；多卡训练时需要先对各进程的 loss 做归约（reduce），再用于日志统计
- 梯度清零、反向传播、权重更新
- 学习率更新
- 指标更新：记录当前loss和学习率数值。

### evaluate：对输入的验证/测试数据集进行推断，计算并输出性能指标。

#### 装饰器和设备设置
- `@torch.no_grad()`  ：禁用梯度计算，减少内存开销，加快推断速度，评估时必备。
- `n_threads = torch.get_num_threads()`  记录当前线程数，以便后续恢复。
- `cpu_device = torch.device("cpu")`  用于将推断结果转移到CPU，便于后续处理。
- `model.eval()`  切换模型到评估模式，禁用Dropout和BatchNorm的训练行为。

#### 评估相关初始化
- `metric_logger = utils.MetricLogger(delimiter="  ")`  用于记录和打印评估过程中的统计信息。
- `coco = get_coco_api_from_dataset(data_loader.dataset)`  获取 COCO 标准评估接口，适配常用目标检测数据集。
- `iou_types = _get_iou_types(model)`  根据模型类型获取对应的 IoU 评估指标（bbox，mask等）。
- `coco_evaluator = CocoEvaluator(coco, iou_types)`  初始化 COCO 评估器，用于计算AP、AR等常用指标。

#### 评估过程（循环数据集）
- 遍历`data_loader`获取图像和标注，日志打印每100个batch。
- 数据转移至`device`（如GPU）。
- 记录推断时间，调用`model(image)`进行前向推断。
- 输出结果转回CPU便于后续计算。
- 生成对应的结果字典`res`（图像ID映射到预测）。
- 用`coco_evaluator.update(res)`更新评估结果。
- 使用`metric_logger.update()`记录模型推断时间和评估时间。

#### 评估统计与打印
- `metric_logger.synchronize_between_processes()`  多进程同步指标。
- 打印平均指标。
- `coco_evaluator.synchronize_between_processes()`  多进程同步COCO评估状态。
- `coco_evaluator.accumulate()`  积累所有预测结果，准备计算指标。
- `coco_evaluator.summarize()`  计算并打印各指标（mAP等）总结。
- 恢复线程数 `torch.set_num_threads(n_threads)`。
- 返回 `coco_evaluator` 对象包含全部评估结果。

In [5]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_1')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.005) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.8 after every epoch
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)

# training: 2 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 2
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_2')

Epoch: [0]  [  0/100]  eta: 1:45:44  lr: 0.000055  loss: 0.3415 (0.3415)  loss_classifier: 0.0771 (0.0771)  loss_box_reg: 0.1039 (0.1039)  loss_mask: 0.1021 (0.1021)  loss_objectness: 0.0114 (0.0114)  loss_rpn_box_reg: 0.0469 (0.0469)  time: 63.4410  data: 26.9001
Epoch: [0]  [  1/100]  eta: 1:28:43  lr: 0.000106  loss: 0.3415 (0.7025)  loss_classifier: 0.0771 (0.1165)  loss_box_reg: 0.1039 (0.1752)  loss_mask: 0.1021 (0.1405)  loss_objectness: 0.0114 (0.0207)  loss_rpn_box_reg: 0.0469 (0.2496)  time: 53.7759  data: 13.4654
Epoch: [0]  [  2/100]  eta: 1:12:42  lr: 0.000156  loss: 0.3415 (0.5743)  loss_classifier: 0.0899 (0.1077)  loss_box_reg: 0.1163 (0.1556)  loss_mask: 0.1021 (0.1244)  loss_objectness: 0.0114 (0.0147)  loss_rpn_box_reg: 0.0469 (0.1720)  time: 44.5164  data: 8.9776
Epoch: [0]  [  3/100]  eta: 1:06:45  lr: 0.000207  loss: 0.3415 (0.5639)  loss_classifier: 0.0899 (0.1103)  loss_box_reg: 0.1163 (0.1568)  loss_mask: 0.1021 (0.1295)  loss_objectness: 0.0114 (0.0151)  loss_

In [6]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_2')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.004) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.004, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.9 every 2 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)

# training: 4 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 4
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_3')

Epoch: [0]  [  0/100]  eta: 0:51:16  lr: 0.000044  loss: 0.0725 (0.0725)  loss_classifier: 0.0150 (0.0150)  loss_box_reg: 0.0084 (0.0084)  loss_mask: 0.0337 (0.0337)  loss_objectness: 0.0020 (0.0020)  loss_rpn_box_reg: 0.0133 (0.0133)  time: 30.7623  data: 8.4953
Epoch: [0]  [  1/100]  eta: 1:03:46  lr: 0.000085  loss: 0.0725 (0.2407)  loss_classifier: 0.0150 (0.0484)  loss_box_reg: 0.0084 (0.0622)  loss_mask: 0.0337 (0.0654)  loss_objectness: 0.0020 (0.0079)  loss_rpn_box_reg: 0.0133 (0.0568)  time: 38.6516  data: 6.3279
Epoch: [0]  [  2/100]  eta: 0:58:30  lr: 0.000125  loss: 0.2312 (0.2376)  loss_classifier: 0.0466 (0.0478)  loss_box_reg: 0.0577 (0.0607)  loss_mask: 0.0651 (0.0653)  loss_objectness: 0.0027 (0.0061)  loss_rpn_box_reg: 0.0591 (0.0576)  time: 35.8211  data: 4.2218
Epoch: [0]  [  3/100]  eta: 0:57:11  lr: 0.000165  loss: 0.2312 (0.2628)  loss_classifier: 0.0466 (0.0648)  loss_box_reg: 0.0577 (0.0704)  loss_mask: 0.0651 (0.0706)  loss_objectness: 0.0027 (0.0066)  loss_rp

In [7]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_3')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.001) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.5 every 2 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

# training: 4 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 4
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_4')

Epoch: [0]  [  0/100]  eta: 4:04:32  lr: 0.000011  loss: 0.3999 (0.3999)  loss_classifier: 0.0768 (0.0768)  loss_box_reg: 0.1333 (0.1333)  loss_mask: 0.0958 (0.0958)  loss_objectness: 0.0042 (0.0042)  loss_rpn_box_reg: 0.0897 (0.0897)  time: 146.7265  data: 77.4304
Epoch: [0]  [  1/100]  eta: 2:52:08  lr: 0.000021  loss: 0.3999 (0.6725)  loss_classifier: 0.0768 (0.1229)  loss_box_reg: 0.1333 (0.1956)  loss_mask: 0.0958 (0.1304)  loss_objectness: 0.0042 (0.0328)  loss_rpn_box_reg: 0.0897 (0.1908)  time: 104.3261  data: 38.7447
Epoch: [0]  [  2/100]  eta: 2:19:34  lr: 0.000031  loss: 0.5825 (0.6425)  loss_classifier: 0.1021 (0.1159)  loss_box_reg: 0.1468 (0.1793)  loss_mask: 0.1078 (0.1229)  loss_objectness: 0.0173 (0.0276)  loss_rpn_box_reg: 0.2086 (0.1967)  time: 85.4589  data: 25.8456
Epoch: [0]  [  3/100]  eta: 1:56:09  lr: 0.000041  loss: 0.5534 (0.6202)  loss_classifier: 0.1021 (0.1215)  loss_box_reg: 0.1468 (0.1763)  loss_mask: 0.1078 (0.1285)  loss_objectness: 0.0078 (0.0227)  lo

In [8]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_4')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.0005) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.5 every 5 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# training: 10 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 10
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_5')

Epoch: [0]  [  0/100]  eta: 4:54:18  lr: 0.000006  loss: 0.3357 (0.3357)  loss_classifier: 0.0601 (0.0601)  loss_box_reg: 0.1033 (0.1033)  loss_mask: 0.0615 (0.0615)  loss_objectness: 0.0324 (0.0324)  loss_rpn_box_reg: 0.0784 (0.0784)  time: 176.5892  data: 107.2044
Epoch: [0]  [  1/100]  eta: 3:03:57  lr: 0.000011  loss: 0.2037 (0.2697)  loss_classifier: 0.0441 (0.0521)  loss_box_reg: 0.0481 (0.0757)  loss_mask: 0.0489 (0.0552)  loss_objectness: 0.0032 (0.0178)  loss_rpn_box_reg: 0.0594 (0.0689)  time: 111.4919  data: 53.6161
Epoch: [0]  [  2/100]  eta: 2:39:59  lr: 0.000016  loss: 0.3357 (0.4016)  loss_classifier: 0.0601 (0.0679)  loss_box_reg: 0.1033 (0.1040)  loss_mask: 0.0615 (0.0779)  loss_objectness: 0.0324 (0.0308)  loss_rpn_box_reg: 0.0784 (0.1210)  time: 97.9521  data: 35.7514
Epoch: [0]  [  3/100]  eta: 2:08:39  lr: 0.000021  loss: 0.2037 (0.3178)  loss_classifier: 0.0441 (0.0517)  loss_box_reg: 0.0481 (0.0797)  loss_mask: 0.0489 (0.0699)  loss_objectness: 0.0032 (0.0233)  l

In [9]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_5')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.0004) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0004, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.5 every 5 epochs;
# with only 5 epochs below, the decay is applied after the last training epoch,
# so the base learning rate is unchanged for all epochs trained in this cell
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# training: 5 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 5
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_6')

Epoch: [0]  [  0/100]  eta: 2:49:55  lr: 0.000004  loss: 0.4092 (0.4092)  loss_classifier: 0.0484 (0.0484)  loss_box_reg: 0.0999 (0.0999)  loss_mask: 0.0819 (0.0819)  loss_objectness: 0.0283 (0.0283)  loss_rpn_box_reg: 0.1508 (0.1508)  time: 101.9543  data: 44.5430
Epoch: [0]  [  1/100]  eta: 1:59:21  lr: 0.000008  loss: 0.2310 (0.3201)  loss_classifier: 0.0373 (0.0428)  loss_box_reg: 0.0619 (0.0809)  loss_mask: 0.0590 (0.0704)  loss_objectness: 0.0054 (0.0169)  loss_rpn_box_reg: 0.0674 (0.1091)  time: 72.3365  data: 22.2734
Epoch: [0]  [  2/100]  eta: 1:40:09  lr: 0.000013  loss: 0.2310 (0.2852)  loss_classifier: 0.0461 (0.0439)  loss_box_reg: 0.0619 (0.0703)  loss_mask: 0.0819 (0.0759)  loss_objectness: 0.0135 (0.0157)  loss_rpn_box_reg: 0.0674 (0.0793)  time: 61.3230  data: 14.8497
Epoch: [0]  [  3/100]  eta: 1:24:58  lr: 0.000017  loss: 0.2310 (0.2878)  loss_classifier: 0.0373 (0.0412)  loss_box_reg: 0.0619 (0.0684)  loss_mask: 0.0688 (0.0741)  loss_objectness: 0.0135 (0.0167)  los

In [10]:
# Resume from the checkpoint written by the previous training cell.
# num_classes is re-declared here but not read again in this cell.
num_classes = 4
model = torch.load('model_6')
model.to(device)

# construct a fresh SGD optimizer (base lr 0.001) over the trainable parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler multiplies the learning rate by 0.5 every 5 epochs;
# with only 5 epochs below, the decay is applied after the last training epoch,
# so the base learning rate is unchanged for all epochs trained in this cell
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# training: 5 more epochs
# NOTE(review): `epoch` restarts at 0 here, so any epoch-0 behaviour inside
# train_one_epoch (the notes in this notebook describe an LR warm-up) is
# repeated in this cell -- confirm that is intended.
num_epochs = 5
for epoch in range(num_epochs):
    # train for one epoch; print_freq=1 logs every iteration
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=1)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the test dataset
    evaluate(model, data_loader_test, device)

# save the whole model object as the next checkpoint
torch.save(model, 'model_7')

Epoch: [0]  [  0/100]  eta: 2:17:16  lr: 0.000011  loss: 0.3647 (0.3647)  loss_classifier: 0.0804 (0.0804)  loss_box_reg: 0.0969 (0.0969)  loss_mask: 0.0903 (0.0903)  loss_objectness: 0.0118 (0.0118)  loss_rpn_box_reg: 0.0853 (0.0853)  time: 82.3663  data: 39.0655
Epoch: [0]  [  1/100]  eta: 1:27:02  lr: 0.000021  loss: 0.1781 (0.2714)  loss_classifier: 0.0377 (0.0590)  loss_box_reg: 0.0664 (0.0817)  loss_mask: 0.0631 (0.0767)  loss_objectness: 0.0019 (0.0069)  loss_rpn_box_reg: 0.0089 (0.0471)  time: 52.7528  data: 19.5330
Epoch: [0]  [  2/100]  eta: 1:15:13  lr: 0.000031  loss: 0.3128 (0.2852)  loss_classifier: 0.0379 (0.0520)  loss_box_reg: 0.0773 (0.0802)  loss_mask: 0.0848 (0.0794)  loss_objectness: 0.0118 (0.0147)  loss_rpn_box_reg: 0.0825 (0.0589)  time: 46.0521  data: 13.0231
Epoch: [0]  [  3/100]  eta: 1:08:35  lr: 0.000041  loss: 0.2052 (0.2652)  loss_classifier: 0.0377 (0.0461)  loss_box_reg: 0.0697 (0.0776)  loss_mask: 0.0737 (0.0780)  loss_objectness: 0.0102 (0.0136)  loss

# 从这里之后都是推理+可视化的尝试

In [None]:
import torch
from torchvision import transforms
from PIL import Image

# Load the input image.
image = Image.open('voc/JPEGImages/100.jpg')  # replace with your own image path

# Define the preprocessing pipeline.
transform = transforms.Compose([
    transforms.ToTensor(),  # convert the PIL image to a tensor
])

# Apply the preprocessing.
tensor_image = transform(image)

# Debug helpers: print the tensor's shape and dtype.
#print("Tensor shape:", tensor_image.shape)  # shape of the tensor
#print("Tensor data type:", tensor_image.dtype)  # dtype of the tensor

# NOTE(review): this reloads 'model_1', the checkpoint saved by the first
# training cell, not the most recent one -- confirm this is the intended model.
model = torch.load('model_1')

# Run the model on the single image in eval mode without gradients; the first
# (only) entry of the result holds the predictions for this image.
model.eval()
with torch.no_grad():
    predictions = model([tensor_image.to(device)])
prediction = predictions[0]

# Per-instance confidence scores; only instances scoring above 0.7 are kept.
scores=prediction['scores'].cpu().numpy()
score_d=0.7
# Boolean selector applied to both the masks and the labels below.
mask_np=(scores>score_d)
# Scale the masks by 255 and convert to byte so they can be visualised or saved.
masks=prediction['masks'].mul(255).byte().cpu().numpy()[mask_np]
labels=prediction['labels'].cpu().numpy()[mask_np].astype(np.uint8)

In [None]:
# Colour per class id, written as RGB triples:
# black (background), green, cyan, yellow, orange, red.
colors=[(0, 0, 0), (0 ,255 ,0), (0 ,255, 255), (255 ,255, 0), (255, 140, 0), (255, 0, 0)]
palette = np.array(colors, np.uint8)

# Build one coloured image per kept instance.
mask_images=[]
for mask,label in zip(masks,labels):
    # `mask` is a (1, h, w) uint8 view into `masks` with values in 0..255.
    # It is only read here, never written, so `masks` stays intact and the
    # cell gives the same result when it is re-run.
    soft_mask = mask[0]
    # Per-pixel brightness in [0, 1], shaped (h, w, 1) so it broadcasts over
    # the three colour channels.
    brightness = soft_mask[..., np.newaxis] / 255
    # Class id wherever the mask is non-zero, 0 (background) elsewhere.
    label_map = np.where(soft_mask > 0, label, 0)
    # Palette lookup (h, w) -> (h, w, 3), dimmed by the mask value.
    mask_image = palette[label_map] * brightness
    mask_images.append(mask_image)

# Canvas with the size of the input image (PIL's size is (width, height)).
result=np.zeros((image.size[1], image.size[0],3))
for i in mask_images:
    # Blend: every instance image contributes with the same weight.
    result=result+1/len(mask_images)*i

import cv2
# cv2.imwrite expects 8-bit data in BGR channel order, while the palette above
# is RGB: round to uint8 explicitly and reverse the channel axis so the colours
# in the saved file match the palette.
result_bgr = np.ascontiguousarray(
    np.clip(np.rint(result), 0, 255).astype(np.uint8)[:, :, ::-1])
cv2.imwrite('masks.png',result_bgr)

True