In [1]:
import os
from PIL import Image
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from utils import datasets
from utils import utils
from utils import train_val,train_val_for_det
from utils import net #网络文件于此
from utils import metrics
import warnings
# 完全禁用警告
warnings.filterwarnings("ignore")

In [2]:
from utils.utils import Config,Logs,BestSelector
config=utils.Config(
    dataset_sep=[
        0.82,0.17,0.01          
        ],
    resize_size=(128,128),#图像尺寸
    batch_size=2,
    lr=0.0007,
    epochs=5,#epoch轮数
    hidden_size=256,
    optim="Adam",
    momentum=0.9,
    weight_decay=1e-4,
    seed=42,
    mean= [0.50638 ,0.49962538 ,0.45205265],
    std=[0.23568255 ,0.24141274 ,0.25167742],
    AMP=True,
    checkpoint_interval=0.25,#只保存4个模型
    source_dir=r"Cifar-10",#原始数据集，每个分类一个文件夹，每个文件夹里包含多个图片
    data_path=r"data\PascalVOC",#项目数据集
    data_crop_rate=0.25,#只使用0.25
    # classes=["Apple","Carambola","Pear","Plum","Tomatoes"],
    device="cuda" if torch.cuda.is_available() else "cpu",

)


#### 加载 Pascal VOC 数据集

In [3]:

import torchvision
train_val_dataset = torchvision.datasets.VOCDetection(
    root=config.data_path, 
    year='2012',
    image_set='trainval',
    # download=True, 
    transform=datasets.get_transform(
        chance="val",
        resize_size=config.resize_size,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
)
train_dataset,val_dataset=datasets.get_parts_of_datasets(train_val_dataset,rate=0.75,only_train=False)#训练验证3-1开

test_dataset = torchvision.datasets.VOCDetection(
        root=config.data_path,
        year='2012',
        image_set='val',
        # download=True,
        
        transform=datasets.get_transform(
            chance="val",
            resize_size=config.resize_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
))
print(f"原始训练集大小：{len(train_dataset)}")
print(f"原始验证集大小：{len(val_dataset)}")
print(f"原始测试集大小：{len(test_dataset)}")

原始训练集大小：8655
原始验证集大小：2885
原始测试集大小：5823


##### load数据集，并根据数据量进行裁剪。原数据集较大，本身为学习项目只取其中1/4数据进行训练测试

In [4]:
from utils import datasets_for_det
crop_rate=config.data_crop_rate
train_dataset_crop=datasets.get_parts_of_datasets(train_dataset,crop_rate)
val_dataset_crop=datasets.get_parts_of_datasets(val_dataset,crop_rate)
test_dataset_crop=datasets.get_parts_of_datasets(test_dataset,crop_rate)
train_dataset_size=len(train_dataset_crop)
val_dataset_size=len(val_dataset_crop)
test_dataset_size=len(test_dataset_crop)

print(f"本次训练用训练集大小：{len(train_dataset_crop)}")
print(f"本次训练用验证集大小：{len(val_dataset_crop)}")
print(f"本次测试用测试集大小：{len(test_dataset_crop)}")
collate_fn=datasets_for_det.PascalVOC.collate_fn
train_loader=DataLoader(train_dataset_crop,batch_size=config.batch_size,shuffle=True,drop_last=True,collate_fn=collate_fn)
val_loader=DataLoader(val_dataset_crop,batch_size=config.batch_size,shuffle=False,collate_fn=collate_fn)
test_loader=DataLoader(test_dataset_crop,batch_size=config.batch_size,shuffle=False,collate_fn=collate_fn)

print(f"tarin_dataloader加载完毕, {len(train_loader)}个batch, batch大小为{config.batch_size}")
print(f"val_dataloader  加载完毕, {len(val_loader)}个batch, batch大小为{config.batch_size}")
print(f"test_dataloader 加载完毕, {len(test_loader)}个batch, batch大小为{config.batch_size}")


本次训练用训练集大小：2163
本次训练用验证集大小：721
本次测试用测试集大小：1455
tarin_dataloader加载完毕, 1081个batch, batch大小为2
val_dataloader  加载完毕, 361个batch, batch大小为2
test_dataloader 加载完毕, 728个batch, batch大小为2


#### 记录本次训练和测试用的数据量，还有数据样本信息

In [5]:
#查看数据加载情况
for images, targets in val_loader:
    # images = [img.to(device) for img in images]
    images = torch.stack(images, dim=0).to(config.device)#一batch的图像[B,C,H,W]
    targets = [{k: v.to(config.device) for k, v in t.items()} for t in targets] #一batch的['boxes', 'labels']
    print(len(targets))
    print(targets[0]["boxes"].shape)#[num,4]第i个图像有 num个box
    print(targets[0]["labels"].shape)#[num]num个box对应的分类
    #output = {"boxes": reg_preds, "scores": scores, "labels": labels}
    config.update(
        inputs_shape=images.shape,#[B,C,H,W]
        # classes=train_val_dataset.classes,#原始数据集保留classes
    )
    break

    
config.update(
    train_datasize=train_dataset_size,
    val_datasetsize=val_dataset_size,
    test_datasetsize=test_dataset_size,
    # classes=train_val_dataset.classes,#原始数据集保留classes
)


2
torch.Size([1, 4])
torch.Size([1])


#### 加载预训练模型

In [6]:
from torchvision import models
def get_pretrained(config=config):
    '''
        获取预训练模型
        @param config: 配置文件
        @return: 预训练模型
    '''
    
    model=models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    return model.to(config.device)

test_model=get_pretrained()
measurer=metrics.ModelMeasurer(test_model)
unit=1
parameters_num,inference_time=measurer.simply_check_model(input_shape=config.inputs_shape)
print(f"inference_time:{inference_time} s")
print(f"parameters_num:{parameters_num}")
config.update(
    network=test_model.__class__.__name__,
    inference_time=inference_time,
    parameters_num=parameters_num,
)
config

参数数量：41755286


Warm up ....: 100%|██████████| 100/100 [00:11<00:00,  8.85it/s]
Testing ...: 100%|██████████| 300/300 [00:32<00:00,  9.27it/s]

推理一个batch的时间：0.10713303428649902 s
inference_time:0.10713303428649902 s
parameters_num:41755286





dataset_sep : [0.82, 0.17, 0.01] 
resize_size : (128, 128) 
batch_size : 2 
lr : 0.0007 
epochs : 5 
hidden_size : 256 
optim : Adam 
momentum : 0.9 
weight_decay : 0.0001 
seed : 42 
mean : [0.50638, 0.49962538, 0.45205265] 
std : [0.23568255, 0.24141274, 0.25167742] 
AMP : True 
checkpoint_interval : 0.25 
source_dir : Cifar-10 
data_path : data\PascalVOC 
data_crop_rate : 0.25 
device : cuda 
inputs_shape : torch.Size([2, 3, 128, 128]) 
train_datasize : 2163 
val_datasetsize : 721 
test_datasetsize : 1455 
network : FasterRCNN 
inference_time : 0.10713303428649902 
parameters_num : 41755286 

#### 开始训练

In [None]:

bestMod=utils.BestSelector(acc=0)
train_logs=utils.Logs()
model=get_pretrained()
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
# criterion = nn.BCELoss()
if config.optim == "Adam":
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
else:
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
    
bestMod,train_logs=train_val_for_det.train_model(
            model, 
            criterion,
            optimizer, 
            train_loader,
            val_loader,
            bestMod=bestMod,
            train_logs=train_logs,
            config=config, 
            checkpoint_interval=10,
            show_progress_interval=3,
            AMP=config.AMP,#是否使用混合精度训练
            multi_loss_weight=[1,0.3,0.3],
            lr_scheduler_step=0.7,#动态调整学习率
            # num_epochs=config.epochs
        )

训练检测模型的 Epoch:   0%|          | 0/5 [00:00<?, ?it/s]


AssertionError: targets should not be none when in training mode

In [None]:
print(f"{config.epochs} epoch中 最好的模型")
print(bestMod)

200 epoch中 最好的模型
acc : 0.5748 ,loss : 1.0468 ,precision : 0.5797 ,recall : 0.5726 ,ap : 0.0915 ,epoch : 27 


#### 保存模型超参数和训练日志

In [None]:
saveDir=r'save_weights'
saveDir=os.path.join(
        saveDir,
        f'{bestMod.model.__class__.__name__[:10]}-acc={round(bestMod.acc,5)}-loss={round(bestMod.loss,3)}-max_epochs={config.epochs}'
)  
utils.saveProcess(
    saveDir=saveDir,
    bestMod=bestMod,
    train_log=train_logs,
    config=config
)

save_weights\MobileNetV-acc=0.5748-loss=1.047-max_epochs=200
{'acc': 0.5748, 'model': 'save_weights\\MobileNetV-acc=0.5748-loss=1.047-max_epochs=200\\best.pth', 'loss': 1.046791520611993, 'precision': 0.5797411973688208, 'recall': 0.5725587596346459, 'ap': 0.09154744311197122, 'epoch': 27, 'checkpoints': {'checkpoint_0': 'save_weights\\MobileNetV-acc=0.5748-loss=1.047-max_epochs=200\\checkpoint_0.pth'}}


#### 进行测试

In [None]:
import torch
from utils import utils,train_val
import os

# config=Config(os.path.join(dir,'config.json'))
# model=BestSelector(os.path.join(dir,'metrics.json'))
# saveDir=r'save_weights\BinaryClassificationMobileNetV3Large-acc=0.74336-loss=1.671334-max_epochs=40-1100'
Model,config,logs=utils.loadProcess(saveDir=saveDir)
metrics=train_val.validate_model(
    model=Model.model,
    val_loader=test_loader,
    device=config.device,
    only_val=True,
    criterion=criterion
)

metrics

模型测试中:: 100%|██████████| 16/16 [00:01<00:00, 11.48it/s]


{'Accuracy': 0.6255,
 'Precision': 0.6261924414212041,
 'Recall': 0.6238458165625979,
 'F1': 0.6176151310374951,
 'AP': 0.0799395743929934,
 'Loss': 1.109235592186451}

#### 保存数据到tensorboard

In [None]:
from utils import metrics,utils
recoder=metrics.TensorboardRecorder(#存到tensorboard显示
    log_dir="runs/",
    input_shape=[4,3,128,128],
    model=model

)
recoder.logs_scalars(
    logs.logs,
    prefix="train"
)