In [1]:
from torch import nn



def Class_pred(num_inputs, num_anchors, num_classes):
    return nn.Conv2d(num_inputs, num_anchors * (num_classes + 1),
                     kernel_size=3, padding=1)

def BBox_pred(num_inputs, num_anchors):
    return nn.Conv2d(num_inputs, num_anchors * 4, 
                     kernel_size=3, padding=1)

def forward(x, block):
    return block(x)

def flatten_pred(pred):
    return torch.flatten(pred.permute(0, 2, 3, 1), start_dim=1)

def concat_preds(preds):
    return torch.cat([flatten_pred(p) for p in preds], dim=1)


def down_sample_blk(in_channels, out_channels):
    blk = []
    for _ in range(2):
        blk.append(nn.Conv2d(in_channels, out_channels,
                             kernel_size=3, padding=1))
        blk.append(nn.BatchNorm2d(out_channels))
        blk.append(nn.ReLU())
        in_channels = out_channels
    blk.append(nn.MaxPool2d(2))
    return nn.Sequential(*blk)


def base_net():
    blk = []
    num_filters = [3, 16, 32, 64]
    for i in range(len(num_filters) - 1):
        blk.append(down_sample_blk(num_filters[i], num_filters[i+1]))
    return nn.Sequential(*blk)

def get_blk(i):
    if i == 0:
        blk = base_net()
    elif i == 1:
        blk = down_sample_blk(64, 128)
    elif i == 4:
        blk = nn.AdaptiveMaxPool2d((1,1))
    else:
        blk = down_sample_blk(128, 128)
    return blk


def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    Y = blk(X)
    anchors = DataModel.multibox_prior(Y, sizes=size, ratios=ratio)
    cls_preds = cls_predictor(Y)
    bbox_preds = bbox_predictor(Y)
    return (Y, anchors, cls_preds, bbox_preds)


class SSD(nn.Module):
    def __init__(self, num_classes, **kwargs):
        super(SSD, self).__init__(**kwargs)
        self.num_classes = num_classes
        idx_to_in_channels = [64, 128, 128, 128, 128]
        for i in range(5):
            # 即赋值语句self.blk_i=get_blk(i)
            setattr(self, f'blk_{i}', get_blk(i))
            setattr(self, f'cls_{i}', Class_pred(idx_to_in_channels[i],
                                                    num_anchors, num_classes))
            setattr(self, f'bbox_{i}', BBox_pred(idx_to_in_channels[i],
                                                      num_anchors))

    def forward(self, X):
        anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5
        for i in range(5):
            X, anchors[i], cls_preds[i], bbox_preds[i] = blk_forward(
                X, getattr(self, f'blk_{i}'), sizes[i], ratios[i],
                getattr(self, f'cls_{i}'), getattr(self, f'bbox_{i}'))
        anchors = torch.cat(anchors, dim=1)
        cls_preds = concat_preds(cls_preds)
        cls_preds = cls_preds.reshape(
            cls_preds.shape[0], -1, self.num_classes + 1)
        bbox_preds = concat_preds(bbox_preds)
        return anchors, cls_preds, bbox_preds

In [2]:
def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    batch_size, num_classes = cls_preds.shape[0], cls_preds.shape[2]
    cls = cls_loss(cls_preds.reshape(-1, num_classes),
                   cls_labels.reshape(-1)).reshape(batch_size, -1).mean(dim=1)
    bbox = bbox_loss(bbox_preds * bbox_masks,
                     bbox_labels * bbox_masks).mean(dim=1)
    return cls + bbox

def cls_eval(cls_preds, cls_labels):
    return float((cls_preds.argmax(dim=-1).type(
        cls_labels.dtype) == cls_labels).sum())


def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    return float((torch.abs((bbox_labels - bbox_preds) * bbox_masks)).sum())

In [3]:
cls_loss = nn.CrossEntropyLoss(reduction='none')
bbox_loss = nn.L1Loss(reduction='none')

In [4]:
import torch
import DataModel
from torch.utils.data import DataLoader
data=torch.load('data.pt')
train_iter = DataLoader(data,batch_size=64,shuffle=True)

In [5]:
import torch
device='cuda' if torch.cuda.is_available() else 'cpu'
sizes = [[0.2, 0.272], [0.37, 0.447], [0.54, 0.619], [0.71, 0.79],
         [0.88, 0.961]]
ratios = [[1, 2, 0.5]] * 5
num_anchors = len(sizes[0]) + len(ratios[0]) - 1
net=SSD(22).to(device)

In [6]:
from torch.optim import Adam,lr_scheduler
num_epochs = 2
epoch_len=len(data)
net = net.to(device)
optimizer=Adam(net.parameters(),lr=0.0006)
scheduler=lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 1/(2**epoch))
for epoch in range(num_epochs):
    metric =[[0],[0],[0]]
    net.train()
    now=0
    for features, target in train_iter:
        nums=len(features)
        now+=nums
        optimizer.zero_grad()
        X, Y = features.float().to(device),target.float().to(device)
        
        anchors, cls_preds, bbox_preds = net(X)

        
        bbox_labels, bbox_masks, cls_labels = DataModel.multibox_target(anchors, Y)
        
        
        l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                      bbox_masks)
        l.mean().backward()
        optimizer.step()
        metric[1].append( cls_eval(cls_preds, cls_labels)/ cls_labels.numel())
        metric[2].append( bbox_eval(bbox_preds, bbox_labels, bbox_masks)/bbox_labels.numel())
        
        print(f'cls_eval:{metric[1][-1]:.7f}    class_eval:{metric[2][-1]:.7f}    :{now}/{epoch_len*num_epochs}',end='\r')
    torch.save(net.state_dict(),f'./weights{epoch}.bin')
    scheduler.step()

cls_eval:0.9964229    class_eval:0.0032335    :16550/33100

In [21]:
from PIL import Image
import torch.nn.functional  as F
img=Image.open('./data/VOCdevkit/VOC2012/JPEGImages/2007_000027.jpg')
X=DataModel.convert_to_256(img)
X=X.unsqueeze(0)
print(X.shape)
def predict(X):
    net.eval()
    anchors, cls_preds, bbox_preds = net(X.to(device))
    cls_probs = F.softmax(cls_preds, dim=2).permute(0, 2, 1)
    output = DataModel.multibox_detection(cls_probs, bbox_preds, anchors)
    idx = [i for i, row in enumerate(output[0]) if row[0] != -1]
    return output[0, idx]

output = predict(X)

torch.Size([1, 3, 256, 256])


AttributeError: module 'DataModel' has no attribute 'multibox_detection'

In [20]:
def display(img, output, threshold):
    d2l.set_figsize((5, 5))
    fig = d2l.plt.imshow(img)
    for row in output:
        score = float(row[1])
        if score < threshold:
            continue
        h, w = img.shape[0:2]
        bbox = [row[2:6] * torch.tensor((w, h, w, h), device=row.device)]
        d2l.show_bboxes(fig.axes, bbox, '%.2f' % score, 'w')

display(img, output.cpu(), threshold=0.9)

NameError: name 'output' is not defined