# 41 物体检测和数据集
## 物体检测
- 边缘框
  - 两种定义方法
- 目标检测数据集

## 边缘框实现

In [None]:
%matplotlib inline
import torch
# Bind the d2l PyTorch helpers to the name `d2l`; the bare
# `from d2l import torch` rebound `torch` and left `d2l` undefined.
from d2l import torch as d2l

# Configure the default figure size, then load and show the sample image.
d2l.set_figsize()
# NOTE(review): the file name 'cardog.jpg' may be a typo for 'catdog.jpg' —
# confirm against the files actually present in ../img.
img = d2l.plt.imread('../img/cardog.jpg')
d2l.plt.imshow(img)

# 两种表示之间的切换
def box_corner_to_center(boxes):
    """Convert boxes from corner format to center format.

    Args:
        boxes: 2-D tensor whose first four columns are (x1, y1, x2, y2),
            i.e. the upper-left and lower-right corners of each box.

    Returns:
        Tensor with one row per box and columns (cx, cy, w, h): the box
        center followed by its width and height.
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1
    # Stack along a new last dimension so each row is one box.
    # (The original passed the misspelled keyword `axi`, which raises TypeError.)
    return torch.stack((cx, cy, w, h), dim=-1)

def box_center_to_corner(boxes):
    """Convert boxes from center format to corner format.

    Args:
        boxes: 2-D tensor whose first four columns are (cx, cy, w, h),
            i.e. the box center followed by its width and height.

    Returns:
        Tensor with one row per box and columns (x1, y1, x2, y2): the
        upper-left corner followed by the lower-right corner.
    """
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Upper-left corner: subtract half the extent from the center.
    x1 = cx - 0.5 * w
    y1 = cy - 0.5 * h
    # Lower-right corner: add half the extent to the center.
    x2 = cx + 0.5 * w
    y2 = cy + 0.5 * h
    # Column order must be (x1, y1, x2, y2) so that this function is the
    # inverse of the corner-to-center conversion.
    return torch.stack((x1, y1, x2, y2), dim=-1)
 
# 绘制框
def bbox_to_rect(bbox, color):
    """Build an unfilled matplotlib rectangle patch for a bounding box.

    Args:
        bbox: Indexable of four numbers in corner format (x1, y1, x2, y2).
        color: Edge color passed to matplotlib.

    Returns:
        A Rectangle patch anchored at (bbox[0], bbox[1]) whose width and
        height are the differences of the corner coordinates.
    """
    # Convert corner format (x1, y1, x2, y2) into matplotlib's
    # ((x, y), width, height) format.
    return d2l.plt.Rectangle(xy=(bbox[0], bbox[1]),
                             width=bbox[2] - bbox[0],
                             height=bbox[3] - bbox[1],
                             fill=False,
                             edgecolor=color,
                             linewidth=2)
# Show the image and overlay both bounding boxes on the same axes.
# imshow returns the image artist, whose `.axes` is used to add patches;
# plt.show returns None, so the original `fig.axes` lookup failed.
# NOTE(review): cat_bbox and dog_bbox are not defined in the visible code;
# they must be (x1, y1, x2, y2) sequences defined before this cell runs.
fig = d2l.plt.imshow(img)
fig.axes.add_patch(bbox_to_rect(cat_bbox, 'red'))
fig.axes.add_patch(bbox_to_rect(dog_bbox, 'blue'))

## 数据集

# 42 锚框
## 锚框
- 基于锚框
  - 边缘框
  - 预测锚框是否含有关注的物体//预测偏移
- IoU 交并比
  - Jaccard 指数: 集合交集并集之比
- 赋予锚框标号
  - 背景/与真实边缘框相关联
  - 大量负样本
  - 不断查找IoU最大值，并去除对应行列
- NMS 非极大值抑制输出
  - NMS合并相似的预测
    - 选中非背景类的最大预测值
    - 删掉与其 $IoU>\theta$ 的其他锚框
    - 不断重复

## 代码

In [60]:
#@save
def multibox_prior(data, sizes, ratios):
    """Generate anchor boxes of different shapes centered on every pixel.

    Args:
        data: Tensor whose last two dimensions are (height, width). Only its
            shape and device are read.
        sizes: Sequence of anchor scales.
        ratios: Sequence of aspect ratios.

    Returns:
        Tensor of shape (1, height * width * boxes_per_pixel, 4), where
        boxes_per_pixel = len(sizes) + len(ratios) - 1. Each row is
        (xmin, ymin, xmax, ymax) in coordinates scaled by 1 / width and
        1 / height.
    """
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)
    boxes_per_pixel = (num_sizes + num_ratios - 1)
    size_tensor = torch.tensor(sizes, device=device)
    ratio_tensor = torch.tensor(ratios, device=device)

    # Offsets move the anchor center to the middle of each pixel; the steps
    # scale pixel indices by 1 / height and 1 / width.
    offset_h, offset_w = 0.5, 0.5
    steps_h = 1.0 / in_height
    steps_w = 1.0 / in_width

    # Center coordinates of every pixel, flattened in row-major order.
    center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
    center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w
    # Explicit 'ij' indexing keeps the (row, column) layout and avoids the
    # warning emitted when `indexing` is omitted.
    shift_y, shift_x = torch.meshgrid(center_h, center_w, indexing='ij')
    shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)

    # Build the `boxes_per_pixel` widths and heights: every size paired with
    # the first ratio, then the first size paired with the remaining ratios.
    # They are later turned into corner offsets (xmin, ymin, xmax, ymax).
    w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
                   sizes[0] * torch.sqrt(ratio_tensor[1:]))) \
        * in_height / in_width  # compensate for non-square inputs
    h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
                   sizes[0] / torch.sqrt(ratio_tensor[1:])))

    # stack(...) has shape (4, boxes_per_pixel); transposing gives one row of
    # (-w, -h, w, h) per anchor shape. repeat(...) tiles those rows once per
    # pixel, and dividing by 2 turns full extents into half extents.
    anchor_manipulations = torch.stack((-w, -h, w, h)).T.repeat(
        in_height * in_width, 1) / 2

    # Every pixel center is repeated `boxes_per_pixel` times in a row so that
    # it lines up with the tiled offsets; adding them yields the corners.
    out_grid = torch.stack([shift_x, shift_y, shift_x, shift_y],
                           dim=1).repeat_interleave(boxes_per_pixel, dim=0)
    output = out_grid + anchor_manipulations
    return output.unsqueeze(0)
# 很多涉及到重复的函数：repeat, repeat_interleave, tile

import torch
# Small 2x2 tensor used to compare the different ways of repeating data.
w = torch.tensor([[1, 2], [3, 4]])
# Alternative kept for reference: ww = torch.stack((w, w))

# repeat: one repetition count per dimension (copies the whole tensor).
ww = w.repeat((2, 1))
www = w.repeat((2, 1, 1))
# repeat_interleave: repeats each element in place along the given dimension.
wwww = torch.repeat_interleave(w, 2, dim=1)
# tile: lays copies of the tensor side by side, like tiles.
w5 = torch.tile(w, (1, 2))
# print(ww)
# print(w5)

# Concatenating along the first dimension stacks the rows of both tensors.
w6 = torch.cat([w, w])
print(w6)

tensor([[1, 2],
        [3, 4],
        [1, 2],
        [3, 4]])


# 45 SSD实现
- 对每一个像素进行实现，输入通道，输出通道也是每个像素对应
- 连接多尺度的预测
```python
def forward(x, block):
    """Apply ``block`` to ``x`` and return the result."""
    output = block(x)
    return output
# Feed a dummy feature map through a class predictor to inspect the output.
# torch.zeros builds a tensor OF the given shape; torch.tensor((2, 8, 20, 20))
# would build a 1-D tensor holding those four numbers instead.
Y1 = forward(torch.zeros((2, 8, 20, 20)), cls_predictor(8, 5, 10))
# (2, 8, 20, 20) -- (batch size, channels, height, width)
# (8, 5, 10) -- (channels, number of anchor boxes, number of classes)

# Move dimension 1 to the end, then flatten everything after the first
# dimension (assumes pred is (batch, channels, height, width) — TODO confirm).
torch.flatten(pred.permute(0, 2, 3, 1), start_dim=1)
# Concatenate the flattened predictions along dimension 1.
# NOTE(review): `pred`, `preds` and `flatten` are not defined in this snippet;
# presumably `flatten` stands for the expression on the line above — confirm.
torch.cat([flatten(p) for p in preds], dim=1)
             
## down_sample_blk

```

# PS:

In [None]:
## pre-train
import torchvision.models as models
# nn.Linear is used below but `nn` was never imported in this cell.
from torch import nn

# Load ResNet-50 with pretrained weights.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` — confirm against the installed version.
model = models.resnet50(pretrained=True)
# Read the input width of the existing classifier head so the replacement
# layer matches the backbone's feature size.
fc_features = model.fc.in_features
# Replace the head to change the number of output classes: 1000 -> 9.
model.fc = nn.Linear(fc_features, 9)
## Add convolutional layers (not written yet)
    