@@ -0,0 +1,45 @@
import xml.etree.ElementTree as ET
import os
from config import opt

def parse_rec(filename):
    """
    Parse a PASCAL VOC xml file.
    Converts dataset annotations from xml to txt; used to generate voc2007test.txt etc.
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        #obj_struct['pose'] = obj.find('pose').text
        #obj_struct['truncated'] = int(obj.find('truncated').text)
        #obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymin').text)),
                              int(float(bbox.find('xmax').text)),
                              int(float(bbox.find('ymax').text))]
        objects.append(obj_struct)

    return objects

txt_file = open('voc2012.txt', 'w')
Annotations = opt.train_Annotations
xml_files = os.listdir(Annotations)


for xml_file in xml_files:
    image_path = xml_file.split('.')[0] + '.jpg'
    txt_file.write(image_path + ' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    txt_file.write(str(num_obj) + ' ')
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        class_name = opt.VOC_CLASSES.index(class_name)
        txt_file.write(str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) + ' ' + str(class_name) + ' ')
    txt_file.write('\n')

txt_file.close()
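
For reference, each line this script writes to voc2012.txt has the form `image_name num_objects` followed by five numbers per object (`xmin ymin xmax ymax class_index`). A hypothetical example line with two objects (the coordinate values are made up):

2008_000008.jpg 2 53 87 471 420 12 158 44 289 167 14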
@@ -0,0 +1,3 @@
0 9.242578115770893
1 8.39983797688638
2 7.773137907828054

@@ -0,0 +1,207 @@
#encoding:utf-8
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import math
import torch.nn.functional as F
'''
Only the vgg16 variant is actually used.
'''

__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}


class VGG(nn.Module):

    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        x = F.sigmoid(x)  # squash predictions into [0,1]
        x = x.view(-1, 7, 7, 30)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['A']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return model


def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
    return model


def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B")
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['B']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
    return model


def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
    return model


def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D")
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['D']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
    return model


def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
    return model


def vgg19(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E")
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['E']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg19']))
    return model


def vgg19_bn(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration 'E') with batch normalization
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn']))
    return model

def test():
    import torch
    from torch.autograd import Variable
    model = vgg16()
    model.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1470),
    )
    print(model.classifier[6])
    #print(model)
    img = torch.rand(2, 3, 224, 224)
    img = Variable(img)
    output = model(img)
    print(output.size())

if __name__ == '__main__':
    test()
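
A quick check on the head dimensions used in test() above: YOLO v1 predicts, for an S=7 grid, B=2 boxes of 5 values plus 20 VOC class scores per cell, so the final linear layer emits 7 * 7 * (2*5 + 20) = 7 * 7 * 30 = 1470 values, which forward() then reshapes to (N, 7, 7, 30).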
@@ -0,0 +1,69 @@
import visdom
import numpy as np

class Visualizer():
    def __init__(self, env='main', **kwargs):
        '''
        **kwargs: dict of extra options
        '''
        self.vis = visdom.Visdom(env=env)
        self.index = {}  # maps plot name -> current x value
        self.log_text = ''
        self.env = env

    def plot_train_val(self, loss_train=None, loss_val=None):
        '''
        Plot val loss and train loss in one figure.
        '''
        x = self.index.get('train_val', 0)

        if x == 0:
            loss = loss_train if loss_train else loss_val
            win_y = np.column_stack((loss, loss))
            win_x = np.column_stack((x, x))
            self.win = self.vis.line(Y=win_y, X=win_x,
                                     env=self.env)
            # opts=dict(
            #     title='train_test_loss',
            # ))
            self.index['train_val'] = x + 1
            return

        if loss_train is not None:
            self.vis.line(Y=np.array([loss_train]), X=np.array([x]),
                          win=self.win,
                          name='1',
                          update='append',
                          env=self.env)
            self.index['train_val'] = x + 5
        else:
            self.vis.line(Y=np.array([loss_val]), X=np.array([x]),
                          win=self.win,
                          name='2',
                          update='append',
                          env=self.env)

    def plot_many(self, d):
        '''
        d: dict of {name: value} pairs
        '''
        for k, v in d.items():  # iteritems() is Python 2 only
            self.plot(k, v)

    def plot(self, name, y, **kwargs):
        '''
        plot('loss', 1.00)
        '''
        x = self.index.get(name, 0)  # if absent, start at 0
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',
                      **kwargs)
        self.index[name] = x + 1

    def log(self, info, win='log_text'):
        '''
        Show text in a Visdom text box instead of writing it to a txt file (not implemented).
        '''
        pass
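
A minimal usage sketch for Visualizer (assumes a Visdom server is already running, e.g. via `python -m visdom.server`; the loss values below are made up):

vis = Visualizer(env='yolo')
for loss in [9.24, 8.40, 7.77]:  # hypothetical per-epoch losses
    vis.plot('train_loss', loss)  # x advances automatically via self.index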
@@ -0,0 +1,51 @@
import xml.etree.ElementTree as ET
import os

VOC_CLASSES = (    # always index 0
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')

def parse_rec(filename):
    """ Parse a PASCAL VOC xml file """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        #obj_struct['pose'] = obj.find('pose').text
        #obj_struct['truncated'] = int(obj.find('truncated').text)
        #obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(float(bbox.find('xmin').text)),
                              int(float(bbox.find('ymin').text)),
                              int(float(bbox.find('xmax').text)),
                              int(float(bbox.find('ymax').text))]
        objects.append(obj_struct)

    return objects

txt_file = open('voc2012.txt', 'w')
# bobo: change this dir to your local path
Annotations = '/home/zhuhui/data/VOCdevkit/VOC2012/Annotations/'
xml_files = os.listdir(Annotations)

count = 0
for xml_file in xml_files:
    count += 1
    image_path = xml_file.split('.')[0] + '.jpg'
    txt_file.write(image_path + ' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    txt_file.write(str(num_obj) + ' ')
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        class_name = VOC_CLASSES.index(class_name)
        txt_file.write(str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) + ' ' + str(class_name) + ' ')
    txt_file.write('\n')
    #if count == 10:
    #    break
txt_file.close()
@@ -0,0 +1,117 @@
#encoding:utf-8
#
#created by xiongzihua 2017.12.26
#
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class yoloLoss(nn.Module):
    def __init__(self, S, B, l_coord, l_noobj):
        super(yoloLoss, self).__init__()
        self.S = S
        self.B = B
        self.l_coord = l_coord
        self.l_noobj = l_noobj

    def compute_iou(self, box1, box2):
        '''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].
        Args:
          box1: (tensor) bounding boxes, sized [N,4].
          box2: (tensor) bounding boxes, sized [M,4].
        Return:
          (tensor) iou, sized [N,M].
        '''
        N = box1.size(0)
        M = box2.size(0)

        lt = torch.max(
            box1[:,:2].unsqueeze(1).expand(N,M,2),  # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,:2].unsqueeze(0).expand(N,M,2),  # [M,2] -> [1,M,2] -> [N,M,2]
        )

        rb = torch.min(
            box1[:,2:].unsqueeze(1).expand(N,M,2),  # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,2:].unsqueeze(0).expand(N,M,2),  # [M,2] -> [1,M,2] -> [N,M,2]
        )

        wh = rb - lt  # [N,M,2]
        wh[wh < 0] = 0  # clip at 0: non-overlapping boxes have zero intersection
        inter = wh[:,:,0] * wh[:,:,1]  # [N,M]

        area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1])  # [N,]
        area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1])  # [M,]
        area1 = area1.unsqueeze(1).expand_as(inter)  # [N,] -> [N,1] -> [N,M]
        area2 = area2.unsqueeze(0).expand_as(inter)  # [M,] -> [1,M] -> [N,M]

        iou = inter / (area1 + area2 - inter)
        return iou

    def forward(self, pred_tensor, target_tensor):
        '''
        pred_tensor: (tensor) size(batchsize,S,S,Bx5+20=30) [x,y,w,h,c]
        target_tensor: (tensor) size(batchsize,S,S,30)
        '''
        N = pred_tensor.size()[0]
        coo_mask = target_tensor[:,:,:,4] > 0
        noo_mask = target_tensor[:,:,:,4] == 0
        coo_mask = coo_mask.unsqueeze(-1).expand_as(target_tensor)
        noo_mask = noo_mask.unsqueeze(-1).expand_as(target_tensor)

        coo_pred = pred_tensor[coo_mask].view(-1,30)
        box_pred = coo_pred[:,:10].contiguous().view(-1,5)  # two boxes per cell: [x1,y1,w1,h1,c1] and [x2,y2,w2,h2,c2]
        class_pred = coo_pred[:,10:]  # 20 class scores per cell

        coo_target = target_tensor[coo_mask].view(-1,30)
        box_target = coo_target[:,:10].contiguous().view(-1,5)
        class_target = coo_target[:,10:]

        # compute no-object loss
        noo_pred = pred_tensor[noo_mask].view(-1,30)
        noo_target = target_tensor[noo_mask].view(-1,30)
        noo_pred_mask = torch.cuda.ByteTensor(noo_pred.size())
        noo_pred_mask.zero_()
        noo_pred_mask[:,4] = 1; noo_pred_mask[:,9] = 1
        noo_pred_c = noo_pred[noo_pred_mask]  # for no-object cells, only the confidence c is penalized, size [-1,2]
        noo_target_c = noo_target[noo_pred_mask]
        nooobj_loss = F.mse_loss(noo_pred_c, noo_target_c, size_average=False)

        # compute contain-object loss
        coo_response_mask = torch.cuda.ByteTensor(box_target.size())
        coo_response_mask.zero_()
        coo_not_response_mask = torch.cuda.ByteTensor(box_target.size())
        coo_not_response_mask.zero_()
        for i in range(0, box_target.size()[0], 2):  # choose the best-iou box of each pair
            box1 = box_pred[i:i+2]
            box1_xyxy = Variable(torch.FloatTensor(box1.size()))
            box1_xyxy[:,:2] = box1[:,:2] - 0.5*box1[:,2:4]
            box1_xyxy[:,2:4] = box1[:,:2] + 0.5*box1[:,2:4]
            box2 = box_target[i].view(-1,5)
            box2_xyxy = Variable(torch.FloatTensor(box2.size()))
            box2_xyxy[:,:2] = box2[:,:2] - 0.5*box2[:,2:4]
            box2_xyxy[:,2:4] = box2[:,:2] + 0.5*box2[:,2:4]
            iou = self.compute_iou(box1_xyxy[:,:4], box2_xyxy[:,:4])  # [2,1]
            max_iou, max_index = iou.max(0)
            max_index = max_index.data.cuda()

            coo_response_mask[i+max_index] = 1
            coo_not_response_mask[i+1-max_index] = 1
        # 1. response loss
        box_pred_response = box_pred[coo_response_mask].view(-1,5)
        box_target_response = box_target[coo_response_mask].view(-1,5)
        contain_loss = F.mse_loss(box_pred_response[:,4], box_target_response[:,4], size_average=False)
        loc_loss = F.mse_loss(box_pred_response[:,:2], box_target_response[:,:2], size_average=False) + F.mse_loss(torch.sqrt(box_pred_response[:,2:4]), torch.sqrt(box_target_response[:,2:4]), size_average=False)
        # 2. not-response loss: the worse box of each pair is trained toward confidence 0
        box_pred_not_response = box_pred[coo_not_response_mask].view(-1,5)
        box_target_not_response = box_target[coo_not_response_mask].view(-1,5)
        box_target_not_response[:,4] = 0
        not_contain_loss = F.mse_loss(box_pred_not_response[:,4], box_target_not_response[:,4], size_average=False)
        # 3. class loss
        class_loss = F.mse_loss(class_pred, class_target, size_average=False)

        return (self.l_coord*loc_loss + contain_loss + not_contain_loss + self.l_noobj*nooobj_loss + class_loss)/N
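
For orientation, a minimal instantiation with the YOLO v1 paper's hyper-parameters (S=7 grid, B=2 boxes per cell, lambda_coord=5, lambda_noobj=0.5); both arguments to forward must be (N, 7, 7, 30) tensors, and a CUDA device is required because the masks above are built with torch.cuda.ByteTensor:

criterion = yoloLoss(S=7, B=2, l_coord=5, l_noobj=0.5)
# loss = criterion(pred_tensor, target_tensor)  # both sized (N, 7, 7, 30)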




@@ -20,10 +20,14 @@
----------

# YOLO V1 version:
- Basic implementation: pytorchYOLOv1master [reference][1]
- Refactored implementation: YOLOv1ByBobo
- ~~Currently usable; the format needs refactoring~~
- The model does about as well on the training set, but performs poorly on the test set
- Current work:
  1. ~~Refactor the code when time allows, to make it easier to understand~~
  2. The refactor is finished and the results are unchanged; need to find time to improve the model's accuracy


----------