| @@ -0,0 +1,45 @@ | ||
| import xml.etree.ElementTree as ET | ||
| import os | ||
| from config import opt | ||
|
|
||
def parse_rec(filename):
    """
    Parse a PASCAL VOC annotation xml file.

    Returns a list of dicts, one per <object> element, each holding the
    class name and its [xmin, ymin, xmax, ymax] box rounded to ints.
    Used to generate the voc2007test.txt-style label files.
    """
    parsed = []
    for node in ET.parse(filename).findall('object'):
        box_node = node.find('bndbox')
        corners = [
            int(float(box_node.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        parsed.append({'name': node.find('name').text, 'bbox': corners})
    return parsed
|
|
||
# Convert every VOC xml annotation into one line of voc2012.txt:
# "<image>.jpg <num_objects> <xmin> <ymin> <xmax> <ymax> <class_idx> ..."
txt_file = open('voc2012.txt','w')
Annotations = opt.train_Annotations  # annotations directory; a trailing '/' is assumed — TODO confirm in config
xml_files = os.listdir(Annotations)

for xml_file in xml_files:
    # the image is assumed to share the annotation's base name
    image_path = xml_file.split('.')[0] + '.jpg'
    txt_file.write(image_path+' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    txt_file.write(str(num_obj)+' ')
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        # store the class as its integer index into opt.VOC_CLASSES
        class_name = opt.VOC_CLASSES.index(class_name)
        txt_file.write(str(bbox[0])+' '+str(bbox[1])+' '+str(bbox[2])+' '+str(bbox[3])+' '+str(class_name)+' ')
    txt_file.write('\n')

txt_file.close()
| @@ -0,0 +1,3 @@ | ||
| 0 9.242578115770893 | ||
| 1 8.39983797688638 | ||
| 2 7.773137907828054 |
| @@ -0,0 +1,207 @@ | ||
| #encoding:utf-8 | ||
| import torch.nn as nn | ||
| import torch.utils.model_zoo as model_zoo | ||
| import math | ||
| import torch.nn.functional as F | ||
| ''' | ||
| 只用了vgg16方法 | ||
| ''' | ||
|
|
||
# Public API of this module (torchvision-style VGG constructors).
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]
|
|
||
|
|
||
# ImageNet-pretrained weight files hosted by torchvision, keyed by variant.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
|
|
||
|
|
||
class VGG(nn.Module):
    """VGG backbone adapted as a YOLO v1 head.

    The classifier output is squashed to (0, 1) with a sigmoid and reshaped
    to the 7x7x30 detection grid.

    Args:
        features: convolutional feature extractor (built by make_layers).
        num_classes: size of the final linear layer. For the YOLO head this
            must be 7*7*30 = 1470 so the reshape in forward() is valid.
    """

    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten conv features per sample
        x = self.classifier(x)
        # Squash every prediction into (0, 1). F.sigmoid is deprecated;
        # Tensor.sigmoid() is the supported equivalent with identical output.
        x = x.sigmoid()
        # Reshape to YOLO's S x S x (B*5+20) grid; needs 1470 values/sample.
        x = x.view(-1, 7, 7, 30)
        return x

    def _initialize_weights(self):
        """He-style init for convs, N(0, 0.01) for linears, BN to identity."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
|
|
||
|
|
||
def make_layers(cfg, batch_norm=False):
    """Build a VGG conv stack from a config list.

    Each integer entry adds a 3x3 same-padding conv (optionally followed by
    BatchNorm) plus an in-place ReLU; the string 'M' adds a 2x2 max-pool.
    """
    modules = []
    channels = 3  # RGB input
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        channels = entry
    return nn.Sequential(*modules)
|
|
||
|
|
||
# Layer configurations for the VGG variants (torchvision convention):
# 'A'=vgg11, 'B'=vgg13, 'D'=vgg16, 'E'=vgg19.
# Integers are conv output channels, 'M' marks a 2x2 max-pool.
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
|
|
||
|
|
||
def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A").

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['A']), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return net
|
|
||
|
|
||
def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
    return net
|
|
||
|
|
||
def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B").

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['B']), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
    return net
|
|
||
|
|
||
def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
    return net
|
|
||
|
|
||
def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") — the backbone used by this repo.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['D']), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
    return net
|
|
||
|
|
||
def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
    return net
|
|
||
|
|
||
def vgg19(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E").

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['E']), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg19']))
    return net
|
|
||
|
|
||
def vgg19_bn(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration 'E') with batch normalization.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    net = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn']))
    return net
|
|
||
def test():
    """Smoke test: push a random batch through vgg16 with a 1470-way head."""
    import torch
    from torch.autograd import Variable
    net = vgg16()
    # replace the 1000-way ImageNet head with YOLO's 7*7*30 = 1470 outputs
    net.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1470),
    )
    print(net.classifier[6])
    #print(net)
    batch = Variable(torch.rand(2, 3, 224, 224))
    out = net(batch)
    print(out.size())


if __name__ == '__main__':
    test()
| @@ -0,0 +1,69 @@ | ||
| import visdom | ||
| import numpy as np | ||
|
|
||
class Visualizer():
    """Thin wrapper around visdom for plotting training curves and text logs."""

    def __init__(self, env='main', **kwargs):
        '''
        env: visdom environment name.
        **kwargs: extra visdom options (currently unused).
        '''
        self.vis = visdom.Visdom(env=env)
        self.index = {}      # per-plot x coordinate, keyed by plot name
        self.log_text = ''
        self.env = env

    def plot_train_val(self, loss_train=None, loss_val=None):
        '''
        Plot validation loss and training loss as two traces in one window.
        Pass exactly one of loss_train / loss_val per call.
        '''
        x = self.index.get('train_val', 0)

        if x == 0:
            # first call: create the window with both traces seeded to the
            # same value so visdom allocates two named lines
            loss = loss_train if loss_train else loss_val
            win_y = np.column_stack((loss, loss))
            win_x = np.column_stack((x, x))
            self.win = self.vis.line(Y=win_y, X=win_x,
                                     env=self.env)
            self.index['train_val'] = x + 1
            return

        if loss_train is not None:
            self.vis.line(Y=np.array([loss_train]), X=np.array([x]),
                          win=self.win,
                          name='1',
                          update='append',
                          env=self.env)
            # NOTE(review): train points advance x by 5 while val points do
            # not advance it at all — looks like val is logged every 5 train
            # steps, but worth confirming against the training loop.
            self.index['train_val'] = x + 5
        else:
            self.vis.line(Y=np.array([loss_val]), X=np.array([x]),
                          win=self.win,
                          name='2',
                          update='append',
                          env=self.env)

    def plot_many(self, d):
        '''
        d: dict {name: value} — plot each entry in its own window.
        '''
        # bug fix: dict.iteritems() is Python 2 only and raises
        # AttributeError on Python 3; items() behaves the same on both.
        for k, v in d.items():
            self.plot(k, v)

    def plot(self, name, y, **kwargs):
        '''
        Append one point to the named plot, e.g. plot('loss', 1.00).
        '''
        x = self.index.get(name, 0)  # a plot not seen before starts at x=0
        self.vis.line(Y=np.array([y]), X=np.array([x]),
                      win=name,
                      opts=dict(title=name),
                      update=None if x == 0 else 'append',
                      **kwargs)
        self.index[name] = x + 1

    def log(self, info, win='log_text'):
        '''
        Placeholder for showing text in a visdom box (not implemented).
        '''
        pass
| @@ -0,0 +1,51 @@ | ||
| import xml.etree.ElementTree as ET | ||
| import os | ||
|
|
||
# The 20 PASCAL VOC class names; a class's integer label is its position here.
VOC_CLASSES = (    # always index 0
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')
|
|
||
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts
    ({'name': class_name, 'bbox': [xmin, ymin, xmax, ymax]})."""
    objects = []
    for node in ET.parse(filename).findall('object'):
        bb = node.find('bndbox')
        objects.append({
            'name': node.find('name').text,
            'bbox': [int(float(bb.find('xmin').text)),
                     int(float(bb.find('ymin').text)),
                     int(float(bb.find('xmax').text)),
                     int(float(bb.find('ymax').text))],
        })
    return objects
|
|
||
# Convert every VOC2012 xml annotation into one line of voc2012.txt:
# "<image>.jpg <num_objects> <xmin> <ymin> <xmax> <ymax> <class_idx> ..."
txt_file = open('voc2012.txt','w')
#bobo change dir
Annotations = '/home/zhuhui/data/VOCdevkit/VOC2012/Annotations/'
xml_files = os.listdir(Annotations)

count = 0  # processed-file counter; only used by the debug break below
for xml_file in xml_files:
    count += 1
    # the image is assumed to share the annotation's base name
    image_path = xml_file.split('.')[0] + '.jpg'
    txt_file.write(image_path+' ')
    results = parse_rec(Annotations + xml_file)
    num_obj = len(results)
    txt_file.write(str(num_obj)+' ')
    for result in results:
        class_name = result['name']
        bbox = result['bbox']
        # store the class as its integer index into VOC_CLASSES
        class_name = VOC_CLASSES.index(class_name)
        txt_file.write(str(bbox[0])+' '+str(bbox[1])+' '+str(bbox[2])+' '+str(bbox[3])+' '+str(class_name)+' ')
    txt_file.write('\n')
    #if count == 10:
    #    break
txt_file.close()
| @@ -0,0 +1,117 @@ | ||
| #encoding:utf-8 | ||
| # | ||
| #created by xiongzihua 2017.12.26 | ||
| # | ||
| import torch | ||
| import torch.nn as nn | ||
| import torch.nn.functional as F | ||
| from torch.autograd import Variable | ||
|
|
||
class yoloLoss(nn.Module):
    """YOLO v1 loss over an S x S grid with B boxes per cell and 20 classes.

    Args:
        S: grid size (7 in this repo).
        B: boxes per cell (2 in this repo).
        l_coord: weight on the localization (x, y, w, h) terms.
        l_noobj: weight on the no-object confidence term.

    NOTE(review): forward() allocates torch.cuda.ByteTensor masks, so it
    requires CUDA as written; compute_iou() is device-agnostic.
    """
    def __init__(self, S, B, l_coord, l_noobj):
        super(yoloLoss, self).__init__()
        self.S = S
        self.B = B
        self.l_coord = l_coord
        self.l_noobj = l_noobj

    def compute_iou(self, box1, box2):
        '''Compute the intersection over union of two set of boxes, each box is [x1,y1,x2,y2].
        Args:
            box1: (tensor) bounding boxes, sized [N,4].
            box2: (tensor) bounding boxes, sized [M,4].
        Return:
            (tensor) iou, sized [N,M].
        '''
        N = box1.size(0)
        M = box2.size(0)

        # pairwise top-left corner of the intersection
        lt = torch.max(
            box1[:, :2].unsqueeze(1).expand(N, M, 2),  # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:, :2].unsqueeze(0).expand(N, M, 2),  # [M,2] -> [1,M,2] -> [N,M,2]
        )
        # pairwise bottom-right corner of the intersection
        rb = torch.min(
            box1[:, 2:].unsqueeze(1).expand(N, M, 2),
            box2[:, 2:].unsqueeze(0).expand(N, M, 2),
        )

        wh = rb - lt  # [N,M,2] intersection extents; negative => no overlap
        # bug fix: the original did `wh = (wh < 0).float()`, replacing the
        # extents with a 0/1 mask — every overlapping pair got inter=0 and
        # every disjoint pair got inter=1. Clamp negatives to 0 instead,
        # as the adjacent `wh[wh<0]=0` comment intended.
        wh = wh.clamp(min=0)
        inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [N,]
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M,]
        area1 = area1.unsqueeze(1).expand_as(inter)  # [N,] -> [N,M]
        area2 = area2.unsqueeze(0).expand_as(inter)  # [M,] -> [N,M]

        iou = inter / (area1 + area2 - inter)
        return iou

    def forward(self, pred_tensor, target_tensor):
        '''
        pred_tensor: (tensor) size(batchsize,S,S,Bx5+20=30) [x,y,w,h,c]
        target_tensor: (tensor) size(batchsize,S,S,30)
        '''
        N = pred_tensor.size()[0]
        # cells that do / do not contain an object (confidence channel 4)
        coo_mask = target_tensor[:, :, :, 4] > 0
        noo_mask = target_tensor[:, :, :, 4] == 0
        coo_mask = coo_mask.unsqueeze(-1).expand_as(target_tensor)
        noo_mask = noo_mask.unsqueeze(-1).expand_as(target_tensor)

        coo_pred = pred_tensor[coo_mask].view(-1, 30)
        box_pred = coo_pred[:, :10].contiguous().view(-1, 5)  # two boxes [x,y,w,h,c] per cell
        class_pred = coo_pred[:, 10:]                         # 20 class scores per cell

        coo_target = target_tensor[coo_mask].view(-1, 30)
        box_target = coo_target[:, :10].contiguous().view(-1, 5)
        class_target = coo_target[:, 10:]

        # --- no-object loss: only the two confidence channels matter ---
        noo_pred = pred_tensor[noo_mask].view(-1, 30)
        noo_target = target_tensor[noo_mask].view(-1, 30)
        noo_pred_mask = torch.cuda.ByteTensor(noo_pred.size())
        noo_pred_mask.zero_()
        noo_pred_mask[:, 4] = 1; noo_pred_mask[:, 9] = 1
        noo_pred_c = noo_pred[noo_pred_mask]      # confidences only, size [-1*2]
        noo_target_c = noo_target[noo_pred_mask]
        nooobj_loss = F.mse_loss(noo_pred_c, noo_target_c, size_average=False)

        # --- object loss: pick the responsible (best-IoU) box per cell ---
        coo_response_mask = torch.cuda.ByteTensor(box_target.size())
        coo_response_mask.zero_()
        coo_not_response_mask = torch.cuda.ByteTensor(box_target.size())
        coo_not_response_mask.zero_()
        for i in range(0, box_target.size()[0], 2):  # boxes come in pairs per cell
            box1 = box_pred[i:i + 2]
            # convert [cx,cy,w,h] to corners for the IoU computation
            box1_xyxy = Variable(torch.FloatTensor(box1.size()))
            box1_xyxy[:, :2] = box1[:, :2] - 0.5 * box1[:, 2:4]
            box1_xyxy[:, 2:4] = box1[:, :2] + 0.5 * box1[:, 2:4]
            box2 = box_target[i].view(-1, 5)
            box2_xyxy = Variable(torch.FloatTensor(box2.size()))
            box2_xyxy[:, :2] = box2[:, :2] - 0.5 * box2[:, 2:4]
            box2_xyxy[:, 2:4] = box2[:, :2] + 0.5 * box2[:, 2:4]
            iou = self.compute_iou(box1_xyxy[:, :4], box2_xyxy[:, :4])  # [2,1]
            max_iou, max_index = iou.max(0)
            max_index = max_index.data.cuda()

            coo_response_mask[i + max_index] = 1        # best box is responsible
            coo_not_response_mask[i + 1 - max_index] = 1

        # 1. responsible-box loss: confidence + location
        box_pred_response = box_pred[coo_response_mask].view(-1, 5)
        box_target_response = box_target[coo_response_mask].view(-1, 5)
        contain_loss = F.mse_loss(box_pred_response[:, 4], box_target_response[:, 4], size_average=False)
        # NOTE(review): torch.sqrt on raw predicted w/h assumes they are
        # non-negative (the network ends in a sigmoid) — confirm upstream.
        loc_loss = F.mse_loss(box_pred_response[:, :2], box_target_response[:, :2], size_average=False) + F.mse_loss(torch.sqrt(box_pred_response[:, 2:4]), torch.sqrt(box_target_response[:, 2:4]), size_average=False)
        # 2. non-responsible-box loss: push its confidence toward 0
        box_pred_not_response = box_pred[coo_not_response_mask].view(-1, 5)
        box_target_not_response = box_target[coo_not_response_mask].view(-1, 5)
        box_target_not_response[:, 4] = 0
        # bug fix: the original computed this term from the *response*
        # tensors (duplicating contain_loss), leaving the non-responsible
        # boxes' confidences untrained.
        not_contain_loss = F.mse_loss(box_pred_not_response[:, 4], box_target_not_response[:, 4], size_average=False)
        # 3. class loss
        class_loss = F.mse_loss(class_pred, class_target, size_average=False)

        return (self.l_coord * loc_loss + contain_loss + not_contain_loss + self.l_noobj * nooobj_loss + class_loss) / N
|
|
||
|
|
||
|
|
||
|
|