In [1]:
from __future__ import division
from utils import *
import darknet as dk
import time
import torch 
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2 
import os 
import os.path as osp
import pickle as pkl
import pandas as pd
import random
import itertools
import argparse
from tqdm import tqdm
import torchvision.transforms as transforms
from torch.utils import data
from PIL import Image
#from pycocotools.coco import COCO
import json
from skimage.transform import resize
import data_augmentation

class CoCoDataset(data.Dataset):
    """Detection dataset driven by a text file that lists one image path per line.

    For every listed image a label file is looked up at the same path with
    'images' replaced by 'labels' and the '.png'/'.jpg' extension replaced by
    '.txt'.  Label files are read with ``np.loadtxt`` and reshaped to rows of
    five values (presumably ``class, x_center, y_center, width, height`` with
    normalised coordinates -- confirm against the label generator).

    Args:
        img_folder: path of the text file that lists the images (despite its
            name this is a file, not a directory).
        resolution: side length; images are resized to (resolution, resolution).
        append_label: string prepended verbatim to every image and label path
            (plain concatenation, no path separator is inserted).
        is_training: when true, ``data_augmentation.ImageBaseAug`` is added to
            the transform pipeline used by ``__getitem__``.
    """

    def __init__(self,img_folder,resolution,append_label='images',is_training=True):
        self.resolution=(resolution,resolution)
        self.is_training=is_training
        self.append_label=append_label
        with open(img_folder,'r') as f:
            # Drop blank lines (e.g. a trailing newline at the end of the list)
            # so they do not turn into bogus dataset entries.
            self.img_files=[line.rstrip() for line in f if line.strip()]
        self.labels=[path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
        # Upper bound on label rows per image (used by ToTensor and getitem).
        self.max_objs=50
        self.transform=data_augmentation.Compose()
        if self.is_training:
            self.transform.add(data_augmentation.ImageBaseAug())
        self.transform.add(data_augmentation.ResizeImage(self.resolution))
        self.transform.add(data_augmentation.ToTensor(self.max_objs))

    def __len__(self):
        """Return the number of listed images."""
        return len(self.img_files)

    def __getitem__(self,idx):
        """Load image ``idx`` (wrapping around) and its labels.

        Returns:
            The ``{'image': ..., 'label': ...}`` sample after ``self.transform``
            has been applied (the untransformed dict if ``self.transform`` is
            None).

        Raises:
            FileNotFoundError: the image file does not exist.
            OSError: the image file exists but OpenCV could not decode it.
        """
        idx=idx%len(self.img_files)
        img_path=self.append_label+self.img_files[idx].rstrip()
        # cv2.imread signals failure by returning None instead of raising;
        # fail early with the offending path rather than inside cvtColor.
        if not os.path.isfile(img_path):
            raise FileNotFoundError('image file not found: %s' % img_path)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            raise OSError('could not decode image: %s' % img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        label_path=self.append_label+self.labels[idx].rstrip()
        if os.path.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
        else:
            # Missing label file: report it and fall back to one all-zero row.
            print('oof',label_path)
            labels = np.zeros((1, 5), np.float32)

        sample = {'image': img, 'label': labels}
        if self.transform is not None:
            sample=self.transform(sample)

        return sample

    def getitem(self, idx):
        """Load image ``idx`` (wrapping around) without the transform pipeline.

        The image is read with PIL, resized straight to ``self.resolution``
        with ``skimage.transform.resize`` (no letterbox padding) and returned
        channel-first as a float tensor.  Because no padding is applied, the
        label rows are returned exactly as stored in the label file; they are
        copied into a zero-filled ``(max_objs, 5)`` array, truncated to
        ``max_objs`` rows.

        Returns:
            ``(img_path, input_img, filled_labels)``.
        """
        idx=idx%len(self.img_files)
        img_path=self.append_label+self.img_files[idx].rstrip()
        # Context manager so the underlying file handle is released promptly.
        with Image.open(img_path) as pil_img:
            img=np.array(pil_img.convert('RGB'))

        input_img = resize(img, (*self.resolution, 3), mode='reflect')
        input_img = np.transpose(input_img, (2, 0, 1))
        input_img = torch.from_numpy(input_img).float()

        # Same prefixing as __getitem__, so both accessors find the same file.
        label_path=self.append_label+self.labels[idx].rstrip()

        filled_labels=np.zeros((self.max_objs,5))
        if os.path.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
            n_rows=min(len(labels),self.max_objs)
            filled_labels[:n_rows]=labels[:n_rows]

        filled_labels=torch.from_numpy(filled_labels)

        return img_path,input_img,filled_labels
   

In [2]:
# ---- Configuration ----------------------------------------------------------
num_classes = 80
batch_size=10
resolution=416
nms_thresh=0.6
confidence=0.5
#input_imgs='images/'
#input_imgs='data/coco/trainvalno5k.txt'
input_imgs='images/trainvalno5k.txt'   # text file listing the training images
link="cfg/yolov3.cfg"                  # network definition passed to dk.Darknet

is_training=True
epochs=2
seed=0

# Seed every RNG in play so that shuffling (and any random augmentation or
# weight initialisation drawing from these generators) is repeatable.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# ---- Model ------------------------------------------------------------------
model=dk.Darknet(link)
model=model.to(get_device())
model.train()

# Take the device from the model itself so the batches always land where the
# weights are, instead of trusting a hard-coded CUDA flag.
device=next(model.parameters()).device
cuda=device.type=='cuda'
# Kept for any later cell that still refers to `Tensor`.
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

optimizer=torch.optim.Adam(filter(lambda p:p.requires_grad,model.parameters()))
# Shuffle while training so consecutive batches do not follow file-list order.
dataloader = data.DataLoader(
    CoCoDataset(input_imgs,resolution,is_training=is_training),
    batch_size=batch_size,
    shuffle=is_training,
    pin_memory=False,
)

# ---- Training loop ----------------------------------------------------------
# NOTE(review): the output recorded for this cell reports `conf nan` and
# `total nan` from the very first batch while the other terms stay finite.
# The cause is not visible in this cell -- check the confidence-loss term in
# darknet and the label rows fed to it before trusting a run.
for epoch in range(epochs):
    for batch_i, sample in enumerate(dataloader):
        # Tensors no longer need a Variable wrapper; move and cast in one step.
        imgs=sample['image'].to(device=device,dtype=torch.float32)
        targets=sample['label'].to(device=device,dtype=torch.float32)

        optimizer.zero_grad()
        loss=model(imgs,targets)
        loss.backward()
        optimizer.step()

        print(
            "[Epoch %d/%d, Batch %d/%d] [Losses: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f, recall: %.5f, precision: %.5f]"
            % (
                epoch,
                epochs,
                batch_i,
                len(dataloader),
                model.losses["x"],
                model.losses["y"],
                model.losses["w"],
                model.losses["h"],
                model.losses["conf"],
                model.losses["cls"],
                loss.item(),
                model.losses["recall"],
                model.losses["precision"],
            )
        )

        # Running count of images the model has been trained on.
        model.seen += imgs.size(0)

[Epoch 0/2, Batch 0/11727] [Losses: x 0.257722, y 0.279835, w 4.472505, h 5.426966, conf nan, cls 1.316923, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 1/11727] [Losses: x 0.269594, y 0.249322, w 10.645700, h 10.392942, conf nan, cls 1.315169, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 2/11727] [Losses: x 0.283745, y 0.282023, w 5.017067, h 5.504388, conf nan, cls 1.312805, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 3/11727] [Losses: x 0.318707, y 0.274200, w 6.617477, h 7.204269, conf nan, cls 1.301341, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 4/11727] [Losses: x 0.250352, y 0.266185, w 3.072226, h 4.389412, conf nan, cls 1.307905, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 5/11727] [Losses: x 0.275028, y 0.196782, w 3.860775, h 5.318764, conf nan, cls 1.309470, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 6/11727] [Losses: x 0.274168, y 0.286597, w 

[Epoch 0/2, Batch 52/11727] [Losses: x 0.255493, y 0.258187, w 2.748450, h 2.189387, conf nan, cls 1.222862, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 53/11727] [Losses: x 0.250321, y 0.257098, w 1.636530, h 1.779845, conf nan, cls 1.203371, total nan, recall: 0.00709, precision: 0.00013]
[Epoch 0/2, Batch 54/11727] [Losses: x 0.286702, y 0.261844, w 2.129825, h 2.227146, conf nan, cls 1.242885, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 55/11727] [Losses: x 0.265780, y 0.244120, w 1.684720, h 2.173025, conf nan, cls 1.232825, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 56/11727] [Losses: x 0.263053, y 0.279834, w 3.250091, h 2.766753, conf nan, cls 1.226277, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 57/11727] [Losses: x 0.261485, y 0.239969, w 1.924506, h 3.363768, conf nan, cls 1.253835, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 58/11727] [Losses: x 0.252244, y 0.24311

[Epoch 0/2, Batch 104/11727] [Losses: x 0.213042, y 0.242136, w 1.350549, h 1.550060, conf nan, cls 1.244006, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 105/11727] [Losses: x 0.264991, y 0.272113, w 2.450437, h 2.570612, conf nan, cls 1.246646, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 106/11727] [Losses: x 0.260778, y 0.246536, w 1.718672, h 1.456600, conf nan, cls 1.198278, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 107/11727] [Losses: x 0.273327, y 0.242434, w 1.439824, h 1.736636, conf nan, cls 1.214607, total nan, recall: 0.00388, precision: 0.00007]
[Epoch 0/2, Batch 108/11727] [Losses: x 0.262471, y 0.244555, w 1.873188, h 1.159895, conf nan, cls 1.285792, total nan, recall: 0.01010, precision: 0.00007]
[Epoch 0/2, Batch 109/11727] [Losses: x 0.296456, y 0.276235, w 1.278335, h 1.785548, conf nan, cls 1.224509, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 110/11727] [Losses: x 0.253863, y 

[Epoch 0/2, Batch 155/11727] [Losses: x 0.266252, y 0.226580, w 1.525339, h 1.338585, conf nan, cls 1.243003, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 156/11727] [Losses: x 0.245251, y 0.256023, w 1.692845, h 2.389308, conf nan, cls 1.176560, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 157/11727] [Losses: x 0.267879, y 0.298606, w 2.158817, h 2.424874, conf nan, cls 1.239380, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 158/11727] [Losses: x 0.232013, y 0.267737, w 1.652735, h 1.980598, conf nan, cls 1.211339, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 159/11727] [Losses: x 0.253478, y 0.220274, w 1.661729, h 1.798116, conf nan, cls 1.229875, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 160/11727] [Losses: x 0.237596, y 0.264744, w 1.739470, h 2.864066, conf nan, cls 1.187111, total nan, recall: 0.00000, precision: 0.00000]
[Epoch 0/2, Batch 161/11727] [Losses: x 0.249212, y 

KeyboardInterrupt: 