## Common Requirements

In [None]:
# Download the baseline SSD code.
# `git init` followed by `git pull <url>` fetches the repository contents straight into the
# current working directory, so model.py / datasets.py / utils.py / train.py become importable.
!git init
!git pull https://github.com/epsilon-deltta/ssd_guillotine.git

### Training On VOC2007 and VOC2012 

In [None]:
# Download the Pascal VOC 2007 and 2012 archives (trainval + test for each year).
!wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
!wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
!wget http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
!wget http://pjreddie.com/media/files/VOC2012test.tar

# Unpack every archive into the current directory (-v lists each extracted file).
!tar -xvf VOCtrainval_06-Nov-2007.tar 
!tar -xvf VOCtest_06-Nov-2007.tar 
!tar -xvf VOCtrainval_11-May-2012.tar 
!tar -xvf VOC2012test.tar

# Remove the archives once they are extracted.
!rm  VOCtrainval_06-Nov-2007.tar
!rm  VOCtest_06-Nov-2007.tar 
!rm  VOCtrainval_11-May-2012.tar 
!rm  VOC2012test.tar
# Build the combined VOC2007 + VOC2012 split lists.
# NOTE(review): presumably this writes the list files that PascalVOCDataset reads from
# `data_folder` -- confirm against create_data_lists.py.
!python create_data_lists.py

In [None]:
import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *

#### settings
# Configuration for the VOC training cell below; every name here is read by later cells.

# Data parameters
data_folder = './'  # folder with data files
keep_difficult = True  # use objects considered difficult to detect?

# Model parameters
# Not too many here since the SSD300 has a very specific structure
n_classes = len(label_map)  # one class per entry of label_map (brought in by `from utils import *`)
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # plain string; accepted by .to(device)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Learning parameters
checkpoint = None  # path to model checkpoint, None if none
batch_size = 8  # batch size
iterations = 120000  # number of iterations to train
workers = 2  # number of workers for loading data in the DataLoader
print_freq = 200  # print training status every __ batches
lr = 1e-3  # learning rate
decay_lr_at = [80000, 100000]  # decay learning rate after these many iterations
decay_lr_to = 0.1  # decay learning rate to this fraction of the existing learning rate
momentum = 0.9  # momentum
weight_decay = 5e-4  # weight decay
grad_clip = None  # clip if gradients are exploding, which may happen at larger batch sizes (sometimes at 32) - you will recognize it by a sorting error in the MultiBox loss calculation

# Let cuDNN benchmark and pick convolution algorithms for the (fixed) input size.
cudnn.benchmark = True



In [None]:

# Build the model and optimizer (fresh, or resumed from `checkpoint`), create the VOC
# training loader, then run the epoch loop. Reads its configuration from the settings cell.

# Initialize model or load checkpoint
if checkpoint is None:
    start_epoch = 0
    model = SSD300(n_classes=n_classes)
    # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
    biases = list()
    not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
                                lr=lr, momentum=momentum, weight_decay=weight_decay)

else:
    # Load into a separate name so that `checkpoint` remains a path and this cell can be re-run.
    # NOTE(security): torch.load unpickles arbitrary objects -- only load checkpoints you trust.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
    # full-model pickles like this one; pass weights_only=False there if loading fails.
    checkpoint_state = torch.load(checkpoint, map_location=device)
    start_epoch = checkpoint_state['epoch'] + 1
    print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
    model = checkpoint_state['model']
    optimizer = checkpoint_state['optimizer']


# Custom dataloaders
# output types of dataset
# n : number of objects wrt. each Image. 
# (torch.Size([3, 300, 300]),
#  torch.Size([n, 4]),  # Ground truth Box
#  torch.Size([n]),     # Class
#  torch.Size([n]))     # Difficulty
train_dataset = PascalVOCDataset(data_folder,
                                    split='train',
                                    keep_difficult=keep_difficult)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                            collate_fn=train_dataset.collate_fn, num_workers=workers,
                                            pin_memory=True)  # note that we're passing the collate function here

# Move to default device
model = model.to(device)
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

# Calculate total number of epochs to train and the epochs to decay learning rate at (i.e. convert iterations to epochs)
# To convert iterations to epochs, divide iterations by the number of iterations per epoch
# The paper trains for 120,000 iterations with a batch size of 32, decays after 80,000 and 100,000 iterations
# max(1, ...) avoids a ZeroDivisionError when the dataset holds fewer than 32 samples.
iters_per_epoch = max(1, len(train_dataset) // 32)
epochs = iterations // iters_per_epoch
# Keep the epoch milestones in their own name: `decay_lr_at` stays expressed in iterations,
# so re-running this cell does not divide twice, and the star import below cannot replace
# the converted values should train.py define a module-level `decay_lr_at` of its own.
decay_epochs = [it // iters_per_epoch for it in decay_lr_at]

# NOTE(review): a star import may rebind notebook-level names; prefer an explicit
# `from train import train` once the contents of train.py are confirmed.
from train import *
# Epochs
for epoch in range(start_epoch, epochs):

    # Decay learning rate at particular epochs
    if epoch in decay_epochs:
        adjust_learning_rate(optimizer, decay_lr_to)

    # One epoch's training
    train(train_loader=train_loader,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch)

    # Save checkpoint
    save_checkpoint(epoch, model, optimizer)



### Detect objects! (model trained on VOC2007 + VOC2012)

In [None]:
# Download the pretrained SSD checkpoint trained on VOC (it contains both the model structure and its weights).
# Despite the `.tar` suffix chosen by the checkpoint's author, the file is not a tar archive.
# The nested wget first fetches Google Drive's confirmation token, then downloads the file with it.
FILENAME = 'voc_pretrined.pth.tar'
FILEID = '1bvJfF6r_zYl2xZEpYXxgb7jLQHFZ01Qe'
!wget --load-cookies ~/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies ~/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={FILEID}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={FILEID}" -O {FILENAME} && rm -rf ~/cookies.txt


In [None]:
# 
# Enjoy pretrained model .
# 
import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *
from torchvision import transforms
from utils import *
from PIL import Image, ImageDraw, ImageFont

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model checkpoint
# `checkpoint` stays a *path*; the loaded dictionary gets its own name so that later cells
# which pass `checkpoint` to torch.load() keep working after this cell has run.
checkpoint = FILENAME
# NOTE(security): torch.load unpickles arbitrary objects -- only load checkpoints you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
# full-model pickles like this one; pass weights_only=False there if loading fails.
checkpoint_state = torch.load(checkpoint, map_location=device)

start_epoch = checkpoint_state['epoch'] + 1
print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
model = checkpoint_state['model']
model = model.to(device)
model.eval()  # inference mode for detection

# Transforms applied to every input image: resize to 300x300, convert to tensor,
# then normalize each of the three channels with the mean/std below.
resize = transforms.Resize((300, 300))
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])


def _text_size(font, text):
    """Return (width, height) of `text` rendered with `font`, on both old and new Pillow."""
    if hasattr(font, 'getbbox'):
        # font.getsize() was removed in Pillow 10; getbbox() is its replacement.
        left, top, right, bottom = font.getbbox(text)
        return right - left, bottom - top
    return font.getsize(text)


def detect(model,original_image, min_score=0.2, max_overlap=0.5, top_k=200, suppress=None):
    """
    Detect objects in an image with a trained SSD300, and visualize the results.

    :param model: trained SSD300 model, already on `device` and in eval mode
    :param original_image: image, a PIL Image (converted to RGB if it is in another mode); it is not modified
    :param min_score: minimum threshold for a detected box to be considered a match for a certain class
    :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via Non-Maximum Suppression (NMS)
    :param top_k: if there are a lot of resulting detection across all classes, keep only the top 'k'
    :param suppress: classes that you know for sure cannot be in the image or you do not want in the image, a list
    :return: annotated image, a PIL Image (the input image itself when nothing is detected)
    """

    # `normalize` uses three per-channel statistics, so grayscale / palette / RGBA input must become RGB first
    if original_image.mode != 'RGB':
        original_image = original_image.convert('RGB')

    # Transform
    image = normalize(to_tensor(resize(original_image)))

    # Move to default device
    image = image.to(device)

    # Inference only: skip autograd bookkeeping
    with torch.no_grad():
        # Forward prop.
        predicted_locs, predicted_scores = model(image.unsqueeze(0))

        # Detect objects in SSD output
        det_boxes, det_labels, _det_scores = model.detect_objects(predicted_locs, predicted_scores,
                                                                  min_score=min_score,
                                                                  max_overlap=max_overlap, top_k=top_k)

    # Move detections to the CPU
    det_boxes = det_boxes[0].to('cpu')

    # Transform to original image dimensions
    original_dims = torch.FloatTensor(
        [original_image.width, original_image.height, original_image.width, original_image.height]).unsqueeze(0)
    det_boxes = det_boxes * original_dims

    # Decode class integer labels
    det_labels = [rev_label_map[l] for l in det_labels[0].to('cpu').tolist()]

    # If no objects found, the detected labels will be set to ['0.'], i.e. ['background'] in SSD300.detect_objects() in model.py
    if det_labels == ['background']:
        # Just return original image
        return original_image

    # Annotate a copy so the caller's image is left untouched
    annotated_image = original_image.copy()
    draw = ImageDraw.Draw(annotated_image)
    font = ImageFont.load_default()

    for i in range(det_boxes.size(0)):
        label = det_labels[i]

        # Suppress specific classes, if needed
        if suppress is not None and label in suppress:
            continue

        color = label_color_map[label]
        caption = label.upper()

        # Boxes
        box_location = det_boxes[i].tolist()
        draw.rectangle(xy=box_location, outline=color)
        # a second rectangle at an offset of 1 pixel to increase line thickness
        draw.rectangle(xy=[l + 1. for l in box_location], outline=color)

        # Text: a filled label box sitting directly above the top-left corner of the detection
        text_size = _text_size(font, caption)
        text_location = [box_location[0] + 2., box_location[1] - text_size[1]]
        textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4.,
                            box_location[1]]
        draw.rectangle(xy=textbox_location, fill=color)
        draw.text(xy=text_location, text=caption, fill='white',
                  font=font)
    del draw

    return annotated_image



from IPython.display import display

# Run the detector on one local image and render the annotated result inline.
img_path = '/content/img/000022.jpg'
original_image = Image.open(img_path, mode='r').convert('RGB')
anno_img = detect(model, original_image, min_score=0.2, max_overlap=0.5, top_k=200)
display(anno_img)


In [None]:
from  PIL import Image
import requests as rq
import io
from PIL import  ImageDraw
from IPython.display import display

# Fetch an image over HTTP and run the detector on it.
# Alternative test image (this assignment was always overwritten by the one below, so it never took effect):
# imgurl ='https://t3.daumcdn.net/thumb/R720x0/?fname=http://t1.daumcdn.net/brunch/service/user/1e0Z/image/7ulVcf_Wh5VicfYz2sVq3v_z37Y.jpg' 
imgurl = 'https://s3.ap-northeast-2.amazonaws.com/img.kormedi.com/news/article/__icsFiles/artimage/2014/08/29/c_km601/515966_540.jpg'
response = rq.get(imgurl, timeout=30)
# Surface HTTP failures here instead of as an opaque "cannot identify image file" from PIL.
response.raise_for_status()
kokobyte = response.content

# The detection pipeline normalizes with three per-channel statistics, so the image must be
# RGB; downloaded images are often grayscale, palette, RGBA or CMYK, which made this cell fail.
koko = Image.open(io.BytesIO(kokobyte)).convert('RGB')
anno_img = detect(model,koko, min_score=0.2, max_overlap=0.5, top_k=200)
display(anno_img)

In [None]:

# Training setup only: builds the model, optimizer, loss and VOC training loader
# without running the epoch loop.

# Initialize model or load checkpoint
if checkpoint is None:
    start_epoch = 0
    model = SSD300(n_classes=n_classes)
    # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
    biases = list()
    not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
                                lr=lr, momentum=momentum, weight_decay=weight_decay)

else:
    # The detection cell earlier in the notebook rebinds `checkpoint` to an already-loaded
    # dictionary; accept that as well as a path, and never rebind `checkpoint` here.
    # NOTE(security): torch.load unpickles arbitrary objects -- only load checkpoints you trust.
    if isinstance(checkpoint, dict):
        checkpoint_state = checkpoint
    else:
        checkpoint_state = torch.load(checkpoint, map_location=device)
    start_epoch = checkpoint_state['epoch'] + 1
    print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
    model = checkpoint_state['model']
    optimizer = checkpoint_state['optimizer']


# Move to default device
# NOTE(review): the model is deliberately left where it is (line below is disabled) while the
# criterion is moved to `device` -- confirm this is intended before training with this setup.
# model = model.to(device)
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

# Custom dataloaders
train_dataset = PascalVOCDataset(data_folder,
                                    split='train',
                                    keep_difficult=keep_difficult)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                            collate_fn=train_dataset.collate_fn, num_workers=workers,
                                            pin_memory=True)  # note that we're passing the collate function here


## Training on KAIST Multispectral Pedestrian Dataset

In [None]:
# Clone the KAIST pedestrian dataset repository into ./LkaistPdt, the directory the
# settings cell that follows resolves `repo_root` and `dt_root` from.
!git clone https://github.com/epsilon-deltta/LkaistPdt.git

In [None]:
import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *

#### settings
# Configuration for the KAIST training cells; every name here is read by later cells.
import os
repo_root = os.path.abspath('./LkaistPdt')
dt_root = os.path.abspath('./LkaistPdt/data')
anno_dir = os.path.abspath(os.path.join(dt_root,'annotations'))
img_dir  = os.path.abspath(os.path.join(dt_root,'images'))

keep_difficult = False  # use objects considered difficult to detect?
# KaistPdt assigns label ids 1 (cyclist), 2 (person) and 3 (people); with id 0 left for
# background the classifier needs 4 outputs. With 3, label id 3 is out of range for the loss.
n_classes = 4  # number of different types of objects, background included
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Learning parameters
checkpoint = None  # path to model checkpoint, None if none
batch_size = 8  # batch size
iterations = 120000  # number of iterations to train
workers = 2  # number of workers for loading data in the DataLoader
print_freq = 200  # print training status every __ batches
lr = 1e-3  # learning rate
decay_lr_at = [80000, 100000]  # decay learning rate after these many iterations
decay_lr_to = 0.1  # decay learning rate to this fraction of the existing learning rate
momentum = 0.9  # momentum
weight_decay = 5e-4  # weight decay
grad_clip = None  # clip if gradients are exploding, which may happen at larger batch sizes (sometimes at 32) - you will recognize it by a sorting error in the MultiBox loss calculation

# Let cuDNN benchmark and pick convolution algorithms for the (fixed) input size.
cudnn.benchmark = True

In [None]:
import torch
import torch.nn as nn

import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import  ToTensor
from PIL import Image
import numpy as np

class KaistPdt(torch.utils.data.Dataset):
    """
    KAIST multispectral pedestrian samples listed in a split file.

    Layout read from `root`:
      * images/<name>.jpg          -- visible image; the thermal image lives at the same
                                      path with 'visible' replaced by 'lwir'
      * annotations/<name>.txt     -- one header line, then one object per line:
                                      "<class> <v1> <v2> <v3> <v4> ..."

    A sample is kept only if at least one of its two images exists and its annotation file
    yields at least one object of a recognised class.

    :param root: dataset root directory
    :param split_file: text file with one annotation file name per line
    :param split: passed through to `transform` in __getitem__
    :param transforms: accepted for interface compatibility; currently unused
    :param target_transform: accepted for interface compatibility; currently unused
    """

    # (substring of the annotation class name, label id), tested in this order.
    # NOTE(review): id 0 is presumably reserved for background -- confirm against the model.
    _LABEL_KEYWORDS = (('cyclist', 1), ('person', 2), ('people', 3))

    def __init__(self,root,split_file,split='train',transforms=None,target_transform=None):
        self.root  = root
        self.split = split

        # One annotation file name per line; blank lines and trailing whitespace are ignored
        with open(split_file,'r') as f:
            fnames = [line.strip() for line in f if line.strip()]

        # self.paths[i] and self.objects[i] always describe the same sample
        self.paths = []
        self.objects = []
        for fname in fnames:
            rgb_path, thermal_path = self._image_paths(fname)
            if not (os.path.exists(rgb_path) or os.path.exists(thermal_path)):
                continue  # no image at all for this sample

            # Keep only the annotations in memory, not the images (because of their size)
            obj = self._read_objects(os.path.join(self.root,'annotations',fname))
            if obj is None:
                continue  # nothing usable annotated

            self.objects.append(obj)
            self.paths.append(fname)

    def _image_paths(self, fname):
        """Return (visible_path, thermal_path) for an annotation file name."""
        rgb_path = os.path.join(self.root,'images',fname.replace('.txt','.jpg'))
        thermal_path = os.path.join(self.root,'images',fname.replace('.txt','.jpg').replace('visible','lwir'))
        return rgb_path, thermal_path

    @classmethod
    def _label_for(cls, class_name):
        """Map an annotation class name to its label id, or None if it is not recognised."""
        for keyword, label in cls._LABEL_KEYWORDS:
            if keyword in class_name:
                return label
        return None

    def _read_objects(self, ann_path):
        """Parse one annotation file into {'boxes': Tensor[n, 4], 'labels': Tensor[n]}, or None if n == 0."""
        with open(ann_path,'r') as f:
            lines = f.readlines()

        boxes = []
        labels = []
        for line in lines[1:]:  # the first line is a header
            items = line.split()
            if len(items) < 5:
                continue  # blank or truncated line
            label = self._label_for(items[0])
            if label is None:
                # Unknown class: skip it instead of reusing the previous object's label
                continue
            boxes.append([int(x) for x in items[1:5]])
            labels.append(label)

        if not boxes:
            return None
        return {'boxes': torch.tensor(boxes), 'labels': torch.tensor(labels)}

    @staticmethod
    def _load_or_blank(path):
        """Load the image at `path` as a tensor, or return an all-zero 3x512x640 tensor if the file is missing."""
        if not os.path.exists(path):
            return torch.zeros([3, 512, 640])
        with Image.open(path) as im:  # close the file handle once the pixels are converted
            return ToTensor()(im)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self,idx):
        img          = self.get_img(idx)
        boxes,labels = self.get_label(idx)
        # NOTE(review): `transform` is expected to come from `from utils import *`; confirm that
        # it accepts a tensor image and this argument list.
        img , boxes,labels = transform(img,boxes,labels,split=self.split)
        return img, boxes,labels

    def get_img(self,idx):
        """Return the element-wise sum of the visible and thermal images of sample `idx`."""
        rgb_path, thermal_path = self._image_paths(self.paths[idx])

        # Each modality is checked on its own; a missing one contributes zeros
        rgb     = self._load_or_blank(rgb_path)
        thermal = self._load_or_blank(thermal_path)

        # NOTE(review): summing the two modalities can exceed the usual [0, 1] range -- confirm intended
        img = rgb+thermal
        return img

    def get_label(self,idx):
        """Return (boxes, labels) of sample `idx`: Tensor[n, 4] and Tensor[n]."""
        # class : cyclist, person ,people
        # boxes : numOfobjs x 4 (the four integers following the class name in the annotation)
        # labels: numOfobjs
        boxes  = self.objects[idx]['boxes']
        labels = self.objects[idx]['labels']
        return  boxes, labels
        
def kaist_collate(batch):
    """
    Combine (img, boxes, labels) samples into one batch.

    Images are stacked into a single tensor; boxes and labels stay as per-image lists because
    the number of objects differs between images, which the default collate cannot stack.
    NOTE(review): assumes every image has the same shape after `transform` -- confirm.
    """
    images, boxes, labels = zip(*batch)
    return torch.stack(images, dim=0), list(boxes), list(labels)


# NOTE(review): valdt / tedt are built with the default split='train', so they go through the
# training-time `transform`; pass the appropriate split once its accepted values are confirmed.
trdt  = KaistPdt(dt_root,os.path.join(repo_root,'split','train.txt') )
valdt = KaistPdt(dt_root,os.path.join(repo_root,'split','val.txt'))
tedt  = KaistPdt(dt_root,os.path.join(repo_root,'split','test.txt'))
# Shuffle the training data each epoch, as the VOC training loader does
tr    = DataLoader(trdt,batch_size =16, shuffle=True, collate_fn=kaist_collate)
val    = DataLoader(valdt,batch_size =16, collate_fn=kaist_collate)

In [None]:

# Build the model and optimizer (fresh, or resumed from `checkpoint`) and train on the
# KAIST loader `tr`. Reads its configuration from the KAIST settings cell.

# Initialize model or load checkpoint
if checkpoint is None:
    start_epoch = 0
    model = SSD300(n_classes=n_classes)
    # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
    biases = list()
    not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
                                lr=lr, momentum=momentum, weight_decay=weight_decay)

else:
    # Load into a separate name so that `checkpoint` remains a path and this cell can be re-run.
    # NOTE(security): torch.load unpickles arbitrary objects -- only load checkpoints you trust.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
    # full-model pickles like this one; pass weights_only=False there if loading fails.
    checkpoint_state = torch.load(checkpoint, map_location=device)
    start_epoch = checkpoint_state['epoch'] + 1
    print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
    model = checkpoint_state['model']
    optimizer = checkpoint_state['optimizer']

train_loader = tr
# Move to default device
model = model.to(device)
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

# Convert the iteration-based schedule to epochs, assuming 32 samples per iteration.
# max(1, ...) avoids a ZeroDivisionError when the dataset holds fewer than 32 samples.
iters_per_epoch = max(1, len(tr.dataset) // 32)
epochs = iterations // iters_per_epoch
# Keep the epoch milestones in their own name: `decay_lr_at` stays expressed in iterations,
# so re-running this cell does not divide twice, and the star import below cannot replace
# the converted values should train.py define a module-level `decay_lr_at` of its own.
decay_epochs = [it // iters_per_epoch for it in decay_lr_at]

# NOTE(review): a star import may rebind notebook-level names; prefer an explicit
# `from train import train` once the contents of train.py are confirmed.
from train import *
# Epochs
for epoch in range(start_epoch, epochs):

    # Decay learning rate at particular epochs
    if epoch in decay_epochs:
        adjust_learning_rate(optimizer, decay_lr_to)

    # One epoch's training
    train(train_loader=train_loader,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch)

    # Save checkpoint
    save_checkpoint(epoch, model, optimizer)



## Testing on Pedestrian Dataset