In [None]:
'''
pip install pyyaml
pip install easydict
pip install -U pytorch_warmup
''';

In [None]:
!nvidia-smi

Sun Apr 18 20:58:41 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P0    29W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
os.chdir('/content/drive/My Drive/jupyter/detr/transformers')

In [None]:
!ls

checkpoint   coco-args.txt  logs    runs		    utils
classes.txt  config.yaml    models  transformer_coco.ipynb


In [None]:
import os, sys
import yaml
import requests
from pathlib import Path

from easydict import EasyDict as edict

import numpy as np
import torch
from torch.cuda import amp
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from torch.utils.data.dataloader import DataLoader
import torchvision.transforms as transforms

from models import DETR, SetCriterion
from utils.dataset import collateFunction, COCODataset
from utils.misc import MetricsLogger, saveArguments, logMetrics, cast2Float

from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
from utils.general import increment_path

from tqdm.notebook import tqdm, trange

import matplotlib.pyplot as plt
from PIL import Image

# %load_ext autoreload
# %autoreload 2

In [None]:
# Anchor every path to the notebook's working directory.
CURRENT_PATH = os.getcwd()
# Directory one level above the working directory.
BASE_PATH = Path(CURRENT_PATH).parents[0]
# Location of the YAML configuration read by parse_config().
CONFIG = os.path.join(CURRENT_PATH, 'config.yaml')

In [None]:
CURRENT_PATH

'/content/drive/My Drive/jupyter/detr/transformers'

In [None]:
def parse_config():
    """Load the YAML configuration file at ``CONFIG`` and return its parsed content.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        ValueError: if the file is not valid YAML. The underlying
            ``yaml.YAMLError`` is chained as the cause so the parser's
            line/column information is not lost.
        OSError: if the file cannot be opened.
    """
    with open(CONFIG, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            raise ValueError(f"Failed to parse required config file {CONFIG}: {exc}") from exc

In [None]:
args = edict(parse_config())

In [None]:
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
saveArguments(args, args.taskName)
torch.manual_seed(1337)
device = torch.device(args.device)

In [None]:
!unzip "/content/drive/My Drive/jupyter/detr/detr.zip" -d "/content/10k_coco";


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/10k_coco/8k_train/000000221547.jpg  
  inflating: /content/10k_coco/8k_train/000000221665.jpg  
  inflating: /content/10k_coco/8k_train/000000221725.jpg  
  inflating: /content/10k_coco/8k_train/000000221776.jpg  
  inflating: /content/10k_coco/8k_train/000000221829.jpg  
 extracting: /content/10k_coco/8k_train/000000221881.jpg  
 extracting: /content/10k_coco/8k_train/000000221903.jpg  
  inflating: /content/10k_coco/8k_train/000000221911.jpg  
  inflating: /content/10k_coco/8k_train/000000222056.jpg  
  inflating: /content/10k_coco/8k_train/000000222080.jpg  
  inflating: /content/10k_coco/8k_train/000000222245.jpg  
  inflating: /content/10k_coco/8k_train/000000222266.jpg  
  inflating: /content/10k_coco/8k_train/000000222295.jpg  
  inflating: /content/10k_coco/8k_train/000000222370.jpg  
  inflating: /content/10k_coco/8k_train/000000222472.jpg  
  inflating: /content/10k_coco/8k_train/0000002225

In [None]:
# train_dir = os.path.join(BASE_PATH, 'data/10k_coco/images')
# ann_dir = os.path.join(BASE_PATH, 'data/10k_coco/train_80.json')
# Training images and annotation file under /content/10k_coco.
train_dir = '/content/10k_coco/8k_train'
ann_dir = '/content/10k_coco/train_80.json'

dataset = COCODataset(train_dir,
                      ann_dir,
                      args.targetHeight,
                      args.targetWidth,
                      args.numClass)
# Shuffle so each training epoch visits the samples in a different order;
# torch.manual_seed() above keeps the order reproducible across runs.
dataloader = DataLoader(dataset,
                        batch_size=args.batchSize,
                        shuffle=True,
                        collate_fn=collateFunction,
                        pin_memory=True,
                        num_workers=args.numWorkers)

loading annotations into memory...
Done (t=1.28s)
creating index...
index created!


In [None]:
model = DETR(args).to(device)
criterion = SetCriterion(args).to(device)

In [None]:
# if args.weightDir and os.path.exists(args.weightDir):
#     print(f'loading pre-trained weights from {args.weightDir}')
#     model.load_state_dict(torch.load(args.weightDir, map_location=device))

# Wrap the model for multi-GPU data parallelism when the config requests it.
if args.multi:
    model = torch.nn.DataParallel(model)

# Split the trainable parameters in a single pass: anything whose name
# contains "backbone" gets its own learning rate, everything else uses the
# optimizer's default.
head_params, backbone_params = [], []
for param_name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    if "backbone" in param_name:
        backbone_params.append(param)
    else:
        head_params.append(param)

paramDicts = [
    {"params": head_params},
    {"params": backbone_params, "lr": args.lrBackbone},
]


In [None]:
start_epoch = 0  # overwritten by the resume cell below when a checkpoint is loaded
# AdamW over the two parameter groups; args.lr applies to the group that has no "lr" of its own.
optimizer = AdamW(paramDicts, args.lr, weight_decay=args.weightDecay)
# StepLR with step_size=args.lrDrop (decay factor left at the library default).
lr_scheduler = StepLR(optimizer, args.lrDrop)
prev_best_loss = np.inf  # best loss so far; any finite loss is an improvement
batches = len(dataloader)  # number of batches per epoch
logger = MetricsLogger()

# !pip install -U pytorch_warmup
# import pytorch_warmup as warmup
# warmup_scheduler = warmup.RAdamWarmup(optimizer)

## Resume training

In [None]:
# args.outputDir = '/content/drive/My Drive/jupyter/results/detr_200_16bs'

In [None]:
CHECKPOINT_PATH = "/content/drive/My Drive/jupyter/results/detr_200_8bs"

In [None]:
args.datasetType

'10k_coco'

In [None]:
# Resume from the checkpoint file written by train().
checkpoint_file = os.path.join(CHECKPOINT_PATH, f'{args.datasetType}.pt')
if not os.path.isfile(checkpoint_file):
    # Explicit error instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f'checkpoint not found: {checkpoint_file}')

# map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime.
checkpoint = torch.load(checkpoint_file, map_location=device)
model.load_state_dict(checkpoint['model'])
# train() also stores the optimizer and scheduler state; restore them when
# present so training resumes with the same moments and learning-rate schedule.
if 'optimizer' in checkpoint:
    optimizer.load_state_dict(checkpoint['optimizer'])
if 'lr_scheduler' in checkpoint:
    lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
prev_best_loss = checkpoint['loss']
# The stored epoch was already completed when the checkpoint was written.
start_epoch = checkpoint['epoch'] + 1
print(f'loss is: {prev_best_loss}, start from: {start_epoch}')

loss is: 31.71209770482207, start from: 73


In [None]:
losses = []  # per-batch loss history over every epoch run in this kernel


def train(epoch, prev_best_loss):
    """Run one training epoch and checkpoint the model when the epoch's mean loss improves.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``lr_scheduler``, ``dataloader``, ``device`` and ``args``. Every batch
    loss is also appended to the global ``losses`` history.

    Args:
        epoch: index of the epoch being run; shown in the progress bar and
            stored in the checkpoint.
        prev_best_loss: best mean epoch loss seen so far.

    Returns:
        The best mean epoch loss after this epoch: either ``prev_best_loss``
        or this epoch's lower mean.
    """
    model.train()
    criterion.train()
    scaler = amp.GradScaler()
    # Losses of THIS epoch only. Averaging the global history instead would
    # turn the reported value into a running mean over all epochs.
    epoch_losses = []

    with tqdm(dataloader, unit='batch') as tepoch:
        for (x, y) in tepoch:
            tepoch.set_description(f'Train epoch {epoch}')

            x = x.to(device)
            y = [{k: v.to(device) for k, v in t.items()} for t in y]

            if args.amp:
                with amp.autocast():
                    out = model(x)
                out = cast2Float(out)
            else:
                out = model(x)

            metrics = criterion(out, y)

            # Total loss is the sum of every criterion entry whose key contains 'loss'.
            loss = sum(v for k, v in metrics.items() if 'loss' in k)
            loss_value = loss.item()
            losses.append(loss_value)
            epoch_losses.append(loss_value)

            # MARK: - backpropagation
            optimizer.zero_grad()
            if args.amp:
                scaler.scale(loss).backward()
                if args.clipMaxNorm > 0:
                    # Gradients must be unscaled before clipping by norm.
                    scaler.unscale_(optimizer)
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clipMaxNorm)
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                if args.clipMaxNorm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clipMaxNorm)
                optimizer.step()
            tepoch.set_postfix(loss=loss_value)

    lr_scheduler.step()
    # warmup_scheduler.dampen()

    print(lr_scheduler.get_last_lr())
    # Plain Python float so the value stored in the checkpoint is not a numpy scalar.
    avg_loss = float(np.mean(epoch_losses))

    # The loss log below is written every epoch, so the directory must exist
    # even when no checkpoint is saved.
    os.makedirs(args.outputDir, exist_ok=True)

    if avg_loss < prev_best_loss:
        print('[+] Loss improved from {:.8f} to {:.8f}, saving model...'.format(prev_best_loss,
                                                                                avg_loss))
        state = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            # Loss of the weights being saved, not the previous best.
            'loss': avg_loss,
            'epoch': epoch,
        }
        torch.save(state, f'{args.outputDir}/{args.datasetType}.pt')
        prev_best_loss = avg_loss

    with open(f'{args.outputDir}/losses.txt', 'a') as fd:
        fd.write(f'{avg_loss}\n')

    return prev_best_loss
        
        # Plots    
#         if epoch < 3:
#             f = f'{args.project}/train_batch{epoch}.jpg'
#             Thread(target=plot_images, args=(x, y, None, f), daemon=True).start()
            

In [None]:
# Run args.epochs further epochs beginning at start_epoch, carrying the best loss forward.
# NOTE(review): after a resume this trains args.epochs MORE epochs rather than up to
# args.epochs in total — confirm that is the intent.
for epoch in trange(start_epoch, start_epoch+args.epochs):
    prev_best_loss = train(epoch, prev_best_loss)

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 31.71209770 to 30.99506278, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.99506278 to 30.95868158, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.95868158 to 30.93418217, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.93418217 to 30.92615123, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.92615123 to 30.91461314, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.91461314 to 30.89850374, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.89850374 to 30.88767288, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.88767288 to 30.87419733, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.87419733 to 30.86174114, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.86174114 to 30.85049901, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.85049901 to 30.84278806, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.84278806 to 30.83500452, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.83500452 to 30.82733667, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.82733667 to 30.81900616, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.81900616 to 30.81110931, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.81110931 to 30.80317718, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.80317718 to 30.79613237, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.79613237 to 30.78939235, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.78939235 to 30.78083675, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.78083675 to 30.77161002, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.77161002 to 30.76265360, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.76265360 to 30.75430357, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.75430357 to 30.74759509, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.74759509 to 30.74169515, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.74169515 to 30.73602304, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.73602304 to 30.73110538, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.73110538 to 30.72576401, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.72576401 to 30.72090305, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.72090305 to 30.71453122, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.71453122 to 30.70833421, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.70833421 to 30.70248780, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.70248780 to 30.69720467, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.69720467 to 30.69310839, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.69310839 to 30.68840449, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.68840449 to 30.68375236, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.68375236 to 30.67947789, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.67947789 to 30.67481789, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.67481789 to 30.67003018, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.67003018 to 30.66622731, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.66622731 to 30.66163551, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.66163551 to 30.65668804, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.65668804 to 30.65220349, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.65220349 to 30.64764190, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.64764190 to 30.64366659, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.64366659 to 30.64019003, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.64019003 to 30.63621003, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.63621003 to 30.63235567, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.63235567 to 30.62919913, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.62919913 to 30.62596041, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.62596041 to 30.62376584, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.62376584 to 30.62217094, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.62217094 to 30.61969299, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61969299 to 30.61846717, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61846717 to 30.61625270, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61625270 to 30.61474250, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61474250 to 30.61306081, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61306081 to 30.61048617, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.61048617 to 30.60813544, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.60813544 to 30.60536238, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.60536238 to 30.60347926, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.60347926 to 30.60129025, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.60129025 to 30.60024220, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]
[+] Loss improved from 30.60024220 to 30.59845067, saving model...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


[0.0001, 1e-05]


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




ValueError: ignored

# Testing

In [None]:
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]
CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]
transform2 = transforms.Compose([
#     transforms.Resize(800),
    transforms.Resize((38, 38)),
    transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (center_x, center_y, width, height) to corner form.

    Args:
        x: tensor of boxes with the four coordinates along dimension 1,
            e.g. shape ``(N, 4)``.

    Returns:
        Tensor of the same layout holding ``(x_min, y_min, x_max, y_max)``.
    """
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    """Scale relative cxcywh boxes to absolute corner coordinates.

    Args:
        out_bbox: ``(N, 4)`` tensor of boxes in (cx, cy, w, h) form, expressed
            as fractions of the image size.
        size: ``(width, height)`` of the target image, e.g. ``PIL.Image.size``.

    Returns:
        ``(N, 4)`` tensor of ``(x_min, y_min, x_max, y_max)`` in pixels, on
        the same device as ``out_bbox``.
    """
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    # Build the scale on the boxes' device so GPU predictions do not hit a
    # CPU/GPU device mismatch.
    scale = torch.tensor([img_w, img_h, img_w, img_h],
                         dtype=torch.float32, device=b.device)
    return b * scale

In [None]:
def detect(im, model, transform, threshold=0.7):
    """Run the detector on a single image and return its confident predictions.

    Args:
        im: input image. ``transform(im)`` must return a tensor, and
            ``im.size`` must be ``(width, height)`` as on a PIL image.
        model: callable returning a dict with ``'class'`` logits and
            ``'bbox'`` boxes (relative cxcywh) for a batch of one.
        transform: preprocessing applied to ``im`` before batching.
        threshold: minimum class probability a prediction needs to be kept.

    Returns:
        ``(probas, boxes)``: class probabilities of the kept predictions
        (last class column dropped) and their boxes as absolute
        ``(x_min, y_min, x_max, y_max)`` coordinates in ``im``.
    """
    # preprocess and add the batch dimension (batch-size: 1)
    img = transform(im).unsqueeze(0)

    # demo model only support by default images with aspect ratio between 0.5 and 2
    # if you want to use images with an aspect ratio outside this range
    # rescale your image so that the maximum size is at most 1333 for best results
    assert img.shape[-2] <= 1600 and img.shape[-1] <= 1600, 'demo model only supports images up to 1600 pixels on each side'

    # propagate through the model; no autograd graph is needed for inference
    with torch.no_grad():
        outputs = model(img)

    # keep only predictions above the confidence threshold
    probas = outputs['class'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(outputs['bbox'][0, keep], im.size)
    return probas[keep], bboxes_scaled

In [None]:
# NOTE(review): absolute, machine-specific path — this cell only runs on the
# machine that holds this file; consider deriving it from a configurable directory.
chk = torch.load('/home/daniel/Documents/coco/Modified-DETR/checkpoint/mango.pt', map_location=torch.device("cpu"))

# Checkpoints written by train() keep the weights under the 'model' key;
# accept that layout as well as a bare state_dict.
state_dict = chk['model'] if isinstance(chk, dict) and 'model' in chk else chk
model.load_state_dict(state_dict)
# .to('cpu') moves the module in place, so cpu_model and model are the same object.
cpu_model = model.to('cpu')
# is_cuda(cpu_model)
model.eval();

In [None]:
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
im = Image.open(requests.get(url, stream=True).raw)

scores, boxes = detect(im, cpu_model, transform2)


In [None]:
dataiter = iter(dataloader)
# Use the built-in next(): the loader iterator's `.next()` alias is not
# available on newer PyTorch releases.
images, labels = next(dataiter)

In [None]:
plt.imshow(np.transpose(images[1].numpy(), (1,2,0)))

In [None]:
CLASSES[labels[1]['labels'][-1]]

In [None]:

# Forward the batch through the CPU model, then look at the first image only:
# softmax over the class axis, drop the last class column (presumably the
# "no object" class — confirm against the model) and take the best score per row.
# NOTE(review): runs with autograd enabled; wrap in torch.no_grad() if gradients are not needed.
loaded_outputs = cpu_model(images)
probas = loaded_outputs['class'].softmax(-1)[0, :, :-1]
probas.max(-1).values


# tensor([0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997, 0.1997,
#         0.1997], grad_fn=<MaxBackward0>)


# Demo

In [None]:
# Demo data lives under BASE_PATH. This cell rebinds `train_dir`, `ann_dir`, `dataset`
# and `dataloader`, replacing the training objects defined earlier in the notebook.
train_dir = os.path.join(BASE_PATH, 'data/coco_mini/trainset')
ann_dir = os.path.join(BASE_PATH, 'data/coco_mini/instances_minitrain2017.json')

dataset = COCODataset(train_dir,
                      ann_dir,
                      608, # same argument slot as args.targetHeight in the training cell
                      608, # same argument slot as args.targetWidth in the training cell
                      args.numClass)
# One image per batch, in dataset order.
dataloader = DataLoader(dataset,
                        batch_size=1,
                        shuffle=False,
                        collate_fn=collateFunction,
                        pin_memory=True,
                        num_workers=args.numWorkers)

In [None]:
from torch import nn
class DETRdemo(nn.Module):
    """
    Demo DETR implementation.

    Demo implementation of DETR in minimal number of lines, with the
    following differences wrt DETR in the paper:
    * learned positional encoding (instead of sine)
    * positional encoding is passed at input (instead of attention)
    * fc bbox predictor (instead of MLP)
    The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100.

    The object queries are repeated along the batch axis, so a batch of
    equally sized images can be processed; a batch of one behaves exactly as
    before. The learned row/column embeddings hold 50 entries each, which
    caps the backbone feature map at 50x50.
    """
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        """Build the backbone, transformer, prediction heads and learned embeddings.

        Args:
            num_classes: number of object classes; the class head gets one
                extra output.
            hidden_dim: transformer width and embedding size.
            nheads: number of attention heads.
            num_encoder_layers: number of transformer encoder layers.
            num_decoder_layers: number of transformer decoder layers.
        """
        super().__init__()

        # create ResNet-50 backbone
        self.backbone = resnet50()
        del self.backbone.fc

        # create conversion layer
        self.conv = nn.Conv2d(2048, hidden_dim, 1)

        # create a default PyTorch transformer
        self.transformer = nn.Transformer(
            hidden_dim, nheads, num_encoder_layers, num_decoder_layers)

        # prediction heads, one extra class for predicting non-empty slots
        # note that in baseline DETR linear_bbox layer is 3-layer MLP
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)

        # output positional encodings (object queries)
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))

        # spatial positional encodings
        # note that in baseline DETR we use sine positional encodings
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))

    def forward(self, inputs):
        """Predict class logits and boxes for a batch of images.

        Args:
            inputs: image batch fed to the ResNet-50 stem.

        Returns:
            Dict with ``'pred_logits'`` (class scores per object query) and
            ``'pred_boxes'`` (sigmoid-normalised 4-vectors per object query),
            both with the batch as the leading dimension.
        """
        # propagate inputs through ResNet-50 up to avg-pool layer
        x = self.backbone.conv1(inputs)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # convert from 2048 to 256 feature planes for the transformer
        h = self.conv(x)

        # construct positional encodings
        H, W = h.shape[-2:]
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)

        # one copy of the object queries per image; the transformer needs
        # source and target to share the batch dimension
        batch_size = h.shape[0]
        queries = self.query_pos.unsqueeze(1).repeat(1, batch_size, 1)

        # propagate through the transformer (pos broadcasts over the batch axis)
        h = self.transformer(pos + 0.1 * h.flatten(2).permute(2, 0, 1),
                             queries).transpose(0, 1)

        # finally project transformer outputs to class labels and bounding boxes
        return {'pred_logits': self.linear_class(h),
                'pred_boxes': self.linear_bbox(h).sigmoid()}

In [None]:
from torchvision.models import resnet50

# Build the demo model and load the published demo weights onto the CPU.
detr = DETRdemo(num_classes=91) 
state_dict = torch.hub.load_state_dict_from_url(
    url='https://dl.fbaipublicfiles.com/detr/detr_demo-da2a99e9.pth',
    map_location='cpu', check_hash=True)
detr.load_state_dict(state_dict)
# Switch to evaluation mode; the trailing ';' suppresses the module repr in the cell output.
detr.eval();

In [None]:
def box_cxcywh_to_xyxy2(x):
    """Convert boxes from (center_x, center_y, width, height) to corner form.

    Args:
        x: tensor of boxes with the four coordinates along dimension 1,
            e.g. shape ``(N, 4)``.

    Returns:
        Tensor of the same layout holding ``(x_min, y_min, x_max, y_max)``.
    """
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes2(out_bbox, size):
    """Scale relative cxcywh boxes to absolute corner coordinates of an image batch.

    Args:
        out_bbox: ``(N, 4)`` tensor of boxes in (cx, cy, w, h) form, expressed
            as fractions of the image size.
        size: 4-element size of the image batch tensor, i.e.
            ``(batch, channels, height, width)`` as returned by ``tensor.size()``.

    Returns:
        ``(N, 4)`` tensor of ``(x_min, y_min, x_max, y_max)`` in pixels, on
        the same device as ``out_bbox``.
    """
    # Tensor sizes are ordered (..., height, width): x coordinates scale with
    # the LAST entry and y coordinates with the one before it.
    _, _, img_h, img_w = size
    b = box_cxcywh_to_xyxy2(out_bbox)
    # Build the scale on the boxes' device to avoid a CPU/GPU mismatch.
    scale = torch.tensor([img_w, img_h, img_w, img_h],
                         dtype=torch.float32, device=b.device)
    return b * scale

In [None]:
def detectDemo(im, model, transform, threshold=0.7):
    """Run the demo model on an already-preprocessed image batch.

    Args:
        im: image batch tensor that is passed to ``model`` unchanged; its
            ``size()`` is forwarded to ``rescale_bboxes2``.
        model: callable returning a dict with ``'pred_logits'`` and
            ``'pred_boxes'``.
        transform: unused; kept so existing callers keep working.
        threshold: minimum class probability a prediction needs to be kept.

    Returns:
        ``(probas, boxes)`` for the first image of the batch: class
        probabilities of the kept predictions (last class column dropped)
        and their rescaled boxes.
    """
    # the input is used as-is: no transform is applied here
#     img = transform(im)
    img = im

    # demo model only support by default images with aspect ratio between 0.5 and 2
    # if you want to use images with an aspect ratio outside this range
    # rescale your image so that the maximum size is at most 1333 for best results
    assert img.shape[-2] <= 1600 and img.shape[-1] <= 1600, 'demo model only supports images up to 1600 pixels on each side'

    # propagate through the model; no autograd graph is needed for inference
    with torch.no_grad():
        outputs = model(img)

    # keep only predictions above the confidence threshold
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes2(outputs['pred_boxes'][0, keep], im.size())
    return probas[keep], bboxes_scaled

In [None]:
# FIXME(review): `example` is first assigned in the following cell, so on a fresh kernel
# (Restart & Run All) this cell raises NameError. It also hands a NumPy array to transform2,
# whose first step is transforms.Resize — presumably that expects a PIL image or tensor; confirm.
transform2(example[0].numpy())

In [None]:
# First element of the first batch (the images). The built-in next() is used
# because the loader iterator's `.next()` alias is not available on newer PyTorch releases.
example = next(iter(dataloader))[0]
scores, boxes = detectDemo(example, detr, transform2)

In [None]:
sample_image =  np.transpose(example[0].numpy(), (1,2,0))

In [None]:
def plot_results(pil_img, prob, boxes):
    """Show an image with its detections drawn on top.

    Note: this definition replaces the ``plot_results`` imported from
    ``utils.plots`` at the top of the notebook.

    Args:
        pil_img: image handed to ``plt.imshow``.
        prob: per-detection class probabilities; the arg-max selects the
            label looked up in ``CLASSES``.
        boxes: tensor of ``(x_min, y_min, x_max, y_max)`` boxes, one per
            entry of ``prob``.

    Box colours cycle through ``COLORS``; at most ``len(COLORS) * 100``
    detections are drawn.
    """
    plt.figure(figsize=(16,10))
    plt.imshow(pil_img)
    axes = plt.gca()
    detections = zip(prob, boxes.tolist(), COLORS * 100)
    for class_probs, corners, colour in detections:
        x0, y0, x1, y1 = corners
        # outline of the detection
        outline = plt.Rectangle((x0, y0), x1 - x0, y1 - y0,
                                fill=False, color=colour, linewidth=3)
        axes.add_patch(outline)
        # caption: best class and its probability, anchored at the top-left corner
        best = class_probs.argmax()
        caption = f'{CLASSES[best]}: {class_probs[best]:0.2f}'
        axes.text(x0, y0, caption, fontsize=15,
                  bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()
    
# NOTE(review): example[0][0] is a single channel of the first image, so the background is
# drawn from one channel only; `sample_image` (height, width, channel) computed in the cell
# above looks like the intended argument — confirm.
plot_results(example[0][0], scores, boxes)