# Test Video Inference

In [1]:
'''load packages'''
from __future__ import print_function
from __future__ import division
#
import os
import numpy as np
import time
import copy
import math
#
import cv2
import matplotlib.pyplot as plt
#
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, models, transforms
#
from collections import defaultdict
from tqdm import tqdm
#
import yaml

In [2]:
''' go up one level (for import) '''
import sys
# Put '..' first on the module search path so the `src` and `labels`
# packages imported in the following cells can be found. The entry is
# relative, so it is resolved against the current working directory.
sys.path.insert(0, '..')

In [3]:
'''somewhat dangerous but turn off everything for the time being'''
import warnings
# Blanket filter: no category/module restriction is given, so every warning
# raised after this point is suppressed. Narrow or remove it when debugging.
warnings.filterwarnings("ignore")

In [4]:
'''load classes and functions'''
from src.networks.yolo import Yolo
from src.datasets.dataset import ThermalDataset
from src.training.yoloLoss import yoloLoss 
#
from src.datasets.utils import image_weights_from_label_file
#
from src.postprocessing.predictions import Prediction
from src.postprocessing.evaluation import *

In [5]:
# Invert the category -> class-index mapping so a class index can be turned
# back into its category name.
from labels.label_utils import category2class
class2category = dict((idx, name) for name, idx in category2class.items())
print(class2category)
# Box colours used when drawing: explicit entries for class 0 and class 2,
# and a freshly built fallback triple for any other class index.
class2color = defaultdict(lambda: [128, 0, 0], {0: [0, 128, 0], 2: [0, 0, 128]})
print(class2color)

{0: 'person', 1: 'bike', 2: 'car', 3: 'motor', 4: 'bus', 5: 'truck', 6: 'light', 7: 'hydrant', 8: 'sign', 9: 'other_vehicle'}
defaultdict(<function <lambda> at 0x0000015B4A8823A8>, {0: [0, 128, 0], 2: [0, 0, 128]})


In [6]:
# Pick the first CUDA device when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [7]:
# Output folders for the annotated images, created relative to the current
# working directory. `exist_ok=True` makes re-running the cell a no-op and
# avoids the check-then-create race of testing os.path.exists() first.
GT_dir = 'groundtruths'
os.makedirs(GT_dir, exist_ok=True)
det_dir = 'detections'
os.makedirs(det_dir, exist_ok=True)

### Load the configurations

In [8]:
# Parse the project configuration. safe_load() is used because calling
# yaml.load() without an explicit Loader is deprecated since PyYAML 5.1 and
# raises TypeError from PyYAML 6.0 onwards; safe_load only constructs plain
# Python containers and scalars.
with open('../configurations/config.yml', 'r') as f:
    configs = yaml.safe_load(f)
# Notebook-specific overrides: a fixed batch size, and the configured paths
# prefixed with '../' to match the relative location the config is read from.
configs['batch size'] = 32
for path_key in ('test path', 'annot path', 'save path'):
    configs[path_key] = '../' + configs[path_key]

### Load the test data

In [9]:
# Test-set pipeline: the only transform applied is ToTensor.
Transform = transforms.Compose([transforms.ToTensor()])

# Image folder and annotation list, both derived from the loaded config.
test_image_root = os.path.join(configs['test path'], 'data')
test_list_file = os.path.join(configs['annot path'], configs['test fname'])

test_set = ThermalDataset(
    root = test_image_root,
    list_file = test_list_file,
    image_size = configs['image size'],
    anchors = configs['anchors'],
    num_classes = configs['num class'],
    train = False,
    transform = Transform,
)

# Deterministic pass over the whole set: no shuffling, and the final
# (possibly smaller) batch is kept. Batching is delegated to the dataset's
# own collate_fn.
test_loader = DataLoader(
    test_set,
    batch_size = configs['batch size'],
    shuffle = False,
    drop_last = False,
    num_workers = configs['num workers'],
    collate_fn = test_set.collate_fn,
)

data init


### Retrieve the ground truths

In [10]:
# Draw the annotated boxes on every test image and save the result in GT_dir.
#
# Each annotation line is whitespace-separated: the image file name, then
# 7 tokens per object. For object i this code reads tokens 3+7i .. 7+7i as
# x1, y1, x2, y2 and the class index; the other two tokens of each group
# are not used here.
fields_per_object = 7

with open(os.path.join(configs['annot path'], configs['test fname']), 'r') as f:
    lines = f.readlines()
# Skip blank lines (e.g. a trailing newline at the end of the file); they
# would otherwise split to an empty list and fail on image_file[0].
file_list = [line.strip().split() for line in lines if line.strip()]

image_list = []

for image_file in file_list:

    image_id = image_file[0]

    source = os.path.join(configs['test path'], 'data', image_id)
    target = os.path.join(GT_dir, image_id)

    image_list.append(image_id)

    # read the image; cv2.imread returns None instead of raising when the
    # file is missing or cannot be decoded, so fail here with the path
    image = cv2.imread(source)
    if image is None:
        raise FileNotFoundError('could not read test image: {}'.format(source))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # find how many objects; range() is empty when there are none
    num_obj = (len(image_file) - 1) // fields_per_object
    for i in range(num_obj):
        base = fields_per_object * i
        x1, y1, x2, y2, c = (int(tok) for tok in image_file[3 + base:8 + base])
        color = class2color[c]

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

    # save the image (converted back to the BGR order cv2.imwrite expects)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imwrite(target, image)

### Utility functions

In [11]:
''' reshape image to target size '''
def Resize(rgb, target_size = 640):
    """Scale an image by target_size / max(H, W) and pad it to a square.

    The aspect ratio is preserved. Padding is added on the bottom and right
    edges only, so the scaled content stays anchored at the top-left corner.

    Parameters
    ----------
    rgb : image array; only ``rgb.shape[:2]`` (H, W) is inspected here.
    target_size : side length, in pixels, of the padded square output.

    Returns
    -------
    (padded_image, inverse_scale), where ``inverse_scale`` is the reciprocal
    of the scale factor that was applied to the input.
    """
    height, width = rgb.shape[:2]
    scale = float(target_size) / max(height, width)
    new_h, new_w = int(height * scale), int(width * scale)

    # cv2.resize takes its size argument as (W, H)
    scaled = cv2.resize(rgb, (new_w, new_h))

    # constant fill for the border; the original note describes this value
    # as the data mean
    fill = (135, 135, 135)
    padded = cv2.copyMakeBorder(
        scaled,
        0, target_size - new_h,      # top, bottom
        0, target_size - new_w,      # left, right
        cv2.BORDER_CONSTANT,
        value = fill,
    )
    return padded, 1.0 / scale

###########
# ---- model prediction ----
# Override the objectness threshold for this experiment before building the
# Prediction object; every other setting is read unchanged from `configs`.
configs['obj_thres'] = 0.99     # experiment

# Settings whose config key matches the Prediction keyword name exactly.
passthrough_keys = ('beta', 'obj_thres', 'conf_thres', 'loc_thres',
                    'sigma', 'nms_thres', 'top_k')
prediction_kwargs = {key: configs[key] for key in passthrough_keys}

prediction = Prediction(
    anchors = configs['anchors'],
    inp_dim = configs['image size'],
    num_classes = configs['num class'],
    CUDA = torch.cuda.is_available(),
    **prediction_kwargs
)

def predictions(model, img, ratio):
    """Run the model on a single image and return its boxes scaled by ``ratio``.

    Parameters
    ----------
    model : callable network, invoked once under ``torch.no_grad()``.
    img : image handed to the module-level ``Transform``; the result gets a
        leading batch axis and is moved to ``device``.
    ratio : multiplier applied to the four box coordinates.

    Returns
    -------
    ``None`` when the first entry of the ``prediction(...)`` output is None;
    otherwise a list of ``[cls_index, prob, x1, y1, x2, y2]`` rows, where the
    coordinates are ints, ``cls_index`` comes from the last element of each
    box and ``prob`` from element 4.
    """
    batch = Transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        raw_output = model(batch)

    # a batch of one went in, so only the first entry is of interest
    boxes = prediction(raw_output)[0]
    if boxes is None:
        return None

    return [
        [
            int(box[-1]),            # class index (tensor element -> int)
            float(box[4]),           # score
            int(box[0] * ratio),     # x1
            int(box[1] * ratio),     # y1
            int(box[2] * ratio),     # x2
            int(box[3] * ratio),     # y2
        ]
        for box in boxes
    ]

### Set up the model

In [12]:
# Checkpoint file name, resolved inside the configured save path.
pth_name = 'YOLO_IR_2023_CIoU_best_map.pth'
parameter_path = os.path.join(configs['save path'], pth_name)

net = Yolo(anchors = configs['anchors'], 
           num_classes = configs['num class'],
           cls_probs = torch.Tensor(configs['category frequency']), 
           inp_dim = configs['image size'])

# map_location lets a checkpoint that was saved from a GPU load on a
# CPU-only machine, matching the CPU fallback used when `device` was chosen.
model_dict = torch.load(parameter_path, map_location = device)
# The file holds either a bare state dict or a wrapper dict with the weights
# under 'model_weights'. Selecting explicitly, rather than retrying inside a
# bare `except:`, means a genuine loading error (e.g. a shape mismatch) is
# raised as-is instead of being replaced by a KeyError.
if isinstance(model_dict, dict) and 'model_weights' in model_dict:
    state_dict = model_dict['model_weights']
else:
    state_dict = model_dict
net.load_state_dict(state_dict)

net.to(device)
# inference mode; left as the last expression so the notebook echoes the model
net.eval()

Yolo(
  (backbone): Backbone(
    (m): Sequential(
      (0): Focus(
        (conv): Conv(
          (conv): Conv2d(12, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU()
        )
      )
      (1): Conv(
        (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
      (2): C3(
        (cv1): Conv(
          (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU()
        )
        (cv2): Conv(
          (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats

### Generate detections

In [13]:
# Run the detector over every test image, draw the predicted boxes on the
# original-resolution image and save the annotated copy in det_dir.
for image_path in image_list:

    source = os.path.join(configs['test path'], 'data', image_path)
    target = os.path.join(det_dir, image_path)

    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded, so fail here with the offending path
    image = cv2.imread(source)
    if image is None:
        raise FileNotFoundError('could not read test image: {}'.format(source))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # the network sees the padded square; predictions() multiplies the box
    # coordinates by `ratio` before returning them
    img, ratio = Resize(image)

    result = predictions(net, img, ratio)

    # predictions() returns None or a list of
    # [cls_index, prob, x1, y1, x2, y2]; `or []` covers None and empty alike
    for cls_index, _prob, x1, y1, x2, y2 in result or []:
        color = class2color[cls_index]
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)

    # save the image (converted back to the BGR order cv2.imwrite expects)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imwrite(target, image)

## Observations
- The recall rates for the two principal classes are pretty good
- However, the precision values for the smaller object categories are quite poor
- Maybe I need to set the class-confidence threshold adaptively, i.e., set it higher for traffic lights, hydrants, etc.