In [8]:
from traffic_cams.non_max_suppression import nms_xywh
from traffic_cams.util import targets_to_results
from traffic_cams.model import AdvancedObjectDetector

import torch
from torchvision import transforms

from glob import glob
import pandas as pd
import numpy as np
from pathlib import Path
from PIL import Image

In [9]:
# Pick the compute device once: CUDA when torch reports it usable, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Report the choice so the cell output records which device the run used.
print("GPU is available" if device.type == "cuda" else "GPU is not available, using CPU instead")

GPU is available


In [10]:
# Class names; a predicted class id is used as an index into this list.
classes = ['passenger_car', 'pedestrian', 'bus', 'tram', 'utility_vehicle', 'lorry']

# Network input resolution: half of 1280 x 704.
model_img_width = 1280 // 2
model_img_height = 704 // 2

# Every class receives the same six anchor pairs, so the resulting array has
# shape (K, n_anchors, 2) with K = len(classes).
# Anchors are relative to image size.
anchors = np.array([
    [[0.0530, 0.0304],
     [0.0957, 0.0671],
     [0.1610, 0.1170],
     [0.2561, 0.1953],
     [0.4059, 0.3488],
     [0.6490, 0.7201]]
    for _ in classes
])
# Instantiate the detector. The first positional argument is the class count
# and num_anchors is the size of the second axis of `anchors`.
model = AdvancedObjectDetector(
    len(classes),
    num_anchors=anchors.shape[1],
    backbone='efficientnet_b7',
)
# Stride reported by the model; targets_to_results needs it when decoding
# the raw predictions.
stride = model.get_stride()

In [12]:
# Name of the checkpoint file (without extension) to restore.
checkpoint_name = 'best_efficennet_b7_70ep_baseline'

# Map all stored tensors to the CPU on load, whatever device they were saved from.
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
state_dict = torch.load(
    f'traffic_cams/baseline_submission/{checkpoint_name}.pth',
    map_location=torch.device('cpu'),
)

# The weights live under the 'model_state_dict' key of the loaded object.
model.load_state_dict(state_dict['model_state_dict'])
# Put the model in evaluation mode before running predictions.
model.eval()

# `history` is a second name for the whole loaded checkpoint object.
history = state_dict

In [14]:
# Collect the test images in a stable order. glob() yields matches in an
# arbitrary, filesystem-dependent order, and the row index written to the
# submission follows iteration order, so sort to make the output reproducible.
test_img_paths = sorted(glob("traffic_cams/datasets/to_kaggle/test_images/images/*.jpg"))  # Replace the path here

# Preprocessing for every image: resize to the network input size
# (height, width), then convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((model_img_height, model_img_width)),
    transforms.ToTensor(),
])
# Move the model to the selected device before running any forward pass.
model.to(device)

# Column-oriented accumulator for the submission table: every kept detection
# appends exactly one value to each list.
out_data = {'image_id': [],
            'confidence': [],
            'class_name': [],
            'x_min': [],
            'y_min': [],
            'x_max': [],
            'y_max': []}

# Pure inference: disable autograd so no graph is recorded for each image.
with torch.no_grad():
    for path in test_img_paths:
        img_id = Path(path).stem

        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it once the tensor has been built. convert('RGB')
        # guarantees three channels even for grayscale/CMYK JPEGs.
        with Image.open(path) as image:
            transf_image = transform(image.convert('RGB')).unsqueeze(0).to(device)

        # Call the module itself (not .forward) so registered hooks still run.
        pred_targets = model(transf_image).cpu()

        # Decode the raw prediction of the single image in the batch.
        # NOTE(review): H=720, W=1280 are hard-coded; presumably the original
        # image size -- confirm every test image has these dimensions.
        class_ids, confidences, bboxes = targets_to_results(
            pred_targets[0], anchors, len(classes), stride=stride,
            model_h=model_img_height, model_w=model_img_width, H=720, W=1280)
        class_ids, confidences, bboxes = nms_xywh(
            class_ids, confidences, bboxes, score_threshold=0.25, iou_threshold=0.2)

        # Boxes arrive as (x, y, w, h); the corner columns are computed as
        # x -/+ w/2 and y -/+ h/2.
        for class_id, conf, (x, y, w, h) in zip(class_ids, confidences, bboxes):
            out_data['image_id'].append(img_id)
            out_data['confidence'].append(conf)
            out_data['class_name'].append(classes[int(class_id)])
            out_data['x_min'].append(x - w / 2)
            out_data['y_min'].append(y - h / 2)
            out_data['x_max'].append(x + w / 2)
            out_data['y_max'].append(y + h / 2)
        
# Turn the per-detection columns into a table (one row per detection).
df = pd.DataFrame(out_data)

# Generate submission here
# The DataFrame index is written as the first CSV column under the header 'ID'.
df.to_csv('traffic_cams/baseline_submission.csv', index_label='ID', index=True)