In [1]:
import torch
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [2]:
# Parameters

# Detector confidence threshold (assigned to the YOLO model's `conf`) and the
# side length, in pixels, that every detected crop is resized to.
yolo_detection_conf = 0.4
DAE_in_size = 64

# Dataset root and output root. Both keep their trailing slash because the
# rest of the notebook builds paths by plain string concatenation.
data_path = './data/UCSD_Anomaly_Dataset.v1p2/UCSDped1/'
output = './output/UCSDped1/'

# Per-split input and output folders (also slash-terminated).
train_path, test_path = (f'{data_path}{split}/' for split in ('Train', 'Test'))
output_train_path, output_test_path = (f'{output}{split}/' for split in ('Train', 'Test'))

In [3]:
# Fetch the YOLOv5-x detector through torch.hub (served from the local hub
# cache when one is present).
model = torch.hub.load('ultralytics/yolov5', 'yolov5x')

# Restrict detection to the object classes of interest for this dataset.
# NOTE(review): the ids presumably index the COCO label list shipped with the
# pretrained weights - confirm against `model.names` before changing them.
# UCSD ped1 (refined for the objects of interest)
model.classes = [
    0, 1, 2, 3,   # presumably person, bicycle, car, motorcycle
    7,            # presumably truck
    24, 26,       # presumably backpack, handbag
    36,           # presumably skateboard
]
# UCSD ped2 (refined for the objects of interest)
#model.classes = [0,1,7,36]

# Detections scoring below this confidence are discarded by the model.
model.conf = yolo_detection_conf

Using cache found in C:\Users\basarbatu/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2021-11-3 torch 1.10.0+cu113 CUDA:0 (NVIDIA GeForce RTX 2070 SUPER, 8192MiB)

Fusing layers... 
Model Summary: 444 layers, 86705005 parameters, 0 gradients
Adding AutoShape... 


In [4]:
# Discover the clip folders of both splits and sanity-check one frame.
#
# os.listdir returns entries in arbitrary order, so every listing is sorted:
# the clip order decides the frame numbering assigned in the detection loops
# and should be reproducible across machines and runs.
train_folders = sorted(
    train_path + x
    for x in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, x))
)
# Folders ending in 'gt' hold the ground-truth masks and are read separately,
# next to their clip, so they are not treated as clips themselves.
test_folders = sorted(
    test_path + x
    for x in os.listdir(test_path)
    if os.path.isdir(os.path.join(test_path, x)) and not x.endswith('gt')
)
if not train_folders:
    raise FileNotFoundError('no training clip folders found in ' + train_path)

# Hidden files (names starting with '.') are not frames.
images = sorted(x for x in os.listdir(train_folders[0]) if not x.startswith('.'))
if not images:
    raise FileNotFoundError('no frames found in ' + train_folders[0])

# Load the first frame and show its shape as a quick check of the data path.
input_img_path = train_folders[0] + '/' + images[0]
img = cv2.imread(input_img_path)
if img is None:
    # cv2.imread signals failure by returning None rather than raising
    raise IOError('could not read image ' + input_img_path)
print(img.shape)

(158, 238, 3)


In [5]:
# Run the detector on every training frame, save a fixed-size crop of each
# detection and build `data_meta` with one row per saved crop.
#
# Defines: data_meta (DataFrame), frame_index and yolo_obj_index (running
# counters that the test cell keeps incrementing).

meta_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name',
                'ori_im_path', 'scaled_im_path', 'label', 'frame_n', 'test_train']

# cv2.imwrite returns False instead of raising when the target folder is
# missing, so make sure it exists before writing any crop.
os.makedirs(output_train_path, exist_ok=True)

frame_index = 0
yolo_obj_index = 0
# Per-frame detection tables; concatenated once at the end instead of growing
# a DataFrame row by row (quadratic, and DataFrame.append no longer exists in
# pandas 2.x).
train_detections = []

# reading train images
for fold in tqdm(train_folders):
    # sorted: os.listdir order is arbitrary, frame_n should follow frame order
    images = sorted(x for x in os.listdir(fold) if x[0] != '.')
    for image_name in images:
        # read image
        input_img_path = fold + '/' + image_name
        img = cv2.imread(input_img_path)
        if img is None:
            # not a readable image (stray file in the clip folder): skip it
            continue
        # make prediction; cv2 loads BGR while the YOLOv5 wrapper expects RGB arrays
        results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        out = results.pandas().xyxy[0]
        # integer pixel boxes (truncated toward zero, as int() would do)
        boxes = out[['xmin', 'ymin', 'xmax', 'ymax']].to_numpy().astype(int)
        kept = []
        saved_paths = []
        # go through all the detections
        for xmin, ymin, xmax, ymax in boxes:
            # extract image (crop from the original BGR frame, as imwrite expects BGR)
            yolo_object_img = img[ymin:ymax, xmin:xmax, :]
            if yolo_object_img.size == 0:
                # box collapsed to zero area after truncation; cv2.resize would fail
                kept.append(False)
                continue
            # rescale the cropped image
            resized_yolo_object_img = cv2.resize(yolo_object_img, (DAE_in_size, DAE_in_size), interpolation=cv2.INTER_AREA)
            # save the scaled image
            saved_path = output_train_path + '{:06d}.png'.format(yolo_obj_index)
            if not cv2.imwrite(saved_path, resized_yolo_object_img):
                raise IOError('could not write ' + saved_path)
            kept.append(True)
            saved_paths.append(saved_path)
            yolo_obj_index = yolo_obj_index + 1
        # keep only the detections whose crop was saved, then add the meta data
        out = out.loc[np.array(kept, dtype=bool)].copy()
        out['test_train'] = 'train'
        out['frame_n'] = frame_index
        out['ori_im_path'] = input_img_path
        out['scaled_im_path'] = saved_paths
        # every training crop is labelled 0 (no abnormal pixels)
        out['label'] = 0
        if len(out):
            train_detections.append(out)
        frame_index = frame_index + 1

if train_detections:
    data_meta = pd.concat(train_detections, ignore_index=True).reindex(columns=meta_columns)
else:
    data_meta = pd.DataFrame(columns=meta_columns)

print('Object detection for training images completed...')

100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [05:05<00:00,  8.99s/it]

Object detection for training images completed...





In [6]:
# Run the detector on every test frame, save a fixed-size crop of each
# detection and append one metadata row per saved crop to `data_meta`.
# Each row's label is the fraction of ground-truth mask pixels inside the box.
#
# Continues the `frame_index` / `yolo_obj_index` counters and extends the
# `data_meta` table left by the training cell, so that cell must run first.

# cv2.imwrite returns False instead of raising when the target folder is
# missing, so make sure it exists before writing any crop.
os.makedirs(output_test_path, exist_ok=True)

# Per-frame detection tables; concatenated once at the end instead of growing
# a DataFrame row by row (quadratic, and DataFrame.append no longer exists in
# pandas 2.x).
test_detections = []

# reading test images
for fold in tqdm(test_folders):
    # sorted: os.listdir order is arbitrary, frame_n should follow frame order
    images = sorted(x for x in os.listdir(fold) if x[0] != '.')
    for image_name in images:
        # read image
        input_img_path = fold + '/' + image_name
        img = cv2.imread(input_img_path)
        if img is None:
            # not a readable image (stray file in the clip folder): skip it
            continue
        # read annotation as a single-channel mask. IMREAD_GRAYSCALE is the
        # imread flag for that; cv2.COLOR_BGR2GRAY is a cvtColor code and, used
        # as a flag, may load a 3-channel mask and inflate the label below.
        frame_stem = image_name.split('.')[0]
        input_annot_path = fold + '_gt/' + frame_stem + '.bmp'
        label = cv2.imread(input_annot_path, cv2.IMREAD_GRAYSCALE)
        # in some UCSDped2 cases the label file name starts with 'frame'
        if label is None:
            input_annot_path = fold + '_gt/frame' + frame_stem + '.bmp'
            label = cv2.imread(input_annot_path, cv2.IMREAD_GRAYSCALE)
        # for some cases there is no ground truth: use an all-zero mask
        if label is None:
            label = np.zeros(img.shape[:2], dtype=img.dtype)
        # make prediction; cv2 loads BGR while the YOLOv5 wrapper expects RGB arrays
        results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        out = results.pandas().xyxy[0]
        # integer pixel boxes (truncated toward zero, as int() would do)
        boxes = out[['xmin', 'ymin', 'xmax', 'ymax']].to_numpy().astype(int)
        kept = []
        saved_paths = []
        labels = []
        # go through all the detections
        for xmin, ymin, xmax, ymax in boxes:
            # extract image (crop from the original BGR frame) and matching mask
            yolo_object_img = img[ymin:ymax, xmin:xmax, :]
            if yolo_object_img.size == 0:
                # box collapsed to zero area after truncation; cv2.resize would fail
                kept.append(False)
                continue
            yolo_object_label = label[ymin:ymax, xmin:xmax]
            # rescale the cropped image
            resized_yolo_object_img = cv2.resize(yolo_object_img, (DAE_in_size, DAE_in_size), interpolation=cv2.INTER_AREA)
            # save the scaled image
            saved_path = output_test_path + '{:06d}.png'.format(yolo_obj_index)
            if not cv2.imwrite(saved_path, resized_yolo_object_img):
                raise IOError('could not write ' + saved_path)
            # label: share of the box covered by the mask (mask values scaled by 255)
            is_abnormal = np.sum(yolo_object_label / 255) / (yolo_object_label.shape[0] * yolo_object_label.shape[1])
            kept.append(True)
            saved_paths.append(saved_path)
            labels.append(is_abnormal)
            yolo_obj_index = yolo_obj_index + 1
        # keep only the detections whose crop was saved, then add the meta data
        out = out.loc[np.array(kept, dtype=bool)].copy()
        out['test_train'] = 'test'
        out['frame_n'] = frame_index
        out['ori_im_path'] = input_img_path
        out['scaled_im_path'] = saved_paths
        out['label'] = labels
        if len(out):
            test_detections.append(out)
        frame_index = frame_index + 1

# append the test rows after the rows already in data_meta (column order of
# data_meta is kept because it is the first frame in the concatenation)
if test_detections:
    data_meta = pd.concat([data_meta] + test_detections, ignore_index=True)

print('Object detection for test images completed...')

100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [13:51<00:00, 23.09s/it]

Object detection for test images completed...





In [7]:
# Save the meta file (one row per saved crop) next to the crop folders.
# to_csv does not create missing folders, so make sure the output root exists.
os.makedirs(output, exist_ok=True)
data_meta.to_csv(output + 'meta_data.csv', index=False)