## Import Statements ##

In [4]:
## Libraries required for dataset generation ##

import cv2
import os
import numpy as np
import PIL
import glob
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import torch
from torch.utils.data import Dataset, DataLoader

## Getting raw training data ##

In [7]:
## Raw image data from the UA-DETRAC dataset: one sub-folder per video sequence ##

## Collect every sub-directory found below the training image root.
directory_list = list()
for root, dirs, files in os.walk("./Insight-MVT_Annotation_Train/", topdown=False):
    for name in dirs:
        directory_list.append(os.path.join(root, name))

## os.walk yields entries in an arbitrary, filesystem-dependent order.
## The folders are later paired by index with the annotation files, so the
## order must be deterministic: sort by path.
directory_list.sort()
directory_list

['./Insight-MVT_Annotation_Train/MVI_20011',
 './Insight-MVT_Annotation_Train/MVI_20012',
 './Insight-MVT_Annotation_Train/MVI_20032',
 './Insight-MVT_Annotation_Train/MVI_20033',
 './Insight-MVT_Annotation_Train/MVI_20034',
 './Insight-MVT_Annotation_Train/MVI_20035',
 './Insight-MVT_Annotation_Train/MVI_20051',
 './Insight-MVT_Annotation_Train/MVI_20052',
 './Insight-MVT_Annotation_Train/MVI_20061',
 './Insight-MVT_Annotation_Train/MVI_20062',
 './Insight-MVT_Annotation_Train/MVI_20063',
 './Insight-MVT_Annotation_Train/MVI_20064',
 './Insight-MVT_Annotation_Train/MVI_20065',
 './Insight-MVT_Annotation_Train/MVI_39761',
 './Insight-MVT_Annotation_Train/MVI_39771',
 './Insight-MVT_Annotation_Train/MVI_39781',
 './Insight-MVT_Annotation_Train/MVI_39801',
 './Insight-MVT_Annotation_Train/MVI_39811',
 './Insight-MVT_Annotation_Train/MVI_39821',
 './Insight-MVT_Annotation_Train/MVI_39851',
 './Insight-MVT_Annotation_Train/MVI_39861',
 './Insight-MVT_Annotation_Train/MVI_39931',
 './Insigh

### There are 60 different video sequence folders each consisting of image frames ###

In [8]:
# Number of sequence folders collected in directory_list.
len(directory_list)

60

## Getting annotation data / files

In [10]:
## Annotation files for each sequence in the UA-DETRAC dataset (one XML per sequence) ##

## Collect every XML file found below the annotation root.
annotation_file_list = list()
for root, dirs, files in os.walk("./DETRAC-Train-Annotations-XML/", topdown=False):
    for name in files:
        ## Skip stray non-XML files (OS metadata, thumbnails, ...): they would
        ## shift the index-based pairing with the image folders and cannot be
        ## parsed as annotations.
        if name.lower().endswith(".xml"):
            annotation_file_list.append(os.path.join(root, name))

## os.walk yields entries in an arbitrary, filesystem-dependent order.
## Sort so that the i-th annotation file lines up with the i-th sorted
## sequence folder.
annotation_file_list.sort()
annotation_file_list

['./DETRAC-Train-Annotations-XML/MVI_20011.xml',
 './DETRAC-Train-Annotations-XML/MVI_20012.xml',
 './DETRAC-Train-Annotations-XML/MVI_20032.xml',
 './DETRAC-Train-Annotations-XML/MVI_20033.xml',
 './DETRAC-Train-Annotations-XML/MVI_20034.xml',
 './DETRAC-Train-Annotations-XML/MVI_20035.xml',
 './DETRAC-Train-Annotations-XML/MVI_20051.xml',
 './DETRAC-Train-Annotations-XML/MVI_20052.xml',
 './DETRAC-Train-Annotations-XML/MVI_20061.xml',
 './DETRAC-Train-Annotations-XML/MVI_20062.xml',
 './DETRAC-Train-Annotations-XML/MVI_20063.xml',
 './DETRAC-Train-Annotations-XML/MVI_20064.xml',
 './DETRAC-Train-Annotations-XML/MVI_20065.xml',
 './DETRAC-Train-Annotations-XML/MVI_39761.xml',
 './DETRAC-Train-Annotations-XML/MVI_39771.xml',
 './DETRAC-Train-Annotations-XML/MVI_39781.xml',
 './DETRAC-Train-Annotations-XML/MVI_39801.xml',
 './DETRAC-Train-Annotations-XML/MVI_39811.xml',
 './DETRAC-Train-Annotations-XML/MVI_39821.xml',
 './DETRAC-Train-Annotations-XML/MVI_39851.xml',
 './DETRAC-Train-Ann

In [12]:
## There are 60 annotation files for 60 image sequence folders ##
# The two lists are paired by index in the dataset generation cell, so their lengths should match.
len(annotation_file_list)

60

## Dataset Generation Code ##

In [None]:
whole_dataset = []
datapoints_pos = []
datapoints_neg = []


def _read_sequence_images(imageseq_folder_name):
    """Load every ``*.jpg`` frame found directly inside one sequence folder.

    Frame numbers are assigned 1, 2, 3, ... in *sorted file-name order*.
    glob returns files in arbitrary order, so without sorting the frame
    numbers would not reliably line up with the annotation frame numbers.
    NOTE(review): this assumes the file names sort in frame order
    (e.g. zero-padded indices) -- confirm against the dataset.

    Returns a list of dicts with the keys 'frame_num', 'img', 'image_size',
    'image_shape', 'image_x', 'image_y' and 'image_channel'.
    Unreadable files are reported and skipped; their frame number is not
    reused, so the numbering of the remaining frames is unaffected.
    """
    seq_image_info = []
    image_files = sorted(glob.glob("{}/*.jpg".format(imageseq_folder_name)))
    for frame_num, file in enumerate(image_files, start=1):
        img = cv2.imread(file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            print("Skipping unreadable image: {}".format(file))
            continue

        image_shape = img.shape
        seq_image_info.append({
            'frame_num': frame_num,
            'img': img,
            'image_size': img.size,
            'image_shape': image_shape,
            # 'image_x' / 'image_y' hold the first / second entry of img.shape
            # (for OpenCV images: row count, then column count).
            'image_x': image_shape[0],
            'image_y': image_shape[1],
            'image_channel': image_shape[2],
        })
    return seq_image_info


def _parse_sequence_annotations(annotation_file_name):
    """Parse one annotation XML file into a list of per-frame target dicts.

    Each returned entry has 'frame_num' (int, from the frame's 'num'
    attribute) and 'frame_targets', a list of dicts with 'target_id' and
    'bbox_region' = [left, top, left + width, top + height] as floats.
    Frames without a <target_list> child are left out.
    Per-target <attribute> and <occlusion> elements are not read.
    """
    all_frames_in_sequence = []
    for frame in ET.parse(annotation_file_name).getroot().findall('frame'):
        frame_num = int(frame.attrib['num'])

        target_list = frame.find('target_list')
        if target_list is None:
            continue

        targets_in_frame = []
        for target in target_list.findall('target'):
            bbox = target.find('box').attrib
            x_min = float(bbox['left'])
            y_min = float(bbox['top'])
            x_max = x_min + float(bbox['width'])
            y_max = y_min + float(bbox['height'])
            targets_in_frame.append({
                'target_id': int(target.attrib['id']),
                'bbox_region': [x_min, y_min, x_max, y_max],
            })

        all_frames_in_sequence.append({
            'frame_num': frame_num,
            'frame_targets': targets_in_frame,
        })
    return all_frames_in_sequence


def _crop_sequence_targets(all_frames_in_sequence, seq_image_info):
    """Crop every annotated target out of its frame image.

    Returns ``(all_frame, all_targets)``:
    * all_frame   -- one dict per annotated frame that has an image, with
                     'frm_num' and 'frm_targets' (list of crop dicts);
    * all_targets -- flat list of every crop dict of the sequence.
    Each crop dict has 'tar_id' and 'tar_image'.

    Annotated frames without a loaded image are skipped. Boxes are clamped
    to the image bounds (a negative index would otherwise wrap around in
    the slice), and boxes that end up empty are dropped.
    """
    # Direct lookup instead of scanning every image for every frame.
    images_by_frame = {info['frame_num']: info for info in seq_image_info}

    all_frame = []
    all_targets = []
    for frame in all_frames_in_sequence:
        frame_image = images_by_frame.get(frame['frame_num'])
        if frame_image is None:
            continue

        frm_img = frame_image['img']
        img_h, img_w = frm_img.shape[:2]

        tar_in_frame = []
        for target_image in frame['frame_targets']:
            left, top, right, bottom = target_image['bbox_region']
            x1 = min(max(int(left), 0), img_w)
            x2 = min(max(int(right), 0), img_w)
            y1 = min(max(int(top), 0), img_h)
            y2 = min(max(int(bottom), 0), img_h)
            if x2 <= x1 or y2 <= y1:
                # Nothing of the box lies inside the image.
                continue

            trgt_info = {
                'tar_id': target_image['target_id'],
                'tar_image': frm_img[y1:y2, x1:x2],
            }
            tar_in_frame.append(trgt_info)
            all_targets.append(trgt_info)

        all_frame.append({
            'frm_num': frame['frame_num'],
            'frm_targets': tar_in_frame,
        })
    return all_frame, all_targets


def _append_target_pairs(all_frame, datapoints_pos, datapoints_neg):
    """Build positive / negative crop pairs from non-overlapping frame pairs.

    Frames are taken two at a time: (0, 1), (2, 3), ... For every
    combination of a target in the first frame and a target in the second
    frame, a {'pos1', 'pos2'} dict is appended to datapoints_pos when the
    target ids match, otherwise a {'neg1', 'neg2'} dict is appended to
    datapoints_neg. Both lists are extended in place.
    """
    # Stop at len - 1 so that x + 1 is always valid while the final complete
    # pair is still used (a bound of len - 2 silently dropped it whenever the
    # number of frames was even).
    for x in range(0, len(all_frame) - 1, 2):
        targets_in_fst_frm = all_frame[x]['frm_targets']
        targets_in_scnd_frm = all_frame[x + 1]['frm_targets']

        for frst_frm_tar in targets_in_fst_frm:
            for scnd_frm_tar in targets_in_scnd_frm:
                if frst_frm_tar['tar_id'] == scnd_frm_tar['tar_id']:
                    datapoints_pos.append({
                        'pos1': frst_frm_tar['tar_image'],
                        'pos2': scnd_frm_tar['tar_image'],
                    })
                else:
                    datapoints_neg.append({
                        'neg1': frst_frm_tar['tar_image'],
                        'neg2': scnd_frm_tar['tar_image'],
                    })


## Sequence folders and annotation files are paired by position. Both lists
## come from os.walk, whose order is not guaranteed, so sorted copies are
## paired here.
sequence_folders = sorted(directory_list)
annotation_files = sorted(annotation_file_list)
total_video_seq = min(len(sequence_folders), len(annotation_files))

## Loop over all video sequences
for flag in range(total_video_seq):
    imageseq_folder_name = sequence_folders[flag]
    annotation_file_name = annotation_files[flag]

    ## Warn when the folder name and the annotation file name disagree:
    ## the crops would then be cut with another sequence's boxes.
    seq_name = os.path.basename(os.path.normpath(imageseq_folder_name))
    ann_name = os.path.splitext(os.path.basename(annotation_file_name))[0]
    if seq_name != ann_name:
        print("Warning: folder {} is paired with annotation {}".format(
            imageseq_folder_name, annotation_file_name))

    ## Read the image frames of this sequence
    seq_image_info = _read_sequence_images(imageseq_folder_name)
    print(len(seq_image_info))  # number of image frames loaded for this sequence

    ## Parse the annotations and bounding boxes of this sequence
    all_frames_in_sequence = _parse_sequence_annotations(annotation_file_name)
    print(len(all_frames_in_sequence))  # number of annotated frames in this sequence

    ## Crop every target out of its frame
    all_frame, all_targets = _crop_sequence_targets(all_frames_in_sequence, seq_image_info)

    ## Pair crops across frames into positive / negative datapoints
    _append_target_pairs(all_frame, datapoints_pos, datapoints_neg)
#datapoints_neg[0]
#datapoints_pos[0]
#len(datapoints_pos)
#len(datapoints_neg)

## Training, Validation and Test dataset creation from the generated data points ##

In [None]:
## Split the positive pairs into train / dev / test subsets.
## Sizes: 60 % train, half of the remainder for dev, the rest for test.
## Only datapoints_pos is split here; datapoints_neg is not touched by this cell.

## Fixed seed so that a re-run reproduces the same split.
SPLIT_SEED = 42

num_pos_points = len(datapoints_pos)
train_size = int(0.6 * num_pos_points)
dev_size = int(0.5 * (num_pos_points - train_size))
test_size = num_pos_points - (train_size + dev_size)

train_dataset, dev_dataset, test_dataset = torch.utils.data.random_split(
    datapoints_pos,
    [train_size, dev_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

## Storing the Image Dataset ##

In [None]:
## Positive-pair train image dataset.
## For the validation or test split, iterate over dev_dataset / test_dataset
## instead and point output_dir at the matching folder.

## Single place to change the destination (was repeated in every call).
output_dir = 'D:/Projects/Tracking Project/Datasets/UA-Detrac Dataset/siamese_dataset/train_dataset/'

for point_idx, dtpoint in enumerate(train_dataset):
    img1 = dtpoint['pos1']
    img2 = dtpoint['pos2']

    ## cv2.imwrite cannot encode an empty image; skip such pairs instead of
    ## aborting the whole export.
    if img1 is None or img2 is None or img1.size == 0 or img2.size == 0:
        print("Skipping point {}: empty crop".format(point_idx))
        continue

    ## exist_ok=True keeps the cell re-runnable when the folders already exist.
    point_dir = os.path.join(output_dir, 'point' + str(point_idx))
    os.makedirs(point_dir, exist_ok=True)

    for suffix, img in (('pos1', img1), ('pos2', img2)):
        out_path = os.path.join(point_dir, "point_{}_{}.jpg".format(point_idx, suffix))
        ## cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(out_path, img):
            print("Failed to write {}".format(out_path))



In [2]:
## Checking some of the generated datapoints

In [None]:
## Show both crops of one positive pair, one window at a time.
## Each window stays open until a key is pressed.
i = 0
sample_pair = datapoints_pos[i]
for crop_key in ('pos1', 'pos2'):
    cv2.imshow("cropped", sample_pair[crop_key])
    cv2.waitKey(0)
    cv2.destroyAllWindows()