In [3]:
import torch
from IPython.display import Image  # for displaying images
import os 
import random
import shutil
import xml.etree.ElementTree as ET
from xml.dom import minidom
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import cv2
from pathlib import Path 
 
def extract_info_from_xml(xml_file):
    """Read one annotation XML file and collect its contents in a dict.

    Only the direct children of the XML root are inspected:

    * ``filename`` -> stored under ``'filename'`` (the element text).
    * ``size``     -> every child's text is converted to ``int`` and the
      values are stored, in document order, as a tuple under
      ``'image_size'``.
    * ``object``   -> one dict per element, appended to ``'bboxes'``. The
      ``name`` child is stored under ``'class'``; every child of ``bndbox``
      is stored under its own tag name as an ``int``.

    Args:
        xml_file: Anything ``xml.etree.ElementTree.parse`` accepts
            (a path or a file object).

    Returns:
        dict: always contains ``'bboxes'`` (possibly empty); ``'filename'``
        and ``'image_size'`` are present only if the matching elements exist.
    """
    annotation_root = ET.parse(xml_file).getroot()

    info_dict = {'bboxes': []}

    for node in annotation_root:
        tag = node.tag

        if tag == "filename":
            info_dict['filename'] = node.text

        elif tag == "size":
            # Child order in the document decides the order inside the tuple.
            info_dict['image_size'] = tuple(int(dim.text) for dim in node)

        elif tag == "object":
            box = {}
            for child in node:
                if child.tag == "name":
                    box["class"] = child.text
                elif child.tag == "bndbox":
                    # One integer entry per coordinate element, keyed by its tag.
                    box.update((coord.tag, int(coord.text)) for coord in child)
            info_dict['bboxes'].append(box)

    return info_dict

In [5]:
# Lookup table from class label to integer class ID
class_name_to_id_mapping = {"vehicle":0, "rider":1, "pedestrian":2}

images = []
annotations = []

rootpath = '/root/ubi/UBI_Dataset/'
# Each dataset folder of interest feeds exactly one of the two path lists.
path_lists = {'Annotations': annotations, 'JPEGImages': images}

for catalog_folder in tqdm(os.listdir(rootpath)):
    file_folder = os.path.join(rootpath, catalog_folder)
    if catalog_folder not in path_lists:
        continue

    # Both folders keep their per-sequence sub-folders under "All".
    file_folder = file_folder + '/All'
    for sequence_name in os.listdir(file_folder):
        if catalog_folder == 'JPEGImages':
            print(sequence_name)
        filepath = os.path.join(file_folder, sequence_name)
        path_lists[catalog_folder].extend(
            os.path.join(filepath, entry) for entry in os.listdir(filepath)
        )

# Images and annotations are paired purely by position after sorting, so both
# trees are assumed to use matching sequence/file names -- TODO confirm.
images.sort()
annotations.sort()
print(len(images), len(annotations))

# Split into train / validation / test.
# NOTE(review): holding out 20% and then taking 30% of that hold-out as the
# test set gives roughly 80/14/6, not 7:2:1.
train_img, holdout_img, train_anns, holdout_anns = train_test_split(
    images, annotations, test_size=0.2, random_state=113
)
val_img, test_img, val_anns, test_anns = train_test_split(
    holdout_img, holdout_anns, test_size=0.3, random_state=113
)

print("Training set:", len(train_img), "validation set:", len(val_img), "test set:", len(test_img))


100%|██████████| 4/4 [00:00<00:00, 16.10it/s]


Cityscape_06.mp4
Cityscape_05.mp4
Cityscape_04.mp4
Fog_01.mp4
Cityscape_01.mp4
Cityscape_02.mp4
Cityscape_03.mp4
Rainy_01.mp4
Rainy_02.mp4
62515 62515
Training set: 50012 validation set: 8752 test set: 3751


In [3]:
def move_files_to_folder(list_of_files, destination_folder):
    """Copy every file in ``list_of_files`` into ``destination_folder``.

    Despite the name, the files are copied with ``shutil.copy``; the source
    files are left where they are.

    Args:
        list_of_files: Iterable of source file paths.
        destination_folder: Directory that receives the copies. It is created,
            together with any missing parent directories, if it does not exist.

    Raises:
        Exception: whatever ``shutil.copy`` raised for the first file that
            could not be copied. The offending path is printed first so it
            is visible next to the progress bar.
    """
    # makedirs (instead of mkdir) also creates missing parents and does not
    # fail when the folder already exists.
    os.makedirs(destination_folder, exist_ok=True)

    for f in tqdm(list_of_files):
        try:
            shutil.copy(f, destination_folder)
        except Exception:
            # Report which file failed, then let the real error propagate.
            # `assert False` would hide the cause and vanish under `python -O`.
            print(f)
            raise

# Root of the split dataset, relative to the notebook's working directory.
# The split folders are created and filled under this one root; previously
# they were created under "../Data" but the files were copied to "../../Data".
data_root = Path("../Data")

train_files = data_root / "train"
train_files.mkdir(parents=True, exist_ok=True)
val_files = data_root / "val"
val_files.mkdir(parents=True, exist_ok=True)
test_files = data_root / "test"
test_files.mkdir(parents=True, exist_ok=True)

# Copy each split into <split>/labels and <split>/images (sources stay in place).
for split_dir, split_anns, split_imgs in (
    (train_files, train_anns, train_img),
    (val_files, val_anns, val_img),
    (test_files, test_anns, test_img),
):
    move_files_to_folder(split_anns, str(split_dir / 'labels'))
    move_files_to_folder(split_imgs, str(split_dir / 'images'))

100%|██████████| 50012/50012 [06:02<00:00, 138.04it/s]
100%|██████████| 50012/50012 [10:02<00:00, 82.98it/s] 
100%|██████████| 8752/8752 [01:09<00:00, 125.29it/s]
100%|██████████| 8752/8752 [01:41<00:00, 86.03it/s] 
100%|██████████| 3751/3751 [00:32<00:00, 116.60it/s]
100%|██████████| 3751/3751 [00:43<00:00, 86.13it/s] 


In [4]:
rootpath = '/root/ubi/UBI_SSD/Data'
data = {'train':[], 'val':[], 'test':[]}

# For every split folder, collect the sorted image names without '.jpg'.
for catalog_folder in tqdm(os.listdir(rootpath)):  # train, val, test
    abspath = os.path.join(rootpath, catalog_folder)
    if catalog_folder not in data:
        # Skip stray entries (e.g. notebook checkpoint folders) instead of
        # failing with KeyError on data[catalog_folder].
        continue
    file_folder = os.path.join(rootpath, catalog_folder, 'images')
    for filename in os.listdir(file_folder):
        # str.rstrip('.jpg') removes any trailing '.', 'j', 'p' or 'g'
        # characters, so "img.jpg" would become "im". Drop the exact suffix.
        if filename.endswith('.jpg'):
            filename = filename[:-len('.jpg')]
        data[catalog_folder].append(filename)
    data[catalog_folder].sort()

100%|██████████| 3/3 [00:00<00:00, 46.92it/s]


In [5]:
# Write one "<split>.txt" per split, listing one collected name per line.
for item in tqdm(data):
    with open(f'../Data/{item}/{item}.txt', 'w') as split_listing:
        split_listing.writelines(f"{line}\n" for line in data[item])

100%|██████████| 3/3 [00:00<00:00, 179.85it/s]
