This program is used to convert the LISA Dataset CSV format to other custom dataset formats:  
* Detectron2 for Faster R-CNN  
* Darknet for <span style="color:red">YOLOv4</span>


In [6]:
#@title connect to drive
# Mount Google Drive at /content/drive; every dataset path configured in this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#@title specify paths for conversion
# The trailing "#@param" markers turn these assignments into Colab form fields.
#@markdown path to LISA directory
# LISA_path: allAnnotations.csv is read from here and the COCO_Annotations_*.json
# files are written here as well.
LISA_path = "/content/drive/MyDrive/thesis/LISA dataset/" #@param {type:"string"}
#@markdown path where Detectron2 directory will be made
# NOTE(review): COCO_path is not referenced by any other cell shown in this
# notebook -- confirm whether the JSON files were meant to be written there.
COCO_path = "/content/drive/MyDrive/thesis/COCO" #@param {type:"string"}
#@markdown path where Darknet directory will be made
# YOLO_path: receives class.names plus the train/valid/test/backup folders.
YOLO_path = "/content/drive/MyDrive/thesis/YOLO/dataset" #@param {type:"string"}

In [None]:
#@title import common python libraries
import numpy as np
import os, json, cv2, random, time, ntpath, csv
from shutil import copyfile
from google.colab.patches import cv2_imshow

In [None]:
#@title specify class names
# Sign classes used for training, stored as one newline-separated string.
# A label's line position in this string is its numeric category id, and the
# same text is written verbatim to the Darknet class.names file.
class_names = "\n".join((
    "pedestrianCrossing",
    "signalAhead",
    "speedLimit35",
    "speedLimit25",
    "keepRight",
    "addedLane",
    "merge",
    "yield",
    "laneEnds",
    "stopAhead",
    "speedLimit45",
    "speedLimit30",
    "school",
))
print(class_names)

pedestrianCrossing
signalAhead
speedLimit35
speedLimit25
keepRight
addedLane
merge
yield
laneEnds
stopAhead
speedLimit45
speedLimit30
school


#Detectron2 Custom dataset format
Detectron2 dataset format explained in the detectron2 manual found [here](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html)  
Note: `bbox_mode` is stored as the integer 0 rather than as `BoxMode.XYXY_ABS`. The enum would be written to JSON as 0 anyway and cannot be loaded back as a detectron2 object, so `bbox_mode` should be set to the correct mode (`BoxMode.XYXY_ABS`) after the file is loaded.  


In [None]:
#@title create switch function for getting name index, if -1 error
#get unique category id for each sign type
#returns negative if not valid
#these are the classes with 200 or more instances
def get_category_num(category):
    """Return the zero-based class index of a sign label, or -1 if unknown.

    The index is the label's line position inside the module-level
    ``class_names`` string, so labels missing from that string are rejected.
    """
    index_by_label = {
        label: position
        for position, label in enumerate(class_names.split("\n"))
    }
    return index_by_label.get(category, -1)

In [None]:
# this code was made to generate dict file to let trainer know the metadata for each image and annotations for each

#function to format dict for training images
def get_sign_dicts(img_dir):
    """Build Detectron2-style records from the LISA ``allAnnotations.csv`` file.

    One record is produced per CSV row whose ``Annotation tag`` maps to a
    known class (``get_category_num`` does not return -1); every other row
    is skipped. Each record carries exactly one annotation.

    Args:
        img_dir: Directory that contains ``allAnnotations.csv``. The CSV's
            ``Filename`` column is joined onto this directory.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``.

    Raises:
        OSError: If OpenCV cannot read an annotated image (missing or
            corrupt file).
    """
    # all annotations are stored in a single semicolon-separated file
    csv_file = os.path.join(img_dir, "allAnnotations.csv")

    dataset_dicts = []
    # The same image may appear on several rows (presumably one per annotated
    # sign), so remember its size instead of decoding the file again.
    image_sizes = {}

    with open(csv_file, newline='') as f:
        reader = csv.DictReader(f, dialect='excel', delimiter=';')
        for row in reader:
            category_id = get_category_num(row['Annotation tag'])
            # category id must be valid to add to dict
            if category_id == -1:
                continue

            filename = os.path.join(img_dir, row["Filename"])
            if filename not in image_sizes:
                image = cv2.imread(filename)
                # cv2.imread signals failure by returning None, not by raising
                if image is None:
                    raise OSError("could not read image: " + filename)
                image_sizes[filename] = image.shape[:2]
            height, width = image_sizes[filename]

            record = {
                "file_name": filename,
                "image_id": row["Filename"],  # all images have unique filename
                "height": height,
                "width": width,
                "annotations": [
                    {
                        "bbox": [int(row["x1"]), int(row["y1"]),
                                 int(row["x2"]), int(row["y2"])],
                        # should be BoxMode.XYXY_ABS but will be set on load instead
                        "bbox_mode": 0,
                        "category_id": category_id,
                    }
                ],
            }

            # logging: running count of accepted rows, then the image id
            print(len(dataset_dicts), record["image_id"])
            dataset_dicts.append(record)

    return dataset_dicts

# Convert every usable LISA annotation into a record list.
data = get_sign_dicts(LISA_path)

# Persist the complete list as JSON inside the LISA directory; the "w+" mode
# creates the file when it does not exist yet.
full_json_path = os.path.join(LISA_path, "COCO_Annotations_Full.json")
with open(full_json_path, "w+") as outfile:
    outfile.write(json.dumps(data))

# separate into different subsets: train, test and valid

In [None]:
#@title create json file for Test, Train and Valid sets from Full json

# Load the full record list written by the conversion cell.
with open(os.path.join(LISA_path, "COCO_Annotations_Full.json"), "r") as infile:
    data = json.load(infile)

# Shuffle in place so every subset is a random sample. The shuffle is not
# seeded, so each run produces a different partition.
random.shuffle(data)

# NOTE(review): records are per annotation row, so an image that carries more
# than one annotation can end up in more than one subset -- confirm whether
# that is acceptable for evaluation.

# Split off the test set; test_ratio is the percentage of records held out.
test_ratio = 10  # 10% test data
test_bp = len(data) * test_ratio // 100
test = data[:test_bp]
interm_data = data[test_bp:]  # intermediate data to create train and valid set

# Take 1/k of the remaining records as the validation set, the rest is train.
k = 4
valid_bp = len(interm_data) // k
valid = interm_data[:valid_bp]
train = interm_data[valid_bp:]

# Write each subset to its own JSON file next to the full annotation file
# (opening for writing creates the file when it does not exist).
for subset_file, subset in (
    ("COCO_Annotations_Train.json", train),
    ("COCO_Annotations_Valid.json", valid),
    ("COCO_Annotations_Test.json", test),
):
    with open(os.path.join(LISA_path, subset_file), "w") as outfile:
        json.dump(subset, outfile)

#Darknet Custom dataset format
The Darknet dataset format is explained in the AlexeyAB Darknet README: https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects  
This section copies the images into a new dataset directory, one folder per partition [train, valid, test], and writes a label text file next to each copied image.  
It uses the JSON files generated for the Detectron2 dataset format.

In [None]:
#@title create yolo dataset from LISA in darknet format

# print_time is called throughout this cell, but no cell shown in this notebook
# defines it. Fall back to a minimal timestamped print only when it is missing,
# so an existing definition is never replaced.
try:
    print_time
except NameError:
    def print_time(message):
        print(time.strftime("%H:%M:%S"), message)

print_time("opening files")

# Load the three subsets written by the split cell. The with-statement closes
# each file, so no explicit close() is needed.
with open(os.path.join(LISA_path, "COCO_Annotations_Test.json"), "r") as outfile:
    test = json.load(outfile)

with open(os.path.join(LISA_path, "COCO_Annotations_Valid.json"), "r") as outfile:
    valid = json.load(outfile)

with open(os.path.join(LISA_path, "COCO_Annotations_Train.json"), "r") as outfile:
    train = json.load(outfile)

#use train and test to create YOLO format dataset

print_time("creating class.names file")

# The dataset root must exist before class.names can be written into it.
os.makedirs(YOLO_path, exist_ok=True)

#class names for yolov4 format
with open(os.path.join(YOLO_path, "class.names"), "w+") as names:
    names.write(class_names)

print_time("creating dataset folders")
#make directories for test and train pictures
train_dir  = os.path.join(YOLO_path, "train")
valid_dir  = os.path.join(YOLO_path, "valid")
test_dir   = os.path.join(YOLO_path, "test")
backup_dir = os.path.join(YOLO_path, "backup")

for x in [train_dir, valid_dir, test_dir, backup_dir]:
  # os.makedirs is the recursive directory creator; os.mkdirs does not exist
  os.makedirs(x, exist_ok=True)

print_time("finished creating dataset folders")

def generate_files_darknet(ipath, dpath, data, sel):
    """Write Darknet label files and copy images for one dataset partition.

    For every image referenced in ``data`` a ``<name>.txt`` file is written to
    ``dpath/sel`` with one line per annotation in the form
    ``category_id center_x center_y width height`` (box values divided by the
    image width/height), and the image is copied next to it.

    Records that point at the same image are merged into a single label
    file, and every entry of a record's ``annotations`` list is written, so
    an image with several boxes keeps all of them.

    Args:
        ipath: Initial path; joined with each record's ``file_name`` to find
            the source image (an absolute ``file_name`` is used as-is by
            ``os.path.join``).
        dpath: Destination dataset root.
        data: Records as generated for Faster R-CNN (keys ``file_name``,
            ``width``, ``height``, ``annotations`` with ``bbox`` given as
            x1, y1, x2, y2 and ``category_id``).
        sel: Selection, i.e. the partition sub-folder name such as
            ``"train"``, ``"valid"`` or ``"test"``; it must already exist.
    """
    # label path -> [source image path, destination image path, label lines]
    # NOTE(review): images are keyed by base name, so two images in different
    # folders with the same base name would share one label file.
    pending = {}

    for row in data:
        img_path = row["file_name"]
        img_width = row["width"]
        img_height = row["height"]

        # ntpath.basename splits on both "/" and "\\" separators
        filename_ext = ntpath.basename(img_path)
        filename = os.path.splitext(filename_ext)[0]
        text_path = os.path.join(dpath, sel, filename + ".txt")

        if text_path not in pending:
            start_path = os.path.join(ipath, img_path)           # src path to pict
            dest_path = os.path.join(dpath, sel, filename_ext)   # dest path of pict
            pending[text_path] = [start_path, dest_path, []]

        for annotation in row["annotations"]:
            x1, y1, x2, y2 = annotation["bbox"][:4]

            # box centre and size, relative to the image dimensions
            center_x = (x2 + x1) / (2 * img_width)
            center_y = (y2 + y1) / (2 * img_height)
            width = (x2 - x1) / img_width
            height = (y2 - y1) / img_height

            temp = [annotation["category_id"], center_x, center_y, width, height]
            output = ' '.join(map(str, temp))
            print(output)
            pending[text_path][2].append(output)

    # Write each label file once, after all of its boxes are known, so later
    # records for the same image cannot overwrite earlier ones.
    for text_path, (start_path, dest_path, lines) in pending.items():
        with open(text_path, "w") as text_file:
            text_file.write("\n".join(lines))
        copyfile(start_path, dest_path)                          # copy file to dest


   
print_time("copying pictures and creating txt files in new folders")
#copy pictures and data to new folders
generate_files_darknet(LISA_path, YOLO_path, train, "train")
generate_files_darknet(LISA_path, YOLO_path, valid, "valid")
generate_files_darknet(LISA_path, YOLO_path, test, "test")
print_time("finished copying and generating files")

print_time("creating txt files for image paths")
#create test and train text files
!ls "$test_dir/"*.png  >  "$yolo_path\test.txt"
!ls "$valid_dir/"*.png  >  "$yolo_path\valid.txt"
!ls "$train_dir/"*.png >  "$yolo_path\train.txt"
print_time("finished creating txt files")

In [70]:
import cv2, os, json
from google.colab.patches import cv2_imshow

# Crop one example sign per listed image and save it under the sign's name.
YOLO_path = "/content/drive/MyDrive/thesis/YOLO/dataset"

# Base names (without extension) of the images to crop; the text before the
# first "_" becomes the output file name.
flist = ['pedestrian_1323896918.avi_image17',
         'addedLane_1331865841.avi_image2',
         'speedLimit25_1333396150.avi_image3',
         'speedLimit35_1333397516.avi_image3',
         'speedLimit30_1333395349.avi_image13',
         'speedLimit45_1333393955.avi_image5',
         'pedestrianCrossing_1333395980.avi_image5',
         'laneEnds_1333394350.avi_image10',
         'merge_1331866392.avi_image22',
         'keepRight_1323823831.avi_image3',
         'stopAhead_1323819280.avi_image9',
         'school_1330547188.avi_image3',
         'yield_1323802820.avi_image0'
         ]

LISA_path = "/content/drive/MyDrive/thesis/LISA dataset/"
with open(os.path.join(LISA_path, "COCO_Annotations_Full.json"), "r") as infile:
    data = json.load(infile)

# cv2.imwrite does not create missing folders, so make sure the target exists.
crop_dir = "/content/drive/MyDrive/thesis/signimages/"
os.makedirs(crop_dir, exist_ok=True)

for row in data:
    for f in flist:
        # substring match: the record's path must contain the listed base name
        if f not in row["file_name"]:
            continue

        imgdata = row
        # bounding box corners of the record's first annotation
        x1, y1, x2, y2 = imgdata["annotations"][0]["bbox"][:4]

        image = cv2.imread(imgdata["file_name"])
        # cv2.imread returns None when the file cannot be read
        if image is None:
            print("could not read image:", imgdata["file_name"])
            continue

        sign_name = f.split('_')[0]
        # A later match with the same sign name overwrites the earlier crop.
        cv2.imwrite(crop_dir + sign_name + '.png', image[y1:y2, x1:x2, :])
        print(sign_name)



school
addedLane
merge
speedLimit45
laneEnds
laneEnds
speedLimit30
pedestrianCrossing
pedestrianCrossing
speedLimit25
speedLimit35
yield
stopAhead
keepRight
pedestrian
