## Convert VIA annotations export .csv file to RetinaNet .csv format

#### Greg has adapted this from the script Patrick and Maddie used

In [17]:
# import necessary modules
import csv
import json
import os

# Work from the project root so that the relative data paths used below resolve
project_root = 'C:\\Users\\Greg\\Documents\\GitHub\\GreySealCNN'
os.chdir(project_root)

# Path (relative to the project root) of the CSV file exported from VIA
via_path = 'data/via_SealCNN_TrainingData.csv'

#### The following loop pulls each annotation, line-by-line, from the VIA exported CSV, extracts the necessary information, reformats it into the format that RetinaNet requires (https://github.com/fizyr/keras-retinanet#annotations-format), and then reassembles a new CSV line-by-line that RetinaNet can receive

In [20]:
# GDL has overhauled this code to ingest annotations exported as CSV from VIA 2.0.10


def _via_row_to_retinanet(line):
    """Convert one row of the VIA CSV export into a RetinaNet annotation row.

    Output layout (one row per box):
        path/to/image.jpg,x1,y1,x2,y2,class_name
        /data/imgs/img_001.jpg,837,346,981,456,cow

    Parameters
    ----------
    line : list of str
        One parsed CSV row. Column 0 is the image filename, column 5 is
        "region_shape_attributes" and column 6 is "region_attributes"
        (both JSON objects).

    Returns
    -------
    list or None
        [filename, x1, y1, x2, y2, class_name], or None when the row should
        be skipped (header, blank/short row, image without regions,
        zero-sized or fully out-of-frame box, unlabeled or "Unknown" region).

    Raises
    ------
    KeyError
        If the region has no "x", "y", "width" or "height" entry.
    json.JSONDecodeError
        If either attribute column is not valid JSON.
    """
    # blank or truncated rows cannot hold an annotation
    if len(line) < 7:
        return None

    filename = line[0]
    if 'filename' in filename:
        # bypassing the header row of the csv
        return None

    # pulling from column named "region_shape_attributes"; parse it as JSON so
    # the values are looked up by key rather than by their position in the text
    shape = json.loads(line[5])
    if not shape:
        # bypassing empty images
        return None

    # go through float first so values such as "12.7" are accepted, then truncate
    top_left_x, top_left_y, width, height = (
        int(float(shape[key])) for key in ("x", "y", "width", "height")
    )
    if width == 0 or height == 0:
        # skip tiny/empty boxes
        return None

    # convert from "top left and width/height" to "x and y values at each corner
    # of the box". The far corner is taken from the unclamped origin so that
    # clamping a negative origin clips the box instead of shifting it.
    x2 = top_left_x + width
    y2 = top_left_y + height
    x1 = 1 if top_left_x < 0 else top_left_x
    y1 = 1 if top_left_y < 0 else top_left_y
    if x2 <= x1 or y2 <= y1:
        # nothing of the box is left after clipping
        return None

    # pulling from column named "region_attributes" to get class names
    attributes = json.loads(line[6])
    if not attributes:
        # region was drawn but never labeled
        return None
    name = str(next(iter(attributes.values())))

    if name == "Unknown":
        # skip unknown class
        return None

    return [filename, x1, y1, x2, y2, name]


# Build the annotations list row by row
image_annotations = []

# newline='' lets the csv module handle line endings itself
with open(via_path, "r", newline='') as f:
    for line in csv.reader(f, delimiter=","):
        new_row = _via_row_to_retinanet(line)
        if new_row is not None:
            image_annotations.append(new_row)

# newline='' prevents blank rows between records on Windows
with open('data/annotations.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerows(image_annotations)
    

### Output annotations.csv and classes.csv

In [21]:
# Write every converted annotation row to the RetinaNet annotations file.
# newline='' leaves line-ending handling to the csv module.
with open('data/annotations.csv', 'w', newline='') as annotations_file:
    annotations_writer = csv.writer(annotations_file)
    for annotation_row in image_annotations:
        annotations_writer.writerow(annotation_row)

In [22]:
# A more general version would collect class names as they appear in the
# annotations and number them automatically. The list is short enough here
# that the names are simply written out by hand.

# Each row of classes.csv is "class_name,id", with ids counting up from 0.
class_names = ("Adult", "Pup")
detection_classes = [[label, class_id] for class_id, label in enumerate(class_names)]
with open('data/classes.csv', 'w', newline='') as classes_file:
    csv.writer(classes_file).writerows(detection_classes)