# Convert all PPM images to JPG format and generate YOLO annotations

### Convert PPM to JPG

In [None]:
#import libraries
import glob
import os

import numpy as np
import pandas as pd
from PIL import Image

Read all the files with the ppm extension, then save them in JPG format

In [None]:
# Create the output folder up front so saving does not fail on a missing directory
os.makedirs("GTSDB/images", exist_ok=True)

for filename in glob.glob('GTSDB/*.ppm'):
    # Name of the file without its folder or extension. os.path.basename copes
    # with the platform's path separator, whereas splitting on "\\" only works
    # for Windows-style paths and raises IndexError everywhere else.
    basename = os.path.basename(filename).split(".")[0]
    # The context manager releases the source file once the JPG has been written
    with Image.open(filename) as ppm:
        ppm.save("GTSDB/images/" + basename + ".jpg")

### Read the ground truth file (.txt), save the information as csv file (for easier manipulation using pandas), and generate YOLO annotations 

Open the file to read and the file to write the annotations

In [None]:
# Input: the ground-truth text file (read mode is the default for open)
gt_in = open("GTSDB/gt.txt")
# Output: mode "w" creates the file, or empties it first if it already exists
gt_out = open("gt_GTSDB.csv", mode="w")

Convert from .txt to .csv

In [None]:
# Copy the ground-truth rows into the CSV file, turning the ';' field
# separators into ','. Both handles are closed even if the copy fails.
try:
    #header of the file
    gt_out.write("file,xmin,ymin,xmax,ymax,class\n")
    #actual content
    for line in gt_in:
        gt_out.write(line.replace(";", ","))
finally:
    # the input handle was previously left open; release it together with the output
    gt_in.close()
    gt_out.close()

Define function to convert the annotations
Note: the size of the images is known and does not vary across the dataset

In [None]:
def convert(xmin, xmax, ymin, ymax, img_width, img_height):
    """Convert a corner-based bounding box to a normalised centre/size box.

    Args:
        xmin, xmax: left and right edges of the box.
        ymin, ymax: top and bottom edges of the box.
        img_width, img_height: size of the image the box belongs to.

    Returns:
        A tuple (xcenter, ycenter, w, h): the box centre and the box size,
        each divided by the matching image dimension.
    """
    x_sum, y_sum = xmin + xmax, ymin + ymax
    box_w, box_h = xmax - xmin, ymax - ymin
    return (
        x_sum / img_width / 2,   # midpoint of the two x edges, as a fraction of the width
        y_sum / img_height / 2,  # midpoint of the two y edges, as a fraction of the height
        box_w / img_width,
        box_h / img_height,
    )

In [None]:
#define the image sizes (constants)
#every image is treated as having this one size (presumably in pixels);
#the values are passed to convert() to normalise the box coordinates
IMG_WIDTH = 1360
IMG_HEIGHT = 800

In [None]:
# Load the CSV version of the ground truth into a DataFrame
df = pd.read_csv("gt_GTSDB.csv")
# Preview the first 10 rows
df.head(n=10)

Unnamed: 0,file,xmin,ymin,xmax,ymax,class
0,00000.ppm,774,411,815,446,11
1,00001.ppm,983,388,1024,432,40
2,00001.ppm,386,494,442,552,38
3,00001.ppm,973,335,1031,390,13
4,00002.ppm,892,476,1006,592,39
5,00003.ppm,742,443,765,466,4
6,00003.ppm,742,466,764,489,9
7,00003.ppm,737,412,769,443,21
8,00004.ppm,898,342,967,409,21
9,00004.ppm,906,407,955,459,2


Convert the existing classes to new class format

In [None]:
# Original class ids grouped by the new class they are merged into;
# the trailing comment on each line is the new class id
stop = [14]  # 1
speedlimit = list(range(0, 6)) + [7, 8]  # 2
crosswalk = [27]  # 3
danger = [11] + list(range(18, 27)) + list(range(28, 32))  # 4
mandatory = list(range(33, 41))  # 5
prohibitory = [9, 10, 15, 16]  # 6
others = [6, 12, 13, 17, 32, 41, 42]  # 7

Create a new .csv file to store the values after conversion and reclassification

In [None]:
# Start the CSV that will hold the converted boxes; mode "w" replaces any earlier version
new_gt_out = open("gt_GTSDB_yolo.csv", mode="w")
# First row: the column names
new_gt_out.write(",".join(("file", "xcenter", "ycenter", "width", "height", "class")) + "\n")

40

Add content to .csv file and generate annotation files

In [None]:
# Lookup table: original class id -> new class id (1-7). The groups are listed
# in the order in which they take priority; setdefault keeps the first match
# should an id ever appear in more than one group.
# NOTE(review): the new ids start at 1 - confirm that the training setup does
# not expect 0-based class indices in the annotation files.
class_groups = [stop, speedlimit, crosswalk, danger, mandatory, prohibitory, others]
new_class_of = {}
for new_id, group in enumerate(class_groups, start=1):
    for old_id in group:
        new_class_of.setdefault(old_id, new_id)

# Annotation lines per image, keyed by the image basename. Collecting them
# first means every annotation file is written exactly once, even when the rows
# of one image are not adjacent, and an empty table simply produces no files.
annotations = {}

boxes = df[['file', 'xmin', 'xmax', 'ymin', 'ymax', 'class']]
try:
    for image_file, xmin, xmax, ymin, ymax, cls in boxes.itertuples(index=False, name=None):
        #convert the coordinates
        xcenter, ycenter, w, h = convert(xmin, xmax, ymin, ymax, IMG_WIDTH, IMG_HEIGHT)

        #reclassify; an unmapped id is reported instead of silently reusing
        #the class of the previous row
        if cls not in new_class_of:
            raise ValueError("class " + str(cls) + " of " + str(image_file) + " is not mapped to a new class")
        new_cls = new_class_of[cls]

        basename = image_file.split(".")[0]
        box = [str(xcenter), str(ycenter), str(w), str(h)]

        #write to csv file
        new_gt_out.write(",".join([basename + ".jpg"] + box + [str(new_cls)]) + "\n")

        #collect the content of the annotation file
        annotations.setdefault(basename, []).append(" ".join([str(new_cls)] + box) + "\n")
finally:
    # close the CSV even if a row could not be converted
    new_gt_out.close()

# Write one annotation file (.txt) per image; each handle is closed right after use
os.makedirs("GTSDB/annotations", exist_ok=True)
for basename, lines in annotations.items():
    with open("GTSDB/annotations/" + basename + ".txt", "w") as out_file:
        out_file.writelines(lines)