In [1]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from os import listdir
from os.path import isfile, join
from PIL import Image
from sklearn.preprocessing import StandardScaler
import matplotlib.colors as mcolors

In [2]:
# Load the segmentation metadata table from the categories CSV file.
print("Reading file containing metadata about the segmentation...")
metadf = pd.read_csv('dataset/categories.csv', sep=',')

# Subcategory names in file order, plus how many there are.
subcat = list(metadf['name'])
no_subcat = len(subcat)

# Category name belonging to each subcategory row.
cat = list(metadf['category'])

# Category id belonging to each subcategory row.
catid = list(metadf['catId'])
# Collect the category names without consecutive duplicates and count them.
# A new category is recorded every time the category id differs from the id
# of the most recently recorded category, so rows of the same category are
# expected to be adjacent in the metadata table.
categories = [cat[0]]  # category names without duplication
current_id = catid[0]
for cat_name, cat_id in zip(cat, catid):
    if cat_id == current_id:
        continue
    categories.append(cat_name)
    current_id = cat_id
no_cat = len(categories)

# Parse every comma-separated color string of the metadata table into a list
# of ints (spaces are stripped first); col[i] is the color of subcategory i.
col = [
    [int(channel) for channel in color_text.replace(" ", "").split(',')]
    for color_text in metadf.color
]


# Summary of what was read from the metadata table.
print('Number of segmentation subcategories:', no_subcat)
print('Number of segmentation categories:', no_cat, "\n")
print("Subcategories and their representational colors [R, G, B]: \n")
# One line per subcategory: right-aligned name, its index, and its color.
for idx, (name, rgb) in enumerate(zip(subcat, col)):
    print("%30s \t" % name, end ="")
    print(idx, "  ", rgb)

Reading file containing metadata about the segmentation...
Number of segmentation subcategories: 41
Number of segmentation categories: 8 

Subcategories and their representational colors [R, G, B]: 

                     unlabeled 	0    [0, 0, 0]
                       dynamic 	1    [111, 74, 0]
                   ego vehicle 	2    [0, 0, 0]
                        ground 	3    [81, 0, 81]
                        static 	4    [0, 0, 0]
                       parking 	5    [250, 170, 160]
                    rail track 	6    [230, 150, 140]
                          road 	7    [128, 64, 128]
                      sidewalk 	8    [244, 35, 232]
                        bridge 	9    [150, 100, 100]
                      building 	10    [70, 70, 70]
                         fence 	11    [190, 153, 153]
                        garage 	12    [180, 100, 180]
                    guard rail 	13    [180, 165, 180]
                        tunnel 	14    [150, 120, 90]
                         wall  

In [3]:
!pip install natsort

Collecting natsort
  Downloading https://files.pythonhosted.org/packages/d8/67/9f795649f1173b18851941e288035695386ee44c33bb0960832550f8a236/natsort-5.5.0-py2.py3-none-any.whl
Installing collected packages: natsort
Successfully installed natsort-5.5.0


In [34]:
# Reading filenames

def list_filenames(directory):
    """Return the sorted base names of all files found under `directory`.

    The directory tree is walked recursively and only the file names (without
    their directory part) are collected. os.walk() yields entries in an
    arbitrary, filesystem-dependent order, so the result is sorted: the lists
    built below are indexed in parallel and split positionally by later
    cells, which requires a stable, identical ordering across directories.

    A missing directory yields an empty list (os.walk() ignores the error).
    """
    names = []
    for _root, _dirs, files in os.walk(directory):
        names.extend(files)
    return sorted(names)


# Raw input images.
data_filenames = list_filenames('dataset/raw_images/')

# Color-coded annotation images.
annot_filenames = list_filenames('dataset/class_color/')

# Already serialized subcategory-id annotation matrices.
catid_annot_filenames = list_filenames('dataset/catid_annot/')

In [35]:
# Checking for files in the corresponding folder, and creating the
# subcategory-id annotation matrices if they are not all there yet.

def list_catid_annot_paths():
    """Return 'dataset/catid_annot/<filename>' for every file under that folder."""
    paths = []
    for _root, _dirs, files in os.walk('dataset/catid_annot/'):
        for filename in files:
            paths.append('dataset/catid_annot/' + filename)
    return paths


def rgb_to_subcat_ids(img, colors):
    """Convert an RGB annotation image into a matrix of subcategory ids.

    img    -- array whose last axis holds the 3 color channels of each pixel
              (here: the result of PIL's convert('RGB'), i.e. rows x cols x 3).
    colors -- list of [R, G, B] lists; the position in the list is the id.

    Returns an integer array with the shape of the first two axes of `img`.
    When a color occurs several times in `colors`, the first position is
    used, exactly like list.index() would do. The default numpy integer
    dtype is used on purpose: the matrix is dumped with tofile(), which
    writes raw bytes without any header, so the dtype is part of the format.

    Raises ValueError if the image contains a color that is not in `colors`.
    """
    first_id = {}
    for idx, rgb in enumerate(colors):
        first_id.setdefault(tuple(rgb), idx)  # keep the first occurrence only

    pixels = img.reshape(-1, 3)
    if pixels.shape[0] == 0:
        return np.zeros(img.shape[:2], dtype=np.int_)

    # Look up every *distinct* color once instead of searching the color
    # list for each single pixel.
    unique_colors, inverse = np.unique(pixels, axis=0, return_inverse=True)
    unique_ids = []
    for rgb in unique_colors.tolist():
        key = tuple(rgb)
        if key not in first_id:
            raise ValueError('color %s is not a known subcategory color' % (rgb,))
        unique_ids.append(first_id[key])
    lookup = np.array(unique_ids, dtype=np.int_)
    # inverse is flattened explicitly because its shape differs between
    # numpy versions when `axis` is given.
    return lookup[inverse.reshape(-1)].reshape(img.shape[:2])


catid_annot_filenames = list_catid_annot_paths()

if len(catid_annot_filenames) == len(annot_filenames):
    # If all the annotation files exist, there's no need to create them.
    print('Subcategory-Id-Annotation Files Already Exist')
else:
    # If not, the .png annotation files are loaded, the id matrices are
    # created from them, and the matrices are serialized.
    print('Subcategory-Id-Annotation Files DO NOT Exist')
    for image in range(len(annot_filenames)):  # iterating over annotation-image filenames
        # The matrix is stored under the name of the corresponding raw image.
        target = 'dataset/catid_annot/' + data_filenames[image]
        if os.path.exists(target):
            print("ez mar kesz")  # Hungarian: "this one is already done"
            continue

        print(image)
        filename = annot_filenames[image]
        # Loading the .png image, converting it to have RGB channels only.
        with Image.open('dataset/class_color/' + filename) as annot_png:
            img = np.array(annot_png.convert('RGB'))

        # Saving the matrix as raw binary data.
        catid_annot_img = rgb_to_subcat_ids(img, col)
        catid_annot_img.tofile(target)

    # Double checking, once all images have been processed, that every matrix
    # has been serialized. The list is rebuilt from scratch so that files
    # are not counted more than once.
    catid_annot_filenames = list_catid_annot_paths()
    if len(catid_annot_filenames) == len(annot_filenames):
        print('OK')
    else:
        print('WARNING: found', len(catid_annot_filenames),
              'subcategory-id annotation files, expected', len(annot_filenames))

Subcategory-Id-Annotation Files Already Exist


In [36]:
# Split the samples into train / validation / test parts (70% / 20% / 10%).
print("Splitting data into training data, validation data, test data")
nb_samples = len(data_filenames)

# Fraction of the samples assigned to each part.
valid_split = 0.2
test_split = 0.1
train_split = 0.7

print("The ratios are: ")
for label, ratio in (("\t train:\t", train_split),
                     ("\t validation:\t", valid_split),
                     ("\t test:\t", test_split)):
    print(label, ratio)

# The serialized annotation files have the same names as the raw images but
# live in a different directory, so only one list of names has to be split.
# The split is positional: first the training part, then validation, then test.
train_end = int(nb_samples*(1-valid_split-test_split))
valid_end = int(nb_samples*(1-test_split))

data_train = np.array(data_filenames[:train_end])
data_valid = data_filenames[train_end:valid_end]
data_test  = data_filenames[valid_end:]

for label, part in (("\nNumber of training samples:\t", data_train),
                    ("Number of validation samples:\t", data_valid),
                    ("Number of test samples:\t", data_test)):
    print(label, len(part))

Splitting data into training data, validation data, test data
The ratios are: 
	 train:	 0.7
	 validation:	 0.2
	 test:	 0.1

Number of training samples:	 142
Number of validation samples:	 40
Number of test samples:	 21


In [37]:
import tensorflow as tf
from tensorflow.python.client import device_lib

In [38]:
import cv2
import imageio
import json