In [21]:
#import relevant libraries
import cv2
import random
import pandas as pd
import numpy as np
import json
import re
import csv
from datetime import datetime, timedelta

import os
from xml.etree.ElementTree import parse, Element, SubElement, ElementTree
import xml.etree.ElementTree as ET
from collections import defaultdict
import nltk
from nltk.corpus import wordnet
import shutil

import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from pathlib import Path
from PIL import Image, ImageOps
import tensorflow
import glob
sns.set_style('white')
sns.set_palette(sns.color_palette('Paired'))


## Random Sample 200 txt files and respective image files

In [2]:
ANNOT_TXT_PATH=r'C:\Users\mpe_03\Desktop\fiftyone_kitti\train\labels'
SAMPLE_ANNOT_TXT_PATH=r'C:\Users\mpe_03\Desktop\Fifty1\sample_kitty200_txt'
IMG_PATH=r'C:\Users\mpe_03\Desktop\fiftyone_kitti\train\data'
SAMPLE_IMG_PATH=r'C:\Users\mpe_03\Desktop\Fifty1\sample_kitty200_img'

#GET TXT SAMPLES
sample_filenames=[]

#create folder or replace any existing folder with new 200 txt samples
if os.path.isdir(SAMPLE_ANNOT_TXT_PATH):
    shutil.rmtree(SAMPLE_ANNOT_TXT_PATH)        
elif os.path.isfile(SAMPLE_ANNOT_TXT_PATH):
    os.remove(SAMPLE_ANNOT_TXT_PATH)
os.mkdir(SAMPLE_ANNOT_TXT_PATH)

rand_filenames=random.sample(os.listdir(ANNOT_TXT_PATH), 200) # numeral => no. of samples
for f in rand_filenames:
    source_path=os.path.join(ANNOT_TXT_PATH, f) #SOURCE PATH
    shutil.copy(source_path, SAMPLE_ANNOT_TXT_PATH) #SAMPLE_ANNOT_TXT_PATH is detination path
    filename=f.split('.')[0]
    sample_filenames.append(filename)


sample_filenames=pd.DataFrame(sample_filenames)
sample_filenames.to_csv('sample_filenames.txt', sep=' ', index=False, header=False) #save filenaames of sample files

sample_filenames=pd.read_csv('sample_filenames.txt', sep='\n', header=None, names=['name'], dtype=str)
sample_filenames=sample_filenames['name'].tolist() # create a list


#GET IMAGE SAMPLES
#create folder or replace any existing folder with new 200 image samples
if os.path.isdir(SAMPLE_IMG_PATH):
    shutil.rmtree(SAMPLE_IMG_PATH)        
elif os.path.isfile(SAMPLE_IMG_PATH):
    os.remove(SAMPLE_IMG_PATH)   
os.mkdir(SAMPLE_IMG_PATH)

for filename in os.listdir(IMG_PATH): 
    filename = filename.split('.')[0]
    if filename in sample_filenames: #if file is in the list
                            
        full_file_path=os.path.join(IMG_PATH, filename + '.png') #source path
        full_path_destination=os.path.join(SAMPLE_IMG_PATH, filename + '.png') #destination path
        shutil.copy(full_file_path, full_path_destination)


## Process the 200 Random Samples (create  csv dataframe)

In [3]:
SAMPLE_ANNOT_TXT_PATH=r'C:\Users\mpe_03\Desktop\Fifty1\sample_kitty200_txt'
SAMPLE_IMG_PATH=r'C:\Users\mpe_03\Desktop\Fifty1\sample_kitty200_img'


def get_kitti(SAMPLE_ANNOT_TXT_PATH):   
    
    filenames=[]
    annot=[]
    annot_fullnames=[]
    image_names=[]
    height=[]
    width=[]

    for txt_file in os.listdir(SAMPLE_ANNOT_TXT_PATH):
        if not txt_file.endswith('txt'):
            continue
        fullname=os.path.join(SAMPLE_ANNOT_TXT_PATH,txt_file)
        txt_file=txt_file.split('.')[0]
        filenames.append(txt_file)

        with open(fullname,'r') as annotData:
            lines=annotData.readlines()
            for line in lines:
                annot.append(line)
                annot_fullnames.append(txt_file) # to attain multiple annotations per file 
            print(lines)
   
            
    # CREATE FILENAME DATAFRAME
    df_filename=pd.DataFrame(annot_fullnames) #filenames from txt label files
    df_filename.rename(columns={0:'filename'}, inplace=True)
    #df_filename=df_filename.assign(filename=lambda x: df_filename['ID']+'.png' )
    #df_filename=df_filename.assign(filename=lambda x: df_filename['ID'])
    
    print(df_filename)


    # CREATE A DIMENSIONS DATAFRAME
    for png_file in os.listdir(SAMPLE_IMG_PATH):

        if not png_file.endswith('png'):
                continue
        image_name=os.path.join(SAMPLE_IMG_PATH,png_file)
        image_filename=png_file.split('.')[0]
        image_names.append(image_filename)

        image=cv2.imread(image_name)
        h,w=image.shape[:2]
        height.append(h)
        width.append(w)

    dims=pd.DataFrame({'filename':image_names,
                       'width':width,                  
                       'height':height})
    
    print(dims)


    # CREATE BBOX DATAFRAME
    df_annot=df_annot=pd.DataFrame(annot)
    df_annot=df_annot[0].str.split(' ', expand =True)  
    df_annot[14]=df_annot[14].str.replace('\n','')    
    df_annot=df_annot.iloc[:,:8]
    
    
    #MERGE THE DATAFRAMES
    df_filename_annot=pd.concat([df_filename,df_annot], axis=1)
    df_filename_annot.columns=['filename','labelname','truncated','occluded','alpha','xmin','ymin','xmax','ymax']
    
    df=pd.merge(df_filename_annot, dims, on='filename', how='outer')
    df=df[['filename', 'width', 'height', 'labelname', 'xmin', 'ymin', 'xmax', 'ymax', 'occluded', 'truncated']]
    
    df.drop_duplicates(keep='first', inplace=True)
    df.dropna(subset=['width','height'], inplace=True)
    df.dropna(subset=['labelname','xmin','ymin','xmax','ymax'], inplace=True)
    cols_int=['width','height', 'xmin', 'ymin', 'xmax', 'ymax', 'occluded', 'truncated']
    df[cols_int]=df[cols_int].replace(np.nan,0)
    df[cols_int]=df[cols_int].astype('float').astype('int64')
    df['labelname']=df['labelname'].str.lower()
    df.labelname.replace({'cyclist':'person',
             'person_sitting':'person',
             'pedestrian':'person',
              'car':'cars',
              'van':'truck'}, inplace=True)
   

    return df


out_df_kitti=get_kitti(SAMPLE_ANNOT_TXT_PATH)
out_df_kitti.to_csv('sample200_kitti.csv', index=False)


['Truck 0.00 1 1.80 633.90 155.08 704.57 211.64 2.66 2.23 6.19 3.16 1.86 37.22 1.89\n', 'Car 0.00 0 -1.67 555.45 174.38 576.17 191.19 1.53 1.59 3.64 -4.22 1.70 68.34 -1.73\n']
['Car 0.00 0 2.12 75.43 192.52 273.65 277.52 1.47 1.63 4.11 -8.95 1.92 15.21 1.59\n', 'Tram 0.00 0 -1.65 694.04 152.62 734.19 188.24 3.33 2.37 23.29 11.16 1.55 79.53 -1.51\n', 'Car 0.00 0 1.98 269.01 176.97 375.72 242.36 1.80 1.61 3.83 -8.66 1.94 21.96 1.60\n', 'Car 0.00 1 1.95 346.60 186.70 419.98 229.04 1.45 1.45 3.40 -8.46 1.99 27.06 1.65\n', 'Car 0.00 1 1.87 387.31 178.52 455.56 221.43 1.75 1.71 4.31 -8.27 2.02 31.88 1.61\n', 'Car 0.00 0 1.80 455.95 181.43 500.99 209.72 1.53 1.66 4.01 -7.54 2.03 41.53 1.62\n', 'Car 0.00 2 1.79 480.03 181.40 517.92 205.67 1.48 1.60 3.90 -7.16 2.05 46.70 1.64\n', 'DontCare -1 -1 -10 532.46 177.22 558.68 194.04 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 654.37 171.38 663.83 177.60 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 642.14 171.49 649.84 176.74 -1 -

['Truck 0.00 0 -1.52 597.11 140.70 649.69 200.66 3.52 2.89 10.81 1.07 1.69 48.13 -1.50\n', 'Car 0.00 2 1.88 405.40 177.53 435.69 194.90 1.58 1.64 3.88 -17.88 2.04 68.19 1.63\n', 'DontCare -1 -1 -10 438.57 175.36 505.49 191.83 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 505.52 172.45 558.86 186.97 -1 -1 -1 -1000 -1000 -1000 -10\n']
['Car 0.00 0 -1.57 596.90 175.17 663.64 239.63 1.61 1.66 3.20 0.52 1.69 19.78 -1.54\n', 'Tram 0.00 0 1.46 719.00 129.26 784.88 200.47 3.63 2.17 14.86 8.39 1.48 44.42 1.65\n', 'Tram 0.00 0 1.50 711.55 143.62 751.59 194.81 3.65 2.28 10.13 9.44 1.62 56.88 1.66\n', 'Pedestrian 0.00 0 1.97 365.33 179.43 378.53 216.92 1.60 0.51 0.87 -10.40 1.89 31.30 1.65\n']
['Car 0.00 0 -1.57 597.43 175.07 625.53 202.13 1.66 1.73 3.05 0.07 1.83 46.35 -1.57\n', 'Truck 0.00 0 -1.70 627.32 162.39 668.63 193.11 2.85 2.63 12.34 3.71 1.95 73.81 -1.65\n', 'Misc 0.00 0 2.14 89.09 156.33 211.47 240.74 2.85 2.02 3.84 -16.74 2.32 26.47 1.58\n', 'Van 0.00 3 1.84 395.45 176.23 440.

    filename  width  height
0     000026   1242     375
1     000045   1242     375
2     000079   1242     375
3     000090   1242     375
4     000111   1242     375
..       ...    ...     ...
195   007329   1241     376
196   007340   1242     375
197   007418   1242     375
198   007460   1242     375
199   007472   1242     375

[200 rows x 3 columns]


## Create XML for 200 Samples

In [5]:

xml_folder = "sample_kitty200_xmls"

if not os.path.exists(xml_folder):
    os.mkdir(save_root2)

# Create dictionary to map new class names
mapping_dict={'cyclist':'person',
             'person_sitting':'person',
             'pedestrian':'person',
              'car':'cars',
              'van':'truck'
             }

def write_xml(folder, filename, bbox_list):
    root = Element('annotation')
    SubElement(root, 'folder').text = folder
    SubElement(root, 'filename').text = filename.split('.')[0]
    SubElement(root, 'path').text = './images/' +  filename.split('.')[0]
    source = SubElement(root, 'source')
    SubElement(source, 'database').text = 'Unknown'


    # Details from first entry
    e_filename, e_width, e_height, e_labelname, e_xmin, e_ymin, e_xmax, e_ymax, occluded, truncated = bbox_list[0]
    
    size = SubElement(root, 'size')
    SubElement(size, 'width').text = e_width
    SubElement(size, 'height').text = e_height
    SubElement(size, 'depth').text = '3'

    SubElement(root, 'segmented').text = '0'

    for entry in bbox_list:
        
        e_filename, e_width, e_height, e_labelname, e_xmin, e_ymin, e_xmax, e_ymax,  occluded, truncated= entry
        
        
# REMOVE UNWANTED CLASS ANNOTATIONS
        if e_labelname in  ['dontcare','misc', 'tram']:
            continue
            
# RENAME SOME OF THE CLASSES
        if e_labelname in mapping_dict:
            e_labelname=mapping_dict[e_labelname]
        
        
        obj = SubElement(root, 'object')
        SubElement(obj, 'name').text = e_labelname
        SubElement(obj, 'pose').text = 'Unspecified'
        SubElement(obj, 'truncated').text = truncated
        SubElement(obj, 'difficult').text = '0'
        SubElement(obj, 'occluded').text = occluded

        bbox = SubElement(obj, 'bndbox')
        SubElement(bbox, 'xmin').text = e_xmin
        SubElement(bbox, 'ymin').text = e_ymin
        SubElement(bbox, 'xmax').text = e_xmax
        SubElement(bbox, 'ymax').text = e_ymax

    #indent(root)
    tree = ET.ElementTree(root)
    ET.indent(tree, space='\t', level=0)
    
    xml_filename = os.path.join('.', folder, os.path.splitext(filename)[0] + '.xml')
    tree.write(xml_filename)
    
    

entries_by_filename = defaultdict(list)

with open('sample200_kitti.csv', 'r', encoding='utf-8') as f_input_csv:
    csv_input = csv.reader(f_input_csv)
    header = next(csv_input)

    for row in csv_input:
        filename, width, height, labelname, xmin, ymin, xmax, ymax,  occluded, truncated = row
        entries_by_filename[filename].append(row) #for whole csv to xml

        
for filename, entries in entries_by_filename.items():
    print(filename, len(entries))
    write_xml(xml_folder, filename, entries)


000026 2
000045 12
000079 10
000090 1
000111 14
000303 9
000382 9
000403 8
000424 15
000433 7
000441 13
000566 4
000651 9
000739 4
000806 3
000819 9
000828 1
000893 6
000967 7
001032 7
001043 16
001120 10
001137 3
001168 5
001218 4
001222 11
001244 11
001250 7
001268 8
001280 9
001300 1
001314 2
001327 3
001337 6
001384 2
001412 14
001416 6
001458 10
001522 17
001530 11
001602 10
001615 3
001623 3
001652 6
001679 6
001691 5
001721 12
001752 4
001762 5
001782 1
001790 5
001855 13
001857 6
001872 8
001885 8
001919 10
001938 6
002072 5
002096 13
002098 6
002106 16
002143 4
002264 9
002364 7
002399 17
002403 2
002411 2
002443 8
002445 3
002489 6
002550 2
002554 1
002592 7
002605 7
002642 1
002653 3
002672 6
002683 16
002696 5
002720 3
002756 7
002767 1
002826 11
002856 5
002953 9
003026 5
003036 5
003051 14
003052 4
003136 8
003141 10
003178 6
003179 8
003203 13
003236 11
003277 1
003283 11
003288 13
003310 14
003345 7
003355 8
003361 8
003366 5
003477 7
003501 6
003514 14
003569 7
003595 

# THE END