Sample script to count test files in Donkey Car test set


In [13]:
import os, os.path

# simple version for working with CWD
print( len([name for name in os.listdir('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01') 
            if os.path.isfile(name)]))

# path joining version for other paths
DIR = '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01'
print( len([name for name in os.listdir(DIR) 
            if os.path.isfile(os.path.join(DIR, name))]))

1
33540


Sample program that counts the number of jpg and json files using a regular expression

In [1]:
import glob
tubPath = '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01'
print(len(glob.glob1(tubPath,"*.jpg")))
print(len(glob.glob1(tubPath,"record_*.json")))

16652
16652


Note that there is one addional JSON file for the metadata called meta.json

In [94]:
from pathlib import Path
import glob

tubPath = Path('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01')
jpgCount = len(glob.glob1(tubPath,"*.jpg"))
print("JPG Count:", "{:,}".format(jpgCount))

JPG Count: 16,766


Now let's look at a sample data file.  We will use the open() and json.load() functions to get the file.

In [13]:
import json
from pprint import pprint

filePath = '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01/record_1000.json'
json_file_path = Path(filePath)

with open(json_file_path) as f:
    data = json.load(f)

print("Full File:")
pprint(data)

print("")
print("Angle:", data["user/angle"])
print("Throttle:", data["user/throttle"])

Full File:
{'cam/image_array': '1000_cam-image_array_.jpg',
 'timestamp': '2019-01-05 17:09:35.184483',
 'user/angle': 0.18989955357142868,
 'user/mode': 'user',
 'user/throttle': 0.7175781250000001}

Angle: 0.18989955357142868
Throttle: 0.7175781250000001


Here is the Meta file meta.json

In [61]:
import json
from pprint import pprint

filePath = '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01/meta.json'
json_file_path = Path(filePath)

with open(json_file_path) as f:
    data = json.load(f)

print("Meta File:")
pprint(data)

Meta File:
{'inputs': ['cam/image_array',
            'user/angle',
            'user/throttle',
            'user/mode',
            'timestamp'],
 'types': ['image_array', 'float', 'float', 'str', 'str']}


Now lets calculate the average angle.  We will count the total number of files that match the pattern record_*.json

In [85]:
import os
import json

tubPath = Path('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01')
json_files = glob.glob1(tubPath,"record_*.json")
sample_files = json_files[:20000]
jsonCount = len(sample_files)
print("Total JSON File Count:", "{:,}".format(jsonCount))

validFileCount = 0
invalidFileCount = 0
sumAngles = 0
sumThrottle = 0
for filename in sample_files:
   try:
        data = json.load(open(os.path.join(tubPath, filename)))
        validFileCount += 1
        sumAngles += data["user/angle"]
        sumThrottle += data["user/throttle"]
   except:
        # print("Error loading:", filename)
        invalidFileCount += 1
   
print("Valid JSON File Count:", "{:,}".format(validFileCount))
print("Invalid JSON File Count:", "{:,}".format(invalidFileCount))
print("Sum of Angles:", "{:,}".format(sumAngles))
print("Sum of Throttle:", "{:,}".format(sumThrottle))
print("Average Angle:", "{:,}".format(sumAngles/validFileCount))
print("Average Throttle:", "{:,}".format(sumThrottle/validFileCount))


print("")

# for filename in sample_files:
    # print(filename)

Total JSON File Count: 16,768
Valid JSON File Count: 16,652
Invalid JSON File Count: 116
Sum of Angles: 5,873.256398065856
Sum of Trottle: 11,250.273293809607
Average Angle: 0.35270576495711364
Average Throttle: 0.6756109352515978



We would now like to remove the JSON files that are invalid.  We also want to remove the associated images.

In [88]:
import os
import json

tubPath = Path('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01')
json_files = glob.glob1(tubPath,"record_*.json")
sample_files = json_files[:20000]
jsonCount = len(sample_files)
print("Total JSON File Count:", "{:,}".format(jsonCount))

for filename in sample_files:
    path = os.path.join(tubPath, filename)
    try:
        data = json.load(open(path))
    except:
        print("Removing Invalid JSON File", filename)
        os.remove(path)
        # format for image is {$id}_cam-image_array_.jpg
        # id = filename.replace("_cam-image_array_.jpg", "")
        # os.remove(os.path.join('id','_cam-image_array_.jpg'))


Total JSON File Count: 16,652


In [None]:
Here is how we remove a file using the os.remove() function

In [93]:
import os
tubPath = Path('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01')
os.remove(os.path.join(tubPath, '10704' + '_cam-image_array_.jpg'))

In [None]:
Count Lines in a File

In [105]:
import os

file = open('/Users/dan/Documents/Projects/DonkeyCar/notes/invalid-file-list.txt', 'r')
print(len(file.read()))

683


In [None]:
For each line in the text file that has the ID, verify the file is present and get the file size.  The remove it.
Note that the id.strip() removes the newlines after the file ID.

In [119]:
import os

file = open('/Users/dan/Documents/Projects/DonkeyCar/notes/invalid-file-list.txt', 'r')
lines = file.readlines()
for id in lines:
    filename = '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01/' + id.strip() + '_cam-image_array_.jpg'
    print(filename)
    print(os.path.isfile(filename))
    print(os.path.getsize(filename))
    os.remove(filename)

/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01/10641_cam-image_array_.jpg
False


FileNotFoundError: [Errno 2] No such file or directory: '/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01/10641_cam-image_array_.jpg'

Now let's list the file sizes of the remaining JPG image files.

In [130]:
import os
import glob

tubPath = Path('/Users/dan/Documents/Projects/DonkeyCar/tub/test-set-01')

sample_files = glob.glob1(tubPath, "*.jpg")[:10]

for filename in sample_files:
    print("File Name:", filename)
    print("Size (Bytes):", os.path.getsize(os.path.join(tubPath, filename)))

File Name: 3937_cam-image_array_.jpg
Size (Bytes): 3496
File Name: 1086_cam-image_array_.jpg
Size (Bytes): 4338
File Name: 6247_cam-image_array_.jpg
Size (Bytes): 3295
File Name: 1202_cam-image_array_.jpg
Size (Bytes): 2620
File Name: 8578_cam-image_array_.jpg
Size (Bytes): 3412
File Name: 16558_cam-image_array_.jpg
Size (Bytes): 3666
File Name: 11799_cam-image_array_.jpg
Size (Bytes): 3917
File Name: 7545_cam-image_array_.jpg
Size (Bytes): 4346
File Name: 4972_cam-image_array_.jpg
Size (Bytes): 3147
File Name: 10068_cam-image_array_.jpg
Size (Bytes): 3946
