In [None]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm.autonotebook import tqdm
import imageio

In [None]:
# Chunk directory names assigned to each dataset split.
train = [f'Chunk_{idx}' for idx in (3, 4, 6, 7, 9, 10)]
val = [f'Chunk_{idx}' for idx in (5, 8)]
test = [f'Chunk_{idx}' for idx in (1, 2)]

In [None]:
def add_zero(x):
  """Zero-pad the last component of a '/'-separated path to two characters.

  Padding lets a plain lexicographic sort order numbered folders numerically
  (e.g. '.../9' becomes '.../09', which sorts before '.../11').

  Arguments:
    x {str} -- '/'-separated path whose final component is the folder number

  Returns:
    str -- x with its final component left-padded with '0' to width 2; a
           component that is already two or more characters long is returned
           unchanged
  """
  head, sep, num = x.rpartition('/')
  # rjust only ever pads, so a longer component (e.g. '123') passes through
  # intact instead of being spliced into the middle of the path.
  return head + sep + num.rjust(2, '0')

In [None]:
def _bytes_feature(value):
    """Wrap one string / bytes value in a tf.train.Feature holding a bytes_list."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _float_feature(value):
    """Wrap one float / double value in a tf.train.Feature holding a float_list."""
    payload = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)

def _int64_feature(value):
    """Wrap one bool / enum / int / uint value in a tf.train.Feature holding an int64_list."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

In [None]:
def serialize_example(example):
    """Serialize one dataset item into a tf.train.Example byte string.

    Arguments:
      example {dict} -- maps each feature name to a dictionary with the fields
                        "data" (the raw value) and "_type" (a callable that
                        turns that value into a tf.train.Feature)

    Returns:
      bytes -- the serialized tf.train.Example protocol buffer
    """
    dset_item = {
        key: field["_type"](field["data"]) for key, field in example.items()
    }
    # Build the proto once, after every feature has been converted. Doing it
    # inside the loop rebuilt it per key and left it undefined for an empty
    # `example`.
    example_proto = tf.train.Example(features=tf.train.Features(feature=dset_item))
    return example_proto.SerializeToString()

In [None]:
# Segment names to skip -- presumably ones already converted for the val split
# (going by the variable name; TODO confirm). Only referenced by the
# commented-out `if segment in finished_val` check in the conversion loop.
finished_val = [
    f'99c94dc769b5d96e|{stamp}'
    for stamp in (
        '2018-07-03--14-38-26',
        '2018-07-06--11-17-49',
        '2018-07-05--20-49-41',
        '2018-07-06--09-43-54',
        '2018-07-05--10-05-06',
        '2018-07-06--01-15-15',
        '2018-07-04--00-20-19',
        '2018-07-05--19-49-34',
        '2018-07-05--16-30-16',
        '2018-07-06--12-32-39',
    )
]

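First, handle segments: each segment directory in the selected chunks is converted into its own TFRecord file, combining all of its numbered sub-folders.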

In [None]:
# Convert each segment directory of the chunks listed in `test` into one
# TFRecord file: the frames of every numbered sub-folder are resized to 128x128
# and paired with speed / steering-angle readings resampled to one per frame.
for chunk_dir in glob.glob('/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/Comma2k19/*'): # Chunk Directory

  if chunk_dir.split('/')[-1] in test:
    print('In chunk: ', chunk_dir)

    for segment in os.listdir(chunk_dir): # Weird number Directory

        # if segment in finished_val:
        #   continue
        print(f'Currently on {segment}')

        # Prepare List
        speed_list = []
        angle_list = []
        video_list = []

        # Order the numbered sub-folders numerically by sorting on their
        # zero-padded form ('9' -> '09' sorts before '11'). Sorting with a key
        # keeps the real paths, so nothing has to be un-padded afterwards (a
        # blanket '/0' -> '/' replace would also hit other path components).
        segment_files = sorted(glob.glob(os.path.join(chunk_dir, segment, '*')), key=add_zero)
        print('Order of files: ', [add_zero(path).split('/')[-1] for path in segment_files])

        # Iterate through the numbers within each file
        for segment_number_file in segment_files:
            video_path = os.path.join(segment_number_file, 'video.hevc')

            # Load arrays for each number (flattened to 1-D)
            speed_values = np.load(os.path.join(segment_number_file, 'processed_log/CAN/speed/value'), mmap_mode='r')
            speed_values = speed_values.reshape(-1,)
            steering_angle_values = np.load(os.path.join(segment_number_file, 'processed_log/CAN/steering_angle/value'), mmap_mode='r')
            steering_angle_values = steering_angle_values.reshape(-1,)

            try:
              # The context manager closes the reader even if decoding fails part-way.
              with imageio.get_reader(video_path, 'ffmpeg') as vid:
                data = np.stack(list(vid.iter_data()))
              video_numpy = tf.image.resize(data, [128, 128]).numpy().astype('uint8')
            except Exception as err:
              # Best effort: skip an unreadable video but report why. Catching
              # Exception (not a bare except) lets KeyboardInterrupt propagate.
              print(f"Couldn't open {video_path} ({type(err).__name__}: {err})")
              continue

            # Pick evenly spaced readings so there is exactly one per video frame.
            idxs_speed = np.linspace(0, speed_values.shape[0] - 1, video_numpy.shape[0]).astype("int")
            idxs_angle = np.linspace(0, steering_angle_values.shape[0] - 1, video_numpy.shape[0]).astype("int")

            # Append to list
            speed_list.append(speed_values[idxs_speed])
            angle_list.append(steering_angle_values[idxs_angle])
            video_list.append(video_numpy)

        if not video_list:
            # np.concatenate raises ValueError on an empty list; there is
            # nothing to write for this segment anyway.
            print(f'No readable video found for {segment}, skipping')
            continue

        # Gather all the items within the overall file
        speed_array = np.concatenate(speed_list)
        angle_array = np.concatenate(angle_list)
        frames_array = np.concatenate(video_list)

        num_of_items = frames_array.shape[0]

        # Write TFRecords: one Example per frame
        with tf.io.TFRecordWriter(f'/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/comma2k19TF/test/{segment}.tfrecord') as writer:

            for row in tqdm(range(num_of_items)):

              speed = speed_array[row]
              angle = angle_array[row]
              img_resized = frames_array[row]

              fields = {
                  'X': {'data': img_resized.flatten().tobytes(), '_type': _bytes_feature},
                  'speed': {'data': speed, '_type': _float_feature},
                  'steering_angle': {'data': angle, '_type': _float_feature},
                  }

              example = serialize_example(fields)
              writer.write(example)

    print('------------------------------------------------------------------------------------------------------------------------------')

In [None]:
# Total number of directory entries across the three output split folders.
sum(
    len(os.listdir(f'/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/comma2k19TF/{split}'))
    for split in ('test', 'val', 'train')
)

In [None]:
# Report the combined size of every regular file under the val output folder.
directory = '/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/comma2k19TF/val'
total_size = 0

for dirpath, dirnames, filenames in os.walk(directory):
    for file in filenames:
        file_path = os.path.join(dirpath, file)

        # Symbolic links are skipped so that only real files are counted.
        if not os.path.islink(file_path):
            total_size += os.path.getsize(file_path)

# total_size is in bytes; dividing by 1e9 yields gigabytes, so label it as GB.
print("Total size:", total_size/1e9, "GB")

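Combine segments into chunks (I believe): each numbered segment folder of a selected chunk is written to its own TFRecord file, named after the chunk as `<chunk>_<index>`.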

In [None]:
# Write one TFRecord per numbered segment folder of a chunk listed in `test`.
# Each record pairs a 128x128 frame with the speed and steering-angle reading
# resampled to that frame; files are named '<chunk>_<index>.tfrecord'.
for chunk in glob.glob('/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/Comma2k19/*'): # Chunk Directory

  print('In chunk: ', chunk)

  chunk_name = chunk.split('/')[-1]
  if chunk_name in test:
      # enumerate ties the index in the output name to the folder's position in
      # the sorted listing, so a skipped folder never shifts later names.
      for i, item in enumerate(sorted(glob.glob(f'{chunk}/*/*'))):

        name = chunk_name + '_' + str(i)

        video_path = os.path.join(item, 'video.hevc')
        angle_path = os.path.join(item, 'processed_log/CAN/steering_angle/value')
        speed_path = os.path.join(item, 'processed_log/CAN/speed/value')

        try:
          # The context manager closes the reader even if decoding fails part-way.
          with imageio.get_reader(video_path, 'ffmpeg') as vid:
            data = np.stack(list(vid.iter_data()))
          img_data = tf.image.resize(data, [128, 128]).numpy().astype('uint8')
        except Exception as err:
          # Skip an unreadable video instead of aborting the whole conversion,
          # matching the handling in the segment-level conversion loop.
          print(f"Couldn't open {video_path} ({type(err).__name__}: {err})")
          continue

        speed_values = np.load(speed_path, mmap_mode='r')
        speed_values = speed_values.reshape(-1,)
        angle_values = np.load(angle_path, mmap_mode='r')
        angle_values = angle_values.reshape(-1,)

        if speed_values.shape[0] != angle_values.shape[0]:
          print(f'Speed and Angle arrays dont have same shape for {item}')
          print(f'speed shape: {speed_values.shape[0]}, angle shape: {angle_values.shape[0]}')

          # Resample both signals to the shorter length so that the shared
          # index array computed below is valid for each of them.
          min_value = min(speed_values.shape[0], angle_values.shape[0])
          idx_speed = np.linspace(0, speed_values.shape[0]-1, min_value).astype("int")
          idx_angle = np.linspace(0, angle_values.shape[0]-1, min_value).astype("int")

          speed_values = speed_values[idx_speed]
          angle_values = angle_values[idx_angle]

        # Pick evenly spaced readings so there is exactly one per video frame.
        idxs = np.linspace(0, speed_values.shape[0] - 1, img_data.shape[0]).astype("int")

        speed_values = speed_values[idxs]
        angle_values = angle_values[idxs]
        assert speed_values.shape[0] == angle_values.shape[0] == img_data.shape[0], 'Shapes need to be the same'

        num_of_items = img_data.shape[0]
        with tf.io.TFRecordWriter(f'/content/drive/Shareddrives/ELEC 494 - Ω2Ω/Data/comma2k19TF/test/{name}.tfrecord') as writer:

            for row in tqdm(range(num_of_items)):

              speed = speed_values[row]
              angle = angle_values[row]
              img_resized = img_data[row]

              fields = {
                  'X': {'data': img_resized.flatten().tobytes(), '_type': _bytes_feature},
                  'speed': {'data': speed, '_type': _float_feature},
                  'steering_angle': {'data': angle, '_type': _float_feature},
                  }

              example = serialize_example(fields)
              writer.write(example)
      # NOTE(review): this stops after the first chunk found in `test`, so any
      # other listed chunk is never converted by this run -- confirm intended.
      break