Notebook to better understand TFRecords

In [1]:
path_tfrecord = './data/waymo/segment-1005081002024129653_5313_150_5333_150_with_camera_labels.tfrecord'

In [58]:
from PIL import ImageStat, Image
import numpy as np
import pandas as pd
import seaborn as sns

from collections import Counter
import tensorflow as tf

In [59]:
def get_perceived_brightness(image):
    # convert image to PIL object to calculate the perceived brightness using the formula 
    # (0.21 x R) + (0.72 x G) + (0.07 x B)
    # INPUT: we expect the input as a numpy array
    im = Image.fromarray(np.uint8(image))
    stat = ImageStat.Stat(im)
    r,g,b = stat.mean
    return np.sqrt((0.21*(r**2) + 0.72*(g**2) + 0.07*(b**2)))

In [60]:
def num_objects(path_tfrecord):
    '''This function counts the number of classes that are present in a tfrecord
       and returns a dic of the classes values
    '''
    # Read the information from a single tfrecord
    raw_dataset = tf.data.TFRecordDataset(path_tfrecord)
    # Initialize Counter of Classes to zero
    count_classes = { 1: 0, 2: 0,  4: 0 }
    brightness = []
    
    # Iterate to each element of the tfrecord
    for i, raw_record in enumerate(raw_dataset):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        # Iterate to each feature item from the tfrecord
        for k, v in example.features.feature.items():
            # extract the information from the class labels
            if k == 'image/object/class/label':
                values = v.int64_list.value
                count_classes= Counter(count_classes)+ Counter(values)
                
            if k == 'image/encoded':
                # the image is encoded as bytes and originally a jped. For that reason we use the tensorflow
                # method tf.image.decode_jpeg from the tf.io API
                image = tf.image.decode_jpeg(v.bytes_list.value[0]).numpy()
                brightness.extend([get_perceived_brightness(image)])
                
                
        frames = i
    brightness = sum(brightness)/len(brightness)
    return count_classes,frames, brightness

In [63]:
classes_list = []
frames_list = []
fileName_list = []
brightness_list = []

for file in glob.glob('./data/waymo/segment*.tfrecord'):
    count_classes, frames, brightness = num_objects(file)
    classes_list.extend([count_classes])
    frames_list.extend([frames])
    brightness_list.extend([brightness])
    fileName_list.extend([file])
    

Create a dataframe out of the information for simpler handling

In [88]:
vehicles = []
pedestrians = []
cyclists = []
for idx in range(len(fileName_list)):
    vehicles.extend([classes_list[idx][1]])
    pedestrians.extend([classes_list[idx][2]])
    cyclists.extend([classes_list[idx][4]])

In [93]:
dict = {'Vehicles': vehicles,
       'Pedestrians': pedestrians,
       'Cyclists': cyclists,
       'brightness_mean': brightness_list,
       'fileName': fileName_list,
       'frames': frames}

In [98]:
df = pd.DataFrame(dict)
df.set_index('fileName', inplace = True)

In [145]:
pd.set_option('display.max_rows', None)
df

Unnamed: 0_level_0,Vehicles,Pedestrians,Cyclists,brightness_mean,frames
fileName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
./data/waymo/segment-10724020115992582208_7660_400_7680_400_with_camera_labels.tfrecord,491,36,116,17.243861,197
./data/waymo/segment-10206293520369375008_2796_800_2816_800_with_camera_labels.tfrecord,881,78,0,20.66878,197
./data/waymo/segment-10082223140073588526_6140_000_6160_000_with_camera_labels.tfrecord,2799,539,0,21.98162,197
./data/waymo/segment-10975280749486260148_940_000_960_000_with_camera_labels.tfrecord,1079,0,0,28.281604,197
./data/waymo/segment-10241508783381919015_2889_360_2909_360_with_camera_labels.tfrecord,1009,440,0,30.923921,197
./data/waymo/segment-10107710434105775874_760_000_780_000_with_camera_labels.tfrecord,621,0,0,43.795894,197
./data/waymo/segment-10226164909075980558_180_000_200_000_with_camera_labels.tfrecord,4239,1546,0,46.708783,197
./data/waymo/segment-10584247114982259878_490_000_510_000_with_camera_labels.tfrecord,1155,0,0,52.578842,197
./data/waymo/segment-11355519273066561009_5323_000_5343_000_with_camera_labels.tfrecord,2346,0,0,76.277924,197
./data/waymo/segment-11070802577416161387_740_000_760_000_with_camera_labels.tfrecord,5359,7395,0,79.930645,197


In [110]:
%matplotlib notebook

fig, axs = plt.subplots(3,1,figsize = (10,8))
sns.histplot(df['Vehicles'], ax = axs[0])
sns.histplot(df['Pedestrians'], ax = axs[1])
sns.histplot(df['Cyclists'], ax = axs[2])
plt.tight_layout()

<IPython.core.display.Javascript object>

In [112]:
plt.figure()
sns.histplot(df['brightness_mean'])

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='brightness_mean', ylabel='Count'>

Let's see what tfrecords are taken during the night

In [115]:
df.sort_values(by = 'brightness_mean', inplace = True)

In [155]:
df_night = df[df['brightness_mean']<70]
num_night_scenes = len(df_night)
print("There are " + str(num_night_scenes) + ' night scenes')
df_night

There are 8 night scenes


Unnamed: 0_level_0,Vehicles,Pedestrians,Cyclists,brightness_mean,frames
fileName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
./data/waymo/segment-10724020115992582208_7660_400_7680_400_with_camera_labels.tfrecord,491,36,116,17.243861,197
./data/waymo/segment-10206293520369375008_2796_800_2816_800_with_camera_labels.tfrecord,881,78,0,20.66878,197
./data/waymo/segment-10082223140073588526_6140_000_6160_000_with_camera_labels.tfrecord,2799,539,0,21.98162,197
./data/waymo/segment-10975280749486260148_940_000_960_000_with_camera_labels.tfrecord,1079,0,0,28.281604,197
./data/waymo/segment-10241508783381919015_2889_360_2909_360_with_camera_labels.tfrecord,1009,440,0,30.923921,197
./data/waymo/segment-10107710434105775874_760_000_780_000_with_camera_labels.tfrecord,621,0,0,43.795894,197
./data/waymo/segment-10226164909075980558_180_000_200_000_with_camera_labels.tfrecord,4239,1546,0,46.708783,197
./data/waymo/segment-10584247114982259878_490_000_510_000_with_camera_labels.tfrecord,1155,0,0,52.578842,197


Let's plot one frame of the one with the highest brightness to verify

In [156]:
file2load = df_night.iloc[-1].name

In [157]:
# Read the information from a single tfrecord
raw_dataset = tf.data.TFRecordDataset(file2load)
    
# Iterate to each element of the tfrecord
for raw_record in raw_dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    # Iterate to each feature item from the tfrecord
    for k, v in example.features.feature.items():
        # extract the information from the class labels                
            if k == 'image/encoded':
                # the image is encoded as bytes and originally a jped. For that reason we use the tensorflow
                # method tf.image.decode_jpeg from the tf.io API
                image = tf.image.decode_jpeg(v.bytes_list.value[0]).numpy()
                plt.figure(figsize = (7,7))
                plt.imshow(image)

<IPython.core.display.Javascript object>

# Creation of Splits
For the creation of the different splits we will shuffle the data using scikit-learn function `shuffle`

In [160]:
from sklearn.utils import shuffle
df = shuffle(df)
df

Unnamed: 0_level_0,Vehicles,Pedestrians,Cyclists,brightness_mean,frames
fileName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
./data/waymo/segment-10770759614217273359_1465_000_1485_000_with_camera_labels.tfrecord,6188,0,0,107.923061,197
./data/waymo/segment-1005081002024129653_5313_150_5333_150_with_camera_labels.tfrecord,2799,272,0,114.560581,197
./data/waymo/segment-1191788760630624072_3880_000_3900_000_with_camera_labels.tfrecord,3700,0,0,141.058816,197
./data/waymo/segment-10235335145367115211_5420_000_5440_000_with_camera_labels.tfrecord,6781,218,26,100.567479,197
./data/waymo/segment-10500357041547037089_1474_800_1494_800_with_camera_labels.tfrecord,5242,196,0,93.579215,197
./data/waymo/segment-10241508783381919015_2889_360_2909_360_with_camera_labels.tfrecord,1009,440,0,30.923921,197
./data/waymo/segment-10206293520369375008_2796_800_2816_800_with_camera_labels.tfrecord,881,78,0,20.66878,197
./data/waymo/segment-11070802577416161387_740_000_760_000_with_camera_labels.tfrecord,5359,7395,0,79.930645,197
./data/waymo/segment-10793018113277660068_2714_540_2734_540_with_camera_labels.tfrecord,2931,157,17,104.142669,197
./data/waymo/segment-10444454289801298640_4360_000_4380_000_with_camera_labels.tfrecord,5047,2750,89,106.994897,197


We will take to start with the classical approach of 60% / 20% / 20% and see how the distribution looks like

In [161]:
number_tf_records = len(df)

In [162]:
number_tf_records

92

In [183]:
# calculate lengths
train_len = int(number_tf_records * 0.6)
val_len = int(number_tf_records * 0.2)

# extract dataFrames
df_train = df[:train_len]
df_val = df[train_len: (train_len + val_len)]
df_test = df[(train_len + val_len):]

Plot the distributions

In [190]:
fig, axs = plt.subplots(3,1,figsize = (10,8))
sns.kdeplot(df_train['Vehicles'], ax = axs[0], label = 'train')
sns.kdeplot(df_val['Vehicles'], ax = axs[0], label = 'validation')
sns.kdeplot(df_test['Vehicles'], ax = axs[0], label = 'test')
axs[0].legend()

sns.kdeplot(df_train['Pedestrians'], ax = axs[1], label = 'train')
sns.kdeplot(df_val['Pedestrians'], ax = axs[1], label = 'validation')
sns.kdeplot(df_test['Pedestrians'], ax = axs[1], label = 'test')
axs[1].legend()

sns.kdeplot(df_train['Cyclists'], ax = axs[2], label = 'train')
sns.kdeplot(df_val['Cyclists'], ax = axs[2], label = 'validation')
sns.kdeplot(df_test['Cyclists'], ax = axs[2], label = 'test')
axs[2].legend()

plt.tight_layout()

<IPython.core.display.Javascript object>

In [191]:
plt.figure()
sns.kdeplot(df_train['brightness_mean'])
sns.kdeplot(df_val['brightness_mean'])
sns.kdeplot(df_test['brightness_mean'])

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='brightness_mean', ylabel='Density'>

# Conclusion
From the probability distributions it can be observed that the shufle procedure create similar datasets for training, validation and test. It will be necessary to evaluate later on, whether it is sufficient or if we have to make use of data augmentation to increase the features that are currently under-represented

In [196]:
# Save the split in a file for later use in create_split.py

# train
file = open('train.txt', 'w')
for row in range(len(df_train)):
    file.write(df_train.iloc[row].name + '\n')
    
file.close()

# validation
file = open('validation.txt', 'w')
for row in range(len(df_val)):
    file.write(df_val.iloc[row].name + '\n')
    
file.close()

# test
file = open('test.txt', 'w')
for row in range(len(df_test)):
    file.write(df_test.iloc[row].name + '\n')
    
file.close()

In [201]:
file = open('train.txt', 'r')
for line in file:
    print(line)

file.close()

./data/waymo/segment-10770759614217273359_1465_000_1485_000_with_camera_labels.tfrecord

./data/waymo/segment-1005081002024129653_5313_150_5333_150_with_camera_labels.tfrecord

./data/waymo/segment-1191788760630624072_3880_000_3900_000_with_camera_labels.tfrecord

./data/waymo/segment-10235335145367115211_5420_000_5440_000_with_camera_labels.tfrecord

./data/waymo/segment-10500357041547037089_1474_800_1494_800_with_camera_labels.tfrecord

./data/waymo/segment-10241508783381919015_2889_360_2909_360_with_camera_labels.tfrecord

./data/waymo/segment-10206293520369375008_2796_800_2816_800_with_camera_labels.tfrecord

./data/waymo/segment-11070802577416161387_740_000_760_000_with_camera_labels.tfrecord

./data/waymo/segment-10793018113277660068_2714_540_2734_540_with_camera_labels.tfrecord

./data/waymo/segment-10444454289801298640_4360_000_4380_000_with_camera_labels.tfrecord

./data/waymo/segment-10975280749486260148_940_000_960_000_with_camera_labels.tfrecord

./data/waymo/segment-102319