In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import configparser
import os
import shutil
import sys
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf

# Add the parent directory to the module search path so that the `utils`
# package imported just below can be resolved from this notebook's location.
# This must run before the import.
sys.path.append("..")
from utils.data_utils import (
    _parse_function,
    dump_tfrecord,
)

# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and data warnings;
# consider narrowing it to the specific categories that are noisy.
warnings.filterwarnings("ignore")

In [3]:
# Load the project configuration (path is relative to this notebook).
config_path = os.path.join("..", "conf", "config.ini")
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail loudly here instead of with an opaque
# KeyError on the first option lookup.
if not config.read(config_path):
    raise FileNotFoundError(f"Configuration file not found: {config_path}")

project_config = config["project"]
raw_data_path = project_config["raw_data_path"]
# Use configparser's typed getters rather than eval(): eval() would execute
# whatever Python expression is stored in the config file.
validate_by_driver = project_config.getboolean("validate_by_driver")
n_tfrec_chunks = project_config.getint("n_tfrec_chunks")

# Human-readable description of each class code.
# NOTE(review): the keys presumably match the sub-directory names under
# imgs/train that are used as labels below -- confirm against the dataset.
label_names = {
    "c0": "safe driving",
    "c1": "texting - right",
    "c2": "talking on the phone - right",
    "c3": "texting - left",
    "c4": "talking on the phone - left",
    "c5": "operating the radio",
    "c6": "drinking",
    "c7": "reaching behind",
    "c8": "hair and makeup",
    "c9": "talking to passenger",
}
num_classes = len(label_names)

## *TFRecord* Files Creation

In [4]:
# Build the TFRecord shards for the labelled training images.
#
# Every training example is a tuple (absolute image path, label, file name),
# where the label is the name of the image's parent directory and the file
# name is "<label>/<image file>".
train_raw_data_path = os.path.join(raw_data_path, "imgs", "train")

# Dedicated, seeded generator: together with the sorted listings below it makes
# the content of every shard reproducible across kernel restarts.
shuffle_rng = np.random.RandomState(42)

train_examples = []
# os.listdir() and glob() return entries in arbitrary order, so sort both to
# give the seeded shuffle a stable starting point.
for label in sorted(os.listdir(train_raw_data_path)):
    img_paths = sorted(
        tf.io.gfile.glob(os.path.join(train_raw_data_path, label, "*.jpg"))
    )
    for img_path in img_paths:
        file_name = os.path.join(label, os.path.basename(img_path))
        train_examples.append((img_path, label, file_name))

n_train_examples = len(train_examples)

# Validate the chunk count before deleting anything: with fewer than one chunk
# the loop below would write no shard at all after the old ones were removed.
if not validate_by_driver and n_tfrec_chunks < 1:
    raise ValueError(f"n_tfrec_chunks must be >= 1, got {n_tfrec_chunks}")

# Start from a clean output directory so shards from earlier runs never mix
# with the ones written now.
train_proc_data_path = os.path.join(raw_data_path, "tfrec", "train")
if os.path.exists(train_proc_data_path):
    shutil.rmtree(train_proc_data_path)

if validate_by_driver:
    # One shard per driver ("subject"), named "<subject>.tfrec".
    driver_imgs_list = pd.read_csv(os.path.join(raw_data_path, "driver_imgs_list.csv"))
    # Same "<classname>/<img>" form as the file name stored in train_examples.
    driver_imgs_list["img_path"] = (
        driver_imgs_list["classname"] + os.path.sep + driver_imgs_list["img"]
    )
    img_paths_by_driver = (
        driver_imgs_list.groupby("subject")["img_path"].apply(list).to_dict()
    )

    for key, value in img_paths_by_driver.items():
        # Set membership keeps the filter linear in the number of examples;
        # testing against the raw list made it quadratic.
        driver_file_names = set(value)
        train_examples_by_driver = [
            example for example in train_examples if example[2] in driver_file_names
        ]
        shuffle_rng.shuffle(train_examples_by_driver)
        # NOTE(review): unlike the chunked branch below, is_prediction is not
        # passed here, so dump_tfrecord's default applies -- confirm it is the
        # labelled (non-prediction) mode.
        dump_tfrecord(
            train_examples_by_driver,
            os.path.join(train_proc_data_path, f"{key}.tfrec"),
            num_classes=num_classes,
        )

else:
    # n_tfrec_chunks shards named "00.tfrec", "01.tfrec", ...: all but the last
    # hold n_train_examples // n_tfrec_chunks examples, the last one also takes
    # the remainder.
    shuffle_rng.shuffle(train_examples)
    chunk_size = n_train_examples // n_tfrec_chunks
    for i in range(n_tfrec_chunks):
        start = i * chunk_size
        if i == n_tfrec_chunks - 1:
            end = n_train_examples
        else:
            end = start + chunk_size

        dump_tfrecord(
            train_examples[start:end],
            os.path.join(train_proc_data_path, f"{i:02d}.tfrec"),
            num_classes=num_classes,
            is_prediction=False,
        )

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1121 [00:00<?, ?it/s]

  0%|          | 0/1125 [00:00<?, ?it/s]

In [5]:
# Build the TFRecord shards for the unlabelled test images.
#
# Every test example is a tuple (absolute image path, file name); the images
# are processed in sorted path order and are not shuffled.
test_raw_data_path = os.path.join(raw_data_path, "imgs", "test")

test_examples = []
img_paths = sorted(tf.io.gfile.glob(os.path.join(test_raw_data_path, "*.jpg")))
for img_path in img_paths:
    file_name = os.path.basename(img_path)
    test_examples.append((img_path, file_name))

n_test_examples = len(test_examples)

# The test set is split into four times as many shards as the training set.
# Derive a separate variable instead of updating n_tfrec_chunks in place:
# the in-place update multiplied the shared setting again on every re-run of
# this cell and leaked the enlarged value into any later re-run of the
# training cell.
# NOTE(review): if a later cell relied on n_tfrec_chunks having been multiplied
# here, it should use n_test_tfrec_chunks instead.
test_chunks_multiplier = 4
n_test_tfrec_chunks = n_tfrec_chunks * test_chunks_multiplier

# Validate the chunk count before deleting anything: with fewer than one chunk
# the loop below would write no shard at all after the old ones were removed.
if n_test_tfrec_chunks < 1:
    raise ValueError(f"n_tfrec_chunks must be >= 1, got {n_tfrec_chunks}")

# Start from a clean output directory so shards from earlier runs never mix
# with the ones written now.
test_proc_data_path = os.path.join(raw_data_path, "tfrec", "test")
if os.path.exists(test_proc_data_path):
    shutil.rmtree(test_proc_data_path)

# Shards are named "00.tfrec", "01.tfrec", ...: all but the last hold
# n_test_examples // n_test_tfrec_chunks examples, the last one also takes the
# remainder.
chunk_size = n_test_examples // n_test_tfrec_chunks
for i in range(n_test_tfrec_chunks):
    start = i * chunk_size
    if i == n_test_tfrec_chunks - 1:
        end = n_test_examples
    else:
        end = start + chunk_size

    dump_tfrecord(
        test_examples[start:end],
        os.path.join(test_proc_data_path, f"{i:02d}.tfrec"),
        is_prediction=True,
    )

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/996 [00:00<?, ?it/s]

  0%|          | 0/1042 [00:00<?, ?it/s]