In [None]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Output (height, width) that decode_image resizes every image to.
target_size = (256, 256)

In [None]:
# Glob for the image files: any .jpg exactly one directory below ../data/B3FD.
data_path = "../data/B3FD/*/*.jpg"

In [None]:
# Dataset of file paths matching the glob. list_files shuffles the paths by
# default (shuffle=True), so the order can differ between runs unless a seed
# is passed.
ds = tf.data.Dataset.list_files(data_path)

In [None]:
# Peek at a handful of listed paths to sanity-check the glob.
fp = ds.take(5)
for f in fp:
    # Last two path components: parent directory and file name.
    tail = tf.strings.split(f, os.sep)[-2:]
    print(tail.numpy())
    print(f.numpy())

In [None]:
# Age annotations, parsed as a headerless, space-separated file whose two
# columns are named "filepath" and "age".
metadata = pd.read_csv(
    "../data/B3FD_metadata/B3FD_age.csv",
    names=["filepath", "age"],
    header=None,
    sep=" ",
)

In [None]:
# Preview the first rows to check the columns were parsed as expected.
metadata.head()

In [None]:
def decode_image(raw_img):
    """Decode JPEG bytes into a 3-channel image resized to ``target_size``.

    Args:
        raw_img: Scalar string tensor holding the encoded JPEG data.

    Returns:
        The resized image tensor, with its static shape pinned to
        ``(*target_size, 3)``.
    """
    height, width = target_size
    decoded = tf.image.decode_jpeg(raw_img, channels=3)
    resized = tf.image.resize(decoded, [height, width])
    # Pin the static shape so downstream consumers see fixed dimensions.
    resized.set_shape([height, width, 3])
    return resized

In [None]:
def get_label(file_path):
    """Look up the age label for one image path in ``metadata``.

    Must run eagerly (for example through ``tf.py_function``), because it
    calls ``.numpy()`` on the incoming tensor.

    Args:
        file_path: Scalar string tensor holding the path of an image file.

    Returns:
        The ``age`` value of the first ``metadata`` row whose ``filepath``
        equals the last two components of ``file_path`` joined by ``os.sep``.

    Raises:
        IndexError: If no ``metadata`` row matches the path.
    """
    # Build the lookup key "<parent directory><os.sep><file name>" with plain
    # string operations; no TensorFlow ops are needed in eager Python code.
    # NOTE(review): assumes metadata.filepath uses the same separator as
    # os.sep -- confirm before running on a platform where they differ.
    path_text = file_path.numpy().decode("utf-8")
    identifier = os.sep.join(path_text.split(os.sep)[-2:])

    # This compares the whole filepath column on every call.
    ages = metadata.loc[metadata.filepath == identifier, "age"]
    if ages.empty:
        # Same exception type the bare .iloc[0] produced, but the message now
        # names the key that is missing from the metadata.
        raise IndexError(f"no age entry in metadata for {identifier!r}")
    return ages.iloc[0]

In [None]:
def process_path(file_path):
    """Turn an image path into an ``(image, label)`` pair for ``Dataset.map``.

    Args:
        file_path: Scalar string tensor holding the path of an image file.

    Returns:
        A tuple ``(img, label)``: the output of ``decode_image`` for the file
        contents, and the int64 scalar label produced by ``get_label``.
    """
    # get_label needs eager access to the path value, so it is wrapped in
    # py_function instead of being traced into the graph.
    label = tf.py_function(get_label, inp=[file_path], Tout=tf.int64)
    # py_function outputs carry no static shape; declare the label a scalar so
    # it has a fully defined shape like the image does.
    label.set_shape([])
    # Load the raw data from the file as a string
    raw = tf.io.read_file(file_path)
    img = decode_image(raw)
    return img, label


# Attach images and labels to every path; AUTOTUNE leaves the degree of
# parallelism to tf.data. This rebinds `ds`, so re-running the cell would map
# the already-mapped dataset again.
ds = ds.map(
    process_path,
    num_parallel_calls=tf.data.AUTOTUNE,
)

In [None]:
# Pull one element as NumPy values to confirm the image shape and the label.
sample = ds.take(1)
for img, label in sample.as_numpy_iterator():
    print("Image Shape:", img.shape)
    print("Label:", label)

In [None]:
# 3x3 preview grid: each panel shows one image titled with its label.
plt.figure(figsize=(8, 8))
for i, (image, label) in enumerate(ds.take(9)):
    ax = plt.subplot(3, 3, i + 1)
    # Integer RGB data is drawn on the 0-255 scale by imshow, hence the cast.
    pixels = image.numpy().astype("int32")
    ax.imshow(pixels)
    ax.set_title(int(label))
    ax.axis("off")
plt.tight_layout()