In [137]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [138]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from PIL import Image
import plotly.subplots as subplots
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory
import pandas as pd

In [139]:
def norm_image(arr):
    img_norm = np.array(arr)
    img_norm = (img_norm - np.min(img_norm))/np.ptp(img_norm) # Normalisation of z-values (RGB values; 3rd axis) to between 0 and 1
    return img_norm

DATA_ROOT = './drive/MyDrive/ML2DATA/data'
sample_image = Image.open(f'{DATA_ROOT}/0/0000_age_10_gender_0.jpg')
sample_image.load()
sample_image = np.array(sample_image)

img_norm = norm_image(sample_image)

fig = subplots.make_subplots(rows=1, cols=2, subplot_titles=["Unnormalised (0 to 255)", "Normalised (0 to 1)"], horizontal_spacing=0.1)
fig.append_trace(go.Image(z=sample_image), 1, 1)
fig.append_trace(go.Image(z=img_norm, zmin=[0, 0, 0, 0], zmax=[1, 1, 1, 1]), 1, 2)

fig.update_layout(title_text="Side-by-Side sample image comparison, unnormalised and normalised")

fig.show()

In [140]:
labels_csv = pd.read_csv(f'{DATA_ROOT}/metadata_lookup.csv')

# data = image_dataset_from_directory(DATA_ROOT, labels=None)

print(labels_csv.head())

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255., validation_split=0.25)
data_generator = datagen.flow_from_dataframe(
    dataframe = labels_csv,
    directory = DATA_ROOT,
    x_col = 'file_name', 
    y_col = list(labels_csv[['age', 'gender']].columns.values), 
    class_mode='multi_output', 
    batch_size=32,
    seed=16,
    shuffle=True,
    # validate_filenames=False
)

print(data_generator.filenames)

b = 0

while b <= data_generator.batch_index:
    data = data_generator.next()
    break

    print(data)

                    file_name  age  gender
0  0/0000_age_10_gender_0.jpg   10       0
1  0/0001_age_10_gender_0.jpg   10       0
2  0/0002_age_10_gender_0.jpg   10       0
3  0/0003_age_10_gender_0.jpg   10       0
4  0/0004_age_10_gender_0.jpg   10       0
Found 5000 validated image filenames.
['0/0000_age_10_gender_0.jpg', '0/0001_age_10_gender_0.jpg', '0/0002_age_10_gender_0.jpg', '0/0003_age_10_gender_0.jpg', '0/0004_age_10_gender_0.jpg', '0/0005_age_10_gender_0.jpg', '0/0006_age_10_gender_0.jpg', '0/0007_age_10_gender_0.jpg', '0/0008_age_10_gender_0.jpg', '0/0009_age_10_gender_0.jpg', '0/0010_age_10_gender_0.jpg', '0/0011_age_10_gender_0.jpg', '0/0012_age_10_gender_0.jpg', '0/0013_age_10_gender_0.jpg', '0/0014_age_10_gender_0.jpg', '0/0015_age_11_gender_0.jpg', '0/0016_age_11_gender_0.jpg', '0/0017_age_11_gender_0.jpg', '0/0018_age_11_gender_0.jpg', '0/0019_age_11_gender_0.jpg', '0/0020_age_11_gender_0.jpg', '0/0021_age_11_gender_0.jpg', '0/0022_age_11_gender_0.jpg', '0/0023_age_1