In [12]:
import os
from PIL import Image

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split

In [13]:
def load_and_preprocess_dataset(dataset_name='tf_flowers'):
    """Load a TFDS dataset's ``train`` split and divide it into train/test lists.

    Args:
        dataset_name: Dataset name passed to ``tfds.load``.

    Returns:
        ``((X_train, y_train), (X_test, y_test))``. The ``X_*`` entries are
        lists of ``uint8`` image arrays and the ``y_*`` entries are lists of
        the matching labels. 20% of the samples go to the test set, and the
        shuffle is seeded (``random_state=42``) so the split is reproducible.
    """
    dataset = tfds.load(dataset_name, as_supervised=True)

    images = []
    labels = []
    for img, lbl in tfds.as_numpy(dataset['train']):
        img = np.asarray(img)
        if np.issubdtype(img.dtype, np.floating):
            # Float pixels are assumed to be normalised to [0, 1] -- TODO
            # confirm for datasets other than tf_flowers. Clip so values
            # slightly outside the range cannot wrap around on the cast.
            img = np.clip(img * 255, 0, 255).astype(np.uint8)
        else:
            # Integer pixels are already on the 0-255 scale. Multiplying a
            # uint8 array by 255 wraps modulo 256 and corrupts the image,
            # so integer images are passed through without scaling.
            img = img.astype(np.uint8, copy=False)
        images.append(img)
        labels.append(lbl)

    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, test_size=0.2, random_state=42
    )
    return (X_train, y_train), (X_test, y_test)


def save_images(images, labels, data_folder):
    """Write each image to ``data_folder`` as a PNG, plus a ``labels.txt`` index.

    Images are saved as ``image_<i>.png`` where ``i`` is the position in
    ``images``. ``labels.txt`` holds one ``<image path>,<label>`` line per
    image, in the same order.

    Args:
        images: Iterable of image arrays accepted by ``PIL.Image.fromarray``.
            Arrays shaped ``(H, W, 1)`` are saved as single-channel images.
        labels: Iterable of labels, paired with ``images`` by position.
        data_folder: Output directory; created (with parents) if missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_folder, exist_ok=True)

    image_paths = []
    for i, (img, label) in enumerate(zip(images, labels)):
        pixels = np.asarray(img)
        # Image.fromarray rejects (H, W, 1) arrays, so drop the trailing
        # channel axis to save them as 2-D grayscale.
        if pixels.ndim == 3 and pixels.shape[-1] == 1:
            pixels = pixels[..., 0]
        img_path = os.path.join(data_folder, f'image_{i}.png')
        Image.fromarray(pixels).save(img_path)
        image_paths.append((img_path, label))

    # Explicit encoding keeps the index file independent of the locale.
    with open(os.path.join(data_folder, 'labels.txt'), 'w', encoding='utf-8') as f:
        f.writelines(f"{img_path},{label}\n" for img_path, label in image_paths)

In [14]:
# Fetch tf_flowers and unpack the train/test image and label lists.
(X_train, y_train), (X_test, y_test) = load_and_preprocess_dataset(dataset_name='tf_flowers')


In [None]:
# Write each split to its own folder under ../data.
for split_name, split_images, split_labels in (
    ("train", X_train, y_train),
    ("test", X_test, y_test),
):
    save_images(split_images, split_labels, os.path.join("..", "data", split_name))