In [8]:
import os
import pandas as pd
from translate import translate  # your translate.py file
import tensorflow as tf
from sklearn.model_selection import train_test_split
from PIL import Image

# -----------------------------
# 1️⃣ Build DataFrame with translated labels
# -----------------------------
dataset_path = r"C:\Users\rache\Downloads\archive\raw-img"
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')  # matched case-insensitively

data = []
untranslated = []  # folder names that have no entry in `translate`

# os.listdir() returns entries in arbitrary order. Sorting makes the row order
# of `df` deterministic, which the seeded train/test split below depends on.
for folder in sorted(os.listdir(dataset_path)):
    folder_path = os.path.join(dataset_path, folder)
    if not os.path.isdir(folder_path):
        continue  # ignore stray files next to the class folders

    # Fall back to the raw folder name when no translation exists, but remember
    # it so the gap is reported instead of silently producing mixed-language labels.
    if folder not in translate:
        untranslated.append(folder)
    label = translate.get(folder, folder)

    for file in sorted(os.listdir(folder_path)):
        if file.lower().endswith(IMAGE_EXTENSIONS):
            data.append([os.path.join(folder_path, file), label])

df = pd.DataFrame(data, columns=["image_path", "label"])

if df.empty:
    # Fail here with a clear message rather than later inside train_test_split.
    raise ValueError(f"No images with extensions {IMAGE_EXTENSIONS} found under {dataset_path}")

if untranslated:
    print("Warning: no translation for folder(s), raw name used as label:", untranslated)

print(df.head())
print("Total images:", len(df))
print(df['label'].value_counts())

# -----------------------------
# 2️⃣ Train / Test Split (90/10)
# -----------------------------
# Stratified hold-out: 10% of the rows go to the test set, and stratifying on
# the label keeps the class proportions the same in both parts. The fixed
# random_state makes the split repeatable for a given row order.
label_column = df['label']
train_df, test_df = train_test_split(
    df, test_size=0.1, random_state=42, stratify=label_column
)

for caption, split_frame in (("Train size:", train_df), ("Test size:", test_df)):
    print(caption, len(split_frame))

# -----------------------------
# 3️⃣ Function to convert DataFrame to tf.data.Dataset
# -----------------------------
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32


def df_to_dataset(df, shuffle=True, batch_size=32, class_names=None, seed=None):
    """Build a batched ``tf.data.Dataset`` of (image, label) pairs from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``image_path`` column (file paths) and a ``label``
        column (class names).
    shuffle : bool, default True
        Shuffle the samples with a buffer covering the whole frame.
    batch_size : int, default 32
        Number of samples per batch.
    class_names : sequence of str, optional
        Fixed label vocabulary; a label's integer code is its index in this
        sequence. Pass the same sequence for every split so the codes agree.
        When omitted, the codes come from the sorted unique labels of ``df``
        itself (the previous behaviour), which only matches across splits if
        every split contains every class.
    seed : int, optional
        Seed for the shuffle, for a reproducible order.

    Returns
    -------
    tf.data.Dataset
        Batches of ``(images, labels)``; images are resized to
        ``IMG_SIZE x IMG_SIZE`` with 3 channels and scaled by 1/255.

    Raises
    ------
    ValueError
        If a label in ``df`` is missing or not listed in ``class_names``.
    """
    paths = df['image_path'].values
    # string -> integer code; categories=None infers the sorted unique labels
    labels = pd.Categorical(df['label'], categories=class_names).codes
    if (labels < 0).any():
        # pandas encodes missing / unknown categories as -1
        raise ValueError("df['label'] contains values that are missing or not in class_names")

    ds = tf.data.Dataset.from_tensor_slices((paths, labels))

    if shuffle:
        # Shuffle BEFORE decoding so the buffer holds only (path, label) pairs.
        # Shuffling after the map would keep up to len(df) decoded float images
        # (IMG_SIZE * IMG_SIZE * 3 * 4 bytes each) in memory.
        ds = ds.shuffle(buffer_size=max(len(df), 1), seed=seed)

    def process_path(path, label):
        raw = tf.io.read_file(path)
        # decode_image picks the decoder from the file content (the file list
        # includes PNGs as well as JPEGs); expand_animations=False keeps the
        # result 3-D so it can be resized.
        img = tf.io.decode_image(raw, channels=3, expand_animations=False)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        img = img / 255.0  # normalize pixels to [0,1]
        return img, label

    ds = ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    return ds

# -----------------------------
# 4️⃣ Create TensorFlow Datasets
# -----------------------------
# Training batches are shuffled; the test set keeps its row order.
train_dataset = df_to_dataset(train_df, shuffle=True, batch_size=BATCH_SIZE)
test_dataset = df_to_dataset(test_df, batch_size=BATCH_SIZE, shuffle=False)

# -----------------------------
# 5️⃣ Quick verification
# -----------------------------
# Pull a single batch and sanity-check shapes, labels and the pixel range.
for images, labels in train_dataset.take(1):
    batch = images.numpy()  # convert once and reuse for min / max / preview
    print("Image batch shape:", images.shape)
    print("Label batch shape:", labels.shape)
    print("Example labels:", labels[:10].numpy())
    print("Min pixel:", batch.min())
    print("Max pixel:", batch.max())

    # Optional: visualize the first image.
    # Kept inside the loop so it never reads `images` when no batch was produced.
    img = batch[0]
    # Pixels were scaled to [0, 1]; rescale to 8-bit for PIL.
    Image.fromarray((img * 255).astype("uint8")).show()













                                          image_path label
0  C:\Users\rache\Downloads\archive\raw-img\cane\...   dog
1  C:\Users\rache\Downloads\archive\raw-img\cane\...   dog
2  C:\Users\rache\Downloads\archive\raw-img\cane\...   dog
3  C:\Users\rache\Downloads\archive\raw-img\cane\...   dog
4  C:\Users\rache\Downloads\archive\raw-img\cane\...   dog
Total images: 26179
label
dog          4863
ragno        4821
chicken      3098
horse        2623
butterfly    2112
cow          1866
squirrel     1862
sheep        1820
cat          1668
elephant     1446
Name: count, dtype: int64
Train size: 23561
Test size: 2618
Image batch shape: (32, 224, 224, 3)
Label batch shape: (32,)
Example labels: [1 7 8 7 3 4 4 5 4 1]
Min pixel: 0.0
Max pixel: 1.0


In [3]:
# `dataset` is not defined anywhere in this notebook; the pipeline cell creates
# `train_dataset` / `test_dataset`, so inspect the training set here.
for images, labels in train_dataset.take(1):
    print("Shape des images :", images.shape)
    print("Shape des labels :", labels.shape)
    print("Exemple labels :", labels[:10].numpy())


Shape des images : (32, 224, 224, 3)
Shape des labels : (32,)
Exemple labels : [9 8 5 1 1 8 4 6 6 8]


In [4]:
# `dataset` is not defined anywhere in this notebook; use `train_dataset`,
# which the pipeline cell creates.
for images, labels in train_dataset.take(1):
    batch = images.numpy()  # convert once instead of once per statistic
    print("Min pixel :", batch.min())
    print("Max pixel :", batch.max())


Min pixel : 0.0
Max pixel : 1.0


In [None]:
from PIL import Image

# `dataset` is not defined anywhere in this notebook; use `train_dataset`,
# which the pipeline cell creates.
for images, labels in train_dataset.take(1):
    img = images[0].numpy()
    # Pixels were scaled to [0, 1]; rescale to 8-bit before handing to PIL.
    Image.fromarray((img * 255).astype("uint8")).show()

# x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)