# Q2 — Data Loading & Augmentation Using Keras
Builds `all_image_paths` and `temp` (a list of path/label pairs), defines `custom_data_generator` (batch size 8), and creates a Keras validation generator with batch size 8. Uses the dummy `images_dataSAT` folders from Q1.

In [None]:
import os, glob, numpy as np
from pathlib import Path
from PIL import Image
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# The dummy dataset from Q1 is read from disk here; nothing below creates it.
root = Path("images_dataSAT")
classes = ['class_0_non_agri', 'class_1_agri']

all_image_paths = []
all_labels = []
# A class's position in `classes` is its integer label; files are visited
# in sorted order within each class folder.
for label, class_name in enumerate(classes):
    class_dir = root / class_name
    class_files = [
        str(candidate)
        for candidate in sorted(class_dir.glob('*'))
        if candidate.suffix.lower() in {'.jpg', '.jpeg', '.png'}
    ]
    all_image_paths.extend(class_files)
    all_labels.extend([label] * len(class_files))

print('Total images found:', len(all_image_paths))

# temp: (path, label) pairs, with a short preview printed below
temp = list(zip(all_image_paths, all_labels))
print('\nFirst 6 entries in temp:')
for entry in temp[:6]:
    print(entry)

# image-loading helper and custom_data_generator (yields batches of size 8 by default)
def load_img_array(path, target_size=(64,64)):
    """Load an image file as a scaled RGB array.

    Args:
        path: Location of the image file to open.
        target_size: Size handed to ``Image.resize``; Pillow interprets
            the pair as (width, height).

    Returns:
        NumPy array of the resized RGB pixels divided by 255.0.
    """
    # Pillow opens files lazily; the context manager guarantees the file
    # handle is released instead of lingering until garbage collection.
    with Image.open(path) as img:
        # convert/resize return new, fully loaded images, so the result
        # remains valid after the source file is closed.
        resized = img.convert('RGB').resize(target_size)
    return np.array(resized) / 255.0

def custom_data_generator(paths, labels, batch_size=8, shuffle=True, num_classes=2):
    """Yield (images, one-hot labels) batches forever.

    Each pass walks every sample once in steps of ``batch_size``; the last
    batch of a pass is smaller when the sample count is not a multiple of
    ``batch_size``.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of integer class labels, aligned with ``paths``.
        batch_size: Maximum number of samples per batch (must be >= 1).
        shuffle: Reshuffle the sample order at the start of every pass.
        num_classes: Width of the one-hot label encoding.

    Yields:
        Tuple ``(X, y)``: ``X`` is the stacked output of ``load_img_array``
        for the batch, ``y`` the labels encoded by
        ``tf.keras.utils.to_categorical``.

    Raises:
        ValueError: On the first ``next()`` call, if ``paths`` is empty,
            ``labels`` has a different length, or ``batch_size`` < 1.
    """
    n_samples = len(paths)
    # Without these checks an empty dataset or a negative batch size makes
    # the endless loop below spin forever without yielding anything.
    if n_samples == 0:
        raise ValueError("custom_data_generator needs at least one image path")
    if len(labels) != n_samples:
        raise ValueError(
            f"paths and labels differ in length: {n_samples} vs {len(labels)}"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    idxs = np.arange(n_samples)
    while True:
        if shuffle:
            np.random.shuffle(idxs)
        for start in range(0, n_samples, batch_size):
            batch_idx = idxs[start:start + batch_size]
            X = np.stack([load_img_array(paths[i]) for i in batch_idx])
            y = tf.keras.utils.to_categorical(
                [labels[i] for i in batch_idx], num_classes=num_classes
            )
            yield X, y

# Draw a single batch (requested size 8) from the custom generator and report its shapes
gen = custom_data_generator(all_image_paths, all_labels, batch_size=8)
Xb, yb = next(gen)
print('\nCustom batch shapes:', Xb.shape, yb.shape)

# Keras validation generator (batch size 8): pixels rescaled by 1/255,
# reading the 'validation' subset set aside by validation_split=0.2
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
val_gen = datagen.flow_from_directory(
    str(root),
    target_size=(64, 64),
    batch_size=8,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
)
Xv, yv = next(val_gen)
print('Validation batch shapes (batch_size=8):', Xv.shape, yv.shape)