# Module 1: Data Loading and Augmentation Using Keras
---

In [None]:
# Install missing packages if needed
!pip install scikit-learn pillow matplotlib numpy

In [None]:
import os
import glob
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split

In [None]:
# Dataset location: one sub-directory per class underneath the base directory.
base_dir = './images_dataSAT/'
dir_non_agri, dir_agri = (
    os.path.join(base_dir, class_dir)
    for class_dir in ('class_0_non_agri', 'class_1_agri')
)

# Shared preprocessing settings.
BATCH_SIZE = 8
IMG_SIZE = (64, 64)

## Task 1: Create the list `all_image_paths` containing the full file paths of all images from both the `class_0_non_agri` and `class_1_agri` directories in the base directory.

In [None]:
# Task 1: Create all_image_paths
# Gather every entry of each class directory; sorting keeps the order reproducible.
non_agri_paths = glob.glob(os.path.join(dir_non_agri, '*'))
non_agri_paths.sort()
agri_paths = glob.glob(os.path.join(dir_agri, '*'))
agri_paths.sort()

# Non-agricultural paths come first, followed by the agricultural ones.
all_image_paths = [*non_agri_paths, *agri_paths]

# Labels line up index-for-index with the paths: 0 = non-agri, 1 = agri.
non_agri_labels = [0 for _ in non_agri_paths]
agri_labels = [1 for _ in agri_paths]
all_labels = [*non_agri_labels, *agri_labels]

# Summary of what was collected.
print('Total non-agricultural images:', len(non_agri_paths))
print('Total agricultural images:', len(agri_paths))
print('Total images in all_image_paths:', len(all_image_paths))
print('Total labels:', len(all_labels))
print('First 3 paths:', all_image_paths[:3])
print('Last 3 paths:', all_image_paths[-3:])

## Task 2: Create a temporary list `temp` by pairing each image path with its label using the `zip` function. Next, randomly select and print 5 image paths and their corresponding labels.

In [None]:
# Task 2: Create temp list using zip and randomly display 5 samples
# Each element of temp is a (path, label) pair.
temp = list(zip(all_image_paths, all_labels))

# Fixed seed so both the shuffle and the sample below are reproducible.
random.seed(42)
random.shuffle(temp)
random_samples = random.sample(temp, k=5)

print('5 Randomly Selected Image Paths and Labels:')
print('=' * 60)
for position, (path, label) in enumerate(random_samples, start=1):
    if label == 1:
        label_name = 'Agricultural'
    else:
        label_name = 'Non-Agricultural'
    print(f'{position}. Path: {path}')
    print(f'   Label: {label} ({label_name})')
    print()

In [None]:
# Unzip shuffled data and split into train/validation
# zip(*temp) transposes the (path, label) pairs into one column of paths and one of labels.
all_image_paths_shuffled, all_labels_shuffled = map(list, zip(*temp))

# Hold out 20% of the samples for validation; the fixed random_state makes the split repeatable.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    all_image_paths_shuffled,
    all_labels_shuffled,
    random_state=42,
    test_size=0.2,
)

print('Training samples:', len(train_paths))
print('Validation samples:', len(val_paths))

In [None]:
# Helper function to load and resize image using PIL (replaces tensorflow load_img/img_to_array)
def load_and_preprocess_image(image_path, target_size=(64, 64)):
    """Load an image file as a normalized RGB array.

    Args:
        image_path: Path of the image file to open with PIL.
        target_size: Size tuple passed unchanged to ``Image.resize``.

    Returns:
        A ``float32`` NumPy array of the RGB pixels, scaled from 0-255 to 0.0-1.0.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are decoded, instead of lingering until garbage collection.
    with Image.open(image_path) as source:
        img = source.convert('RGB').resize(target_size)
    return np.array(img, dtype=np.float32) / 255.0

# Define the custom_data_generator function
def custom_data_generator(image_paths, labels, batch_size, img_size=(64, 64), augment=False):
    """Yield shuffled ``(images, labels)`` batches indefinitely.

    The sample order is reshuffled with ``np.random`` at the start of every
    pass over the data. The final batch of a pass is smaller than
    ``batch_size`` when the number of samples is not a multiple of it.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned index-for-index with ``image_paths``.
        batch_size: Maximum number of samples per batch; must be at least 1.
        img_size: Target size forwarded to ``load_and_preprocess_image``.
        augment: When true, each image is independently flipped left-right
            and up-down, each decided by its own ``np.random.random() > 0.5`` draw.

    Yields:
        A tuple ``(batch_images, batch_labels)`` of NumPy arrays.

    Raises:
        ValueError: Raised on the first ``next()`` call if there are no
            samples, if ``image_paths`` and ``labels`` differ in length, or
            if ``batch_size`` is smaller than 1.
    """
    num_samples = len(image_paths)

    # Without these guards an empty dataset or a negative batch size makes the
    # inner range() empty, so the ``while True`` loop would spin forever
    # without ever yielding a batch.
    if num_samples == 0:
        raise ValueError('image_paths is empty; there is nothing to generate batches from')
    if len(labels) != num_samples:
        raise ValueError(
            f'image_paths and labels must have the same length '
            f'(got {num_samples} and {len(labels)})'
        )
    if batch_size < 1:
        raise ValueError(f'batch_size must be a positive integer (got {batch_size!r})')

    while True:
        # New random order for every pass over the data.
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        for start in range(0, num_samples, batch_size):
            batch_indices = indices[start:start + batch_size]
            batch_images = []
            batch_labels = []
            for idx in batch_indices:
                img_array = load_and_preprocess_image(image_paths[idx], target_size=img_size)
                if augment:
                    # Two independent draws: horizontal flip, then vertical flip.
                    if np.random.random() > 0.5:
                        img_array = np.fliplr(img_array)
                    if np.random.random() > 0.5:
                        img_array = np.flipud(img_array)
                batch_images.append(img_array)
                batch_labels.append(labels[idx])
            yield np.array(batch_images), np.array(batch_labels)

print('custom_data_generator function defined successfully.')

## Task 3: Generate a data batch of size 8 using the custom_data_generator function.

In [None]:
# Task 3: Generate a batch of training data with batch_size = 8
# Reuse the notebook-level BATCH_SIZE / IMG_SIZE settings (8 and (64, 64))
# instead of repeating the literals, so the generator and the plot stay in sync.
train_generator = custom_data_generator(
    train_paths, train_labels, batch_size=BATCH_SIZE, img_size=IMG_SIZE, augment=True
)

batch_images, batch_labels = next(train_generator)

print('Batch images shape:', batch_images.shape)
print('Batch labels shape:', batch_labels.shape)
print('Batch labels:', batch_labels)

# squeeze=False guarantees an array of axes even for a single column;
# ravel() flattens it back to the usual one-dimensional row.
fig, axes = plt.subplots(1, BATCH_SIZE, figsize=(20, 4), squeeze=False)
axes = axes.ravel()
for ax in axes:
    # Hide the frame of every slot, including any left empty by a short batch.
    ax.axis('off')
# zip stops at the shortest input, so a batch with fewer than BATCH_SIZE
# images no longer raises IndexError.
for ax, image, image_label in zip(axes, batch_images, batch_labels):
    ax.imshow(image)
    label_name = 'Agri' if image_label == 1 else 'Non-Agri'
    ax.set_title(f'Label: {image_label}\n({label_name})')
plt.suptitle(f'Training Batch (batch_size={BATCH_SIZE})', fontsize=14)
plt.tight_layout()
plt.show()

## Task 4: Create the validation data using a batch size of 8.

In [None]:
# Task 4: Create validation data with batch_size = 8
# Reuse the notebook-level BATCH_SIZE / IMG_SIZE settings (8 and (64, 64))
# instead of repeating the literals; augmentation stays off for validation.
val_generator = custom_data_generator(
    val_paths, val_labels, batch_size=BATCH_SIZE, img_size=IMG_SIZE, augment=False
)

val_batch_images, val_batch_labels = next(val_generator)

print('Validation batch images shape:', val_batch_images.shape)
print('Validation batch labels shape:', val_batch_labels.shape)
print('Validation batch labels:', val_batch_labels)

# squeeze=False guarantees an array of axes even for a single column;
# ravel() flattens it back to the usual one-dimensional row.
fig, axes = plt.subplots(1, BATCH_SIZE, figsize=(20, 4), squeeze=False)
axes = axes.ravel()
for ax in axes:
    # Hide the frame of every slot, including any left empty by a short batch.
    ax.axis('off')
# zip stops at the shortest input, so a validation batch with fewer than
# BATCH_SIZE images no longer raises IndexError.
for ax, image, image_label in zip(axes, val_batch_images, val_batch_labels):
    ax.imshow(image)
    label_name = 'Agri' if image_label == 1 else 'Non-Agri'
    ax.set_title(f'Label: {image_label}\n({label_name})')
plt.suptitle(f'Validation Batch (batch_size={BATCH_SIZE})', fontsize=14)
plt.tight_layout()
plt.show()

---
## All 4 tasks completed successfully.