# Task
Build an image classification model using a Convolutional Neural Network (CNN) with a limited dataset (10 to 20 images per category).

## Load and preprocess data

### Subtask:
Load your image data, focusing on techniques like data augmentation to artificially increase the size of your dataset.


**Reasoning**:
Define the dataset path, configure the ImageDataGenerator for data augmentation, specify target size and batch size, and create data generators for training and validation sets.



In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# 1. Location of the image dataset on disk.
# The directory named below is expected to contain one subfolder per category.
DATASET_PATH = 'nic_dataset'

# If the dataset directory is missing, scaffold a minimal placeholder layout
# (one folder and one file per category) so the rest of the cell can execute.
# NOTE(review): the placeholder files are zero-byte, so they presumably cannot
# be decoded as images once a batch is actually loaded - replace them with real
# images before training.
if not os.path.exists(DATASET_PATH):
    placeholder_layout = (('nic_old', 'image1.jpg'), ('nic_new', 'image2.jpg'))

    # Create every category folder first, then drop one empty file in each.
    for category, _ in placeholder_layout:
        os.makedirs(os.path.join(DATASET_PATH, category))
    for category, file_name in placeholder_layout:
        placeholder_path = os.path.join(DATASET_PATH, category, file_name)
        # Opening for writing and closing immediately leaves an empty file.
        open(placeholder_path, 'w').close()

    print(f"Created dummy dataset directory: {DATASET_PATH}")


# 2. Use Keras ImageDataGenerator objects to load the images from DATASET_PATH.

# 5. Fraction of each class reserved for validation. The same value is given to
# both generators below so the 'training' and 'validation' subsets are selected
# with an identical split.
VALIDATION_SPLIT = 0.2

# Training pipeline: rescale pixel values to [0, 1] and apply random
# augmentation to stretch the very small dataset.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT,
)

# Validation pipeline: rescale only. Validation images must not be randomly
# augmented, otherwise the validation metrics are measured on distorted inputs
# and change from epoch to epoch for reasons unrelated to the model.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

# 3. Specify the target size for the images to ensure uniformity for the CNN input.
TARGET_SIZE = (150, 150)

# 4. Set the batch size for processing the images during training.
BATCH_SIZE = 32

# Create the training data generator (augmented, 'training' subset).
train_generator = train_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# Create the validation data generator (rescale only, 'validation' subset).
validation_generator = validation_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Report how many images and classes each generator discovered.
print("Data generators created.")
print(f"Training data found: {train_generator.samples} images belonging to {train_generator.num_classes} classes.")
print(f"Validation data found: {validation_generator.samples} images belonging to {validation_generator.num_classes} classes.")

# With very few images per class the split can leave a subset with no images
# at all (one image per class and a 0.2 split gives 0 validation images), so
# flag that explicitly instead of letting it surface later during training.
for subset_name, subset_generator in (
    ("training", train_generator),
    ("validation", validation_generator),
):
    if subset_generator.samples == 0:
        print(
            f"WARNING: the {subset_name} subset is empty; "
            "add more images per class or adjust validation_split."
        )


Created dummy dataset directory: nic_dataset
Found 2 images belonging to 2 classes.
Found 0 images belonging to 2 classes.
Data generators created.
Training data found: 2 images belonging to 2 classes.
Validation data found: 0 images belonging to 2 classes.
