In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

In [None]:


# --- Configuration ----------------------------------------------------------
# Source tree (one sub-directory per class) and the destination tree that
# receives, for every source image, a resized copy and one augmented variant.
original_dataset_dir = './dataset'
augmented_dataset_dir = './augmented_dataset'

# Image target size for InceptionV3
target_size = (299, 299)

# Extensions treated as images; compared case-insensitively so that files
# such as `photo.Jpg` or `photo.jpeg` are not silently skipped.
image_extensions = ('.jpg', '.jpeg')

# Seed NumPy's global RNG so repeated runs produce the same augmentations.
# NOTE(review): this relies on the legacy Keras ImageDataGenerator drawing its
# transform parameters from `np.random` -- confirm for the installed version.
augmentation_seed = 42
np.random.seed(augmentation_seed)

# Create the output root if it doesn't exist
os.makedirs(augmented_dataset_dir, exist_ok=True)

# The generator is stateless configuration, so build it once instead of once
# per image.
datagen = ImageDataGenerator(
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Loop through each class directory. Listings are sorted so that the index
# embedded in the output file names (and the seeded augmentation assigned to
# each image) does not depend on filesystem enumeration order.
for class_name in sorted(os.listdir(original_dataset_dir)):
    class_dir = os.path.join(original_dataset_dir, class_name)
    if not os.path.isdir(class_dir):
        continue

    augmented_class_dir = os.path.join(augmented_dataset_dir, class_name)
    os.makedirs(augmented_class_dir, exist_ok=True)

    # `i` counts every directory entry (not only images), exactly as before,
    # so the naming scheme `<stem>_<i>_{aug,ori}<ext>` is unchanged.
    for i, filename in enumerate(sorted(os.listdir(class_dir))):
        # splitext splits on the LAST dot only, so `img.v2.jpg` keeps its
        # `.jpg` extension instead of being saved as `img_<i>_aug.v2`.
        stem, ext = os.path.splitext(filename)
        if ext.lower() not in image_extensions:
            continue

        # Load and resize the image, then add a leading batch axis because
        # `datagen.flow` expects a batch of images.
        img_path = os.path.join(class_dir, filename)
        img = load_img(img_path, target_size=target_size)
        img_array = np.expand_dims(img_to_array(img), axis=0)

        # Draw a single augmented sample; the batch holds exactly one image.
        aug_iter = datagen.flow(img_array, batch_size=1)
        aug_pixels = next(aug_iter)[0].astype('uint8')

        # scale=False: array_to_img otherwise min-max rescales the pixel
        # values to 0..255, which would alter the contrast of saved images.
        aug_img = array_to_img(aug_pixels, scale=False)
        aug_img.save(os.path.join(augmented_class_dir, f'{stem}_{i}_aug{ext}'))

        # Save the (resized) original image as well
        original_img = array_to_img(img_array[0].astype('uint8'), scale=False)
        original_img.save(os.path.join(augmented_class_dir, f'{stem}_{i}_ori{ext}'))


In [None]:
import tensorflow as tf

# Index both directory trees with Keras, announcing each one first.
# `dataset` is rebound on every pass, so once the loop finishes it refers to
# the dataset built from the augmented directory (the last entry).
for banner, root_dir in (
    ('Original Dataset', original_dataset_dir),
    ('Augmented Dataset', augmented_dataset_dir),
):
    print(banner)
    dataset = tf.keras.preprocessing.image_dataset_from_directory(root_dir)

In [None]:
from collections import Counter

def class_distribution(dir):
    """Print and plot the number of files in each class sub-directory.

    Every immediate sub-directory of ``dir`` is treated as one class. For each
    class the regular files directly inside it are counted (nested directories
    are ignored; no filtering by file type is applied). One line per class is
    printed, followed by the grand total, and a bar chart of the counts is
    shown with matplotlib.

    Classes are processed in sorted name order so that the printed lines and
    the bar positions are identical across calls and directly comparable
    between different dataset directories.

    Args:
        dir: Path of the dataset root directory. (The name shadows the
            built-in ``dir``; it is kept so existing keyword callers still
            work.)

    Returns:
        None. Output is produced via ``print`` and ``plt.show()``.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dir`` cannot be listed
            (for example, when it does not exist).
    """
    class_names = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )

    # Plain dict: insertion order (sorted class names) is the display order.
    class_counts = {}
    for class_name in class_names:
        class_path = os.path.join(dir, class_name)
        # Summing booleans counts the entries that are regular files.
        class_counts[class_name] = sum(
            os.path.isfile(os.path.join(class_path, name))
            for name in os.listdir(class_path)
        )

    for class_name, count in class_counts.items():
        print(f'Class {class_name}: {count} images')
    print(sum(class_counts.values()))

    # Imported here, after the textual report, so the counts are still
    # printed even if matplotlib is unavailable.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(5, 2))
    # Concrete lists rather than dict views for the bar positions/heights.
    plt.bar(list(class_counts.keys()), list(class_counts.values()))
    plt.xlabel('Class')
    plt.ylabel('Number of images')
    plt.title('Class Distribution')
    plt.xticks(rotation=10)
    plt.show()


In [None]:
# Print per-class file counts and show the bar chart for the augmented
# dataset directory.
class_distribution(augmented_dataset_dir)

In [None]:
# Print per-class file counts and show the bar chart for the original
# dataset directory.
class_distribution(original_dataset_dir)