In [2]:
import os
from PIL import Image
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from google.colab import drive

# Mount Google Drive so its files are reachable under /content/drive (Colab only)
drive.mount('/content/drive')

# Root folder of the training images on Drive; it is later passed to
# datasets.ImageFolder as `root` and also receives the augmented images
dataset_path = "/content/drive/My Drive/fruittrain"  # Replace with your actual path


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:

# Channel mean/std used by Normalize and by the inverse mapping applied before saving.
# Keeping them in one place guarantees both directions use the same numbers.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Marker embedded in every file name this cell writes: "<class>_aug_<n>.jpg"
AUGMENTED_TAG = "_aug_"

# Define the augmentation pipeline
augmentation_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),   # Randomly flip images horizontally
    transforms.RandomRotation(degrees=15),    # Randomly rotate images by up to 15 degrees
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Random crop and resize
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color adjustments
    transforms.ToTensor(),                    # Convert to tensor
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD)    # Normalize
])


def is_source_image(path):
    """Return True for image files that were not written by a previous run of this cell.

    Augmented images are saved next to the originals, so without this filter a
    re-run would load earlier outputs and augment them a second time.

    Args:
        path: Full path of a candidate file, as supplied by ImageFolder.

    Returns:
        True when the file has a torchvision-supported image extension and its
        name does not start with "<class folder name>_aug_".
    """
    folder, file_name = os.path.split(path)
    generated_prefix = f"{os.path.basename(folder)}{AUGMENTED_TAG}"
    # Passing is_valid_file disables ImageFolder's own extension filter, so redo it here.
    has_image_extension = file_name.lower().endswith(tuple(datasets.folder.IMG_EXTENSIONS))
    return has_image_extension and not file_name.startswith(generated_prefix)


def tensor_to_pil(image_tensor):
    """Undo Normalize/ToTensor and return the result as an 8-bit PIL image.

    Args:
        image_tensor: One normalized image tensor laid out as (channels, height, width).

    Returns:
        A PIL image with pixel values in 0..255.
    """
    pixels = image_tensor.numpy().transpose((1, 2, 0))  # CHW -> HWC
    pixels = pixels * np.array(NORMALIZE_STD) + np.array(NORMALIZE_MEAN)  # Denormalize
    pixels = np.clip(pixels, 0, 1)  # Clip pixel values
    # Round before casting: a plain astype() truncates, so a value such as
    # 126.99997 (float error from the normalize round trip) would become 126.
    return Image.fromarray(np.round(pixels * 255).astype('uint8'))


def save_augmented_images(loader, class_names, output_root, target_count):
    """Write augmented images into per-class folders until target_count is reached.

    The loader is iterated as many times as necessary, so the target can exceed
    the number of source images; every pass draws fresh random augmentations.

    Args:
        loader: Iterable yielding (images, labels) batches of normalized tensors.
        class_names: Sequence mapping a label index to its class folder name.
        output_root: Directory that contains (or will contain) the class folders.
        target_count: Number of images to write; nothing is written when <= 0.

    Returns:
        The number of images actually written.
    """
    saved = 0
    while saved < target_count:
        saved_before_pass = saved
        for images, labels in loader:
            for image_tensor, label in zip(images, labels):
                # Get the class name for the image
                class_name = class_names[label.item()]

                # Define the save path in the original folder
                class_dir = os.path.join(output_root, class_name)
                os.makedirs(class_dir, exist_ok=True)  # Ensure the class folder exists

                # Save the augmented image
                file_name = f"{class_name}{AUGMENTED_TAG}{saved}.jpg"
                tensor_to_pil(image_tensor).save(os.path.join(class_dir, file_name))

                saved += 1
                if saved >= target_count:
                    return saved
        if saved == saved_before_pass:
            # The loader produced nothing; stop instead of looping forever.
            break
    return saved


# Load the original dataset (files produced by earlier runs of this cell are skipped)
dataset = datasets.ImageFolder(
    root=dataset_path,
    transform=augmentation_transforms,
    is_valid_file=is_source_image,
)

# Create a DataLoader for the dataset
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Augment images and save them into the original folder
num_augmented_images = 1000  # Define how many augmented images to generate
image_count = save_augmented_images(dataloader, dataset.classes, dataset_path, num_augmented_images)

print(f"Saved {image_count} augmented images into the original training folder.")


Saved 1000 augmented images into the original training folder.


In [None]:
import os
import random
from PIL import Image
import matplotlib.pyplot as plt

# Path to the dataset whose images are previewed below (currently the validation folder)
original_dataset_path = "/content/drive/My Drive/fruitvalidation"  # Replace with your dataset path

# Function to randomly select images from different classes
def create_image_grid(num_rows=10, num_cols=5, dataset_path=None, figsize=(15, 20)):
    """Display a grid of randomly chosen images, each titled with its class name.

    Every cell is filled by picking a random class folder and then a random
    image inside it, so a class may appear several times or not at all.

    Args:
        num_rows: Number of rows in the grid.
        num_cols: Number of columns in the grid.
        dataset_path: Folder holding one sub-folder per class. Defaults to the
            module-level ``original_dataset_path``.
        figsize: Figure size in inches, forwarded to ``plt.subplots``.

    Raises:
        ValueError: If no class sub-folder under the dataset path contains an image.
    """
    root = original_dataset_path if dataset_path is None else dataset_path
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    # List every class folder once, keeping only real image files so stray
    # entries (notebook checkpoints, desktop.ini, nested folders) never reach Image.open.
    images_by_class = {}
    for entry in sorted(os.listdir(root)):
        class_path = os.path.join(root, entry)
        if not os.path.isdir(class_path):
            continue
        image_paths = [
            os.path.join(class_path, file_name)
            for file_name in sorted(os.listdir(class_path))
            if file_name.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(class_path, file_name))
        ]
        if image_paths:  # Classes without images cannot contribute a grid cell
            images_by_class[entry] = image_paths

    if not images_by_class:
        raise ValueError(f"No images found in any class folder under {root!r}")

    # Randomly scatter images from different classes
    class_names = list(images_by_class)
    selected_images = []
    for _ in range(num_rows * num_cols):
        class_name = random.choice(class_names)
        selected_images.append((class_name, random.choice(images_by_class[class_name])))

    # Create the grid; squeeze=False keeps `axes` two-dimensional even for a 1x1 grid
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for ax, (class_name, img_path) in zip(axes.flat, selected_images):
        # imshow copies the pixel data, so the file can be closed right away
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.set_title(class_name, fontsize=8)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Create and display a 10x5 grid (up to 50 randomly chosen images with class titles)
create_image_grid(num_rows=10, num_cols=5)
