In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import albumentations as A

In [10]:
# Number of files every class folder should reach; classes holding fewer are
# topped up with augmented copies by the balancing loop further down.
TARGET_COUNT = 200
# Dataset root (relative path); each sub-directory is treated as one class.
DATASET_DIR = "Faulty_solar_panel"

In [11]:
# Augmentation pipeline applied to each randomly chosen source image.
# Transforms are listed in application order; where given, `p` is the
# probability that the transform is applied on a single call.
augment = A.Compose([
    # Random crop (scale bounds 0.6-1.0) resized to 1024x1024.
    A.RandomResizedCrop(size=(1024, 1024), scale=(0.6, 1.0)),
    # Geometric variation: mirror flips and rotation limited to 45 degrees.
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Rotate(limit=45, p=0.7),
    # Photometric variation: brightness/contrast and colour jitter.
    A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.4, p=0.7),
    A.ColorJitter(p=0.6),
    # Degradations: additive noise and blur.
    A.GaussNoise(p=0.4),
    A.GaussianBlur(p=0.3),
])

In [12]:
def load_image(path):
    """Read an image from disk and return it in RGB channel order.

    Args:
        path: Path of the image file to read.

    Returns:
        The decoded image, converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If no file exists at ``path``.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    img = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising; without
    # this check the cvtColor call below fails with an opaque OpenCV assertion
    # that does not mention which file was at fault.
    if img is None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image file not found: {path}")
        raise ValueError(f"Could not decode image: {path}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def save_image(path, image):
    """Write an RGB image to disk with OpenCV.

    Args:
        path: Destination file path; the extension selects the image format.
        image: Image in RGB channel order.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    # OpenCV writes channels in BGR order, so convert back before saving.
    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite returns False on failure instead of raising; surface it so a
    # failed write cannot silently leave a class short of its target count.
    if not cv2.imwrite(path, bgr):
        raise OSError(f"Failed to write image: {path}")

def remove_augmented_images(dataset_dir, prefix="aug_"):
    """Delete every file under ``dataset_dir`` whose name starts with ``prefix``.

    The directory tree is walked recursively, so files in nested folders are
    removed as well.

    Args:
        dataset_dir: Root directory to search.
        prefix: Filename prefix that marks a file for deletion.

    Returns:
        The number of files that were deleted.
    """
    # Lazy generator: each match is deleted as the walk reaches it.
    matches = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dataset_dir)
        for name in names
        if name.startswith(prefix)
    )

    deleted = 0
    for deleted, target in enumerate(matches, start=1):
        os.remove(target)

    return deleted

In [13]:
# Delete "aug_"-prefixed files left over from earlier runs so that the
# per-class counts taken afterwards do not include previously generated images.
remove_count = remove_augmented_images(DATASET_DIR)
print(f"Removed {remove_count} previously generated augmented images.")

Removed 0 previously generated augmented images.


In [14]:
# Balance the dataset: every class folder holding fewer than TARGET_COUNT images
# is topped up with augmented copies of randomly chosen images from that folder.

# Only files with these extensions are counted and used as augmentation sources,
# so stray entries (.DS_Store, Thumbs.db, nested folders, ...) neither inflate
# the count nor get passed to the image loader.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")

# Sorted listings make the processing order independent of the filesystem.
for cls in sorted(os.listdir(DATASET_DIR)):
    class_dir = os.path.join(DATASET_DIR, cls)
    if not os.path.isdir(class_dir):
        continue

    images = sorted(
        name
        for name in os.listdir(class_dir)
        if name.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(class_dir, name))
    )
    n_existing = len(images)

    print(f"\nClass: {cls} | Count: {n_existing}")

    if n_existing >= TARGET_COUNT:
        continue

    # np.random.choice raises on an empty sequence; a folder without any image
    # (e.g. a hidden checkpoint directory) has nothing to augment from.
    if n_existing == 0:
        print(f"Skipping {cls}: no source images to augment.")
        continue

    n_to_generate = TARGET_COUNT - n_existing

    for i in tqdm(range(n_to_generate), desc=cls):
        img_name = np.random.choice(images)
        img_path = os.path.join(class_dir, img_name)

        img = load_image(img_path)

        augmented = augment(image=img)["image"]

        # The "aug_" prefix marks generated files so they can be found and
        # removed again; the loop index keeps names unique within one run.
        new_name = f"aug_{i}_{img_name}"
        save_image(os.path.join(class_dir, new_name), augmented)



Class: Bird-drop | Count: 207

Class: Clean | Count: 193


100%|██████████| 7/7 [00:00<00:00, 36.46it/s]



Class: Dusty | Count: 190


100%|██████████| 10/10 [00:00<00:00, 16.97it/s]



Class: Electrical-damage | Count: 103


100%|██████████| 97/97 [00:03<00:00, 25.87it/s]



Class: Physical-Damage | Count: 69


100%|██████████| 131/131 [00:04<00:00, 26.51it/s]



Class: Snow-Covered | Count: 123


100%|██████████| 77/77 [00:02<00:00, 31.18it/s]
