In [13]:
!pip install --user albumentations


Collecting albumentations
  Using cached albumentations-2.0.7-py3-none-any.whl.metadata (43 kB)
Collecting albucore==0.0.24 (from albumentations)
  Using cached albucore-0.0.24-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Using cached opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached albumentations-2.0.7-py3-none-any.whl (367 kB)
Using cached albucore-0.0.24-py3-none-any.whl (15 kB)
Using cached opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
Installing collected packages: opencv-python-headless, albucore, albumentations

   ---------------------------------------- 0/3 [opencv-python-headless]
   ---------------------------------------- 0/3 [opencv-python-headless]
   ---------------------------------------- 0/3 [opencv-python-headless]
   ---------------------------------------- 0/3 [opencv-python-headless]
   ---------------------------------------- 0/3 [opencv-python-hea



In [7]:
!python -m pip install --upgrade pip


Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------- ----------------- 1.0/1.8 MB 6.3 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 4.8 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.0.1
    Uninstalling pip-25.0.1:
      Successfully uninstalled pip-25.0.1
Successfully installed pip-25.1.1




In [15]:
# === STEP 1: Install Albumentations (only once)
# %pip install -q albumentations  # Uncomment if albumentations is not installed

# === STEP 2: Import Libraries
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from albumentations import Compose, HorizontalFlip, Rotate, RandomBrightnessContrast
import zipfile

# === STEP 3: CONFIG for Jupyter
ORIG_BASE = 'graded_dataset/train'               # 👈 Local dataset path
TARGET_BASE = 'graded_dataset_augmented'         # Output folder for augmented + split data
GRADES = ['Grade_2', 'Grade_5']                   # Grades to augment
AUG_PER_IMAGE = 3                                 # How many augmentations per image
SPLIT_RATIO = [0.7, 0.15, 0.15]                   # Train/Val/Test split
SEED = 42                                         # Seed for the augmentation pipeline

# Catch a mistyped ratio early instead of producing a silently skewed split.
assert len(SPLIT_RATIO) == 3 and abs(sum(SPLIT_RATIO) - 1.0) < 1e-9, \
    "SPLIT_RATIO must hold three fractions (train/val/test) that sum to 1"

# Augmentation pipeline: each transform is applied with its own probability `p`.
# Passing `seed` (supported by albumentations 2.x) makes the generated variants
# repeatable after a kernel restart; without it every run yields different images.
augment = Compose([
    HorizontalFlip(p=0.5),
    Rotate(limit=15, p=0.7),
    RandomBrightnessContrast(p=0.4)
], seed=SEED)

# === STEP 4: Load + Augment Images
def load_and_augment_images(orig_dir, augment_count=3):
    """Load every image in ``orig_dir`` and create augmented variants of each.

    Args:
        orig_dir: Directory scanned (non-recursively) for files ending in
            ``.jpg``, ``.jpeg`` or ``.png``; the match is case-insensitive.
        augment_count: Number of augmented variants generated per source
            image. ``0`` returns only the original images.

    Returns:
        A flat list in which each successfully read image is immediately
        followed by its ``augment_count`` augmented variants. Files that
        ``cv2.imread`` cannot read (it returns ``None``) are skipped.
    """
    image_exts = ('.jpg', '.png', '.jpeg')
    all_images = []
    # os.listdir yields names in arbitrary order; sorting keeps the returned
    # list (and any seeded split made from it) identical across runs/machines.
    for fname in sorted(os.listdir(orig_dir)):
        if not fname.lower().endswith(image_exts):
            continue
        img = cv2.imread(os.path.join(orig_dir, fname))
        if img is None:
            continue
        all_images.append(img)
        all_images.extend(
            augment(image=img)['image'] for _ in range(augment_count)
        )
    return all_images

# === STEP 5: Save to train/val/test folders
def save_images_to_split(split_names, base_save_path, grade_name):
    """Write each split's images to ``<base_save_path>/<split>/<grade_name>/``.

    Files are named ``<grade_name>_<split>_<index>.jpg`` where ``index`` is
    the image's position within its split list.

    Args:
        split_names: Mapping of split name (e.g. ``'train'``) to the list of
            images belonging to that split.
        base_save_path: Root output directory; missing folders are created.
        grade_name: Class folder name, also used as the file-name prefix.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a file was not written.
    """
    for split, imgs in split_names.items():
        save_path = os.path.join(base_save_path, split, grade_name)
        os.makedirs(save_path, exist_ok=True)
        for i, img in enumerate(imgs):
            save_file = os.path.join(save_path, f"{grade_name}_{split}_{i}.jpg")
            # cv2.imwrite signals failure through its return value; ignoring
            # it would leave the dataset silently incomplete.
            if not cv2.imwrite(save_file, img):
                raise OSError(f"Failed to write image: {save_file}")

# === STEP 6: Process Each Grade
for grade in GRADES:
    orig_path = os.path.join(ORIG_BASE, grade)
    # augment_count=0 loads the original images only. They are split BEFORE
    # augmenting so that variants of one source image can never land in
    # different splits (which would leak training data into val/test).
    orig_imgs = load_and_augment_images(orig_path, 0)

    # Use the sum of the val/test ratios rather than `1 - train ratio`: the
    # latter is 0.30000000000000004 in floating point and can round the
    # hold-out size up by one sample.
    holdout_ratio = SPLIT_RATIO[1] + SPLIT_RATIO[2]
    train_imgs, temp_imgs = train_test_split(
        orig_imgs, test_size=holdout_ratio, random_state=42
    )
    val_imgs, test_imgs = train_test_split(
        temp_imgs, test_size=SPLIT_RATIO[2] / holdout_ratio, random_state=42
    )

    # Augment inside each split: every original is followed by its variants.
    # NOTE(review): val/test are still augmented, as before; keep them
    # originals-only if evaluation should run on unaugmented images.
    augmented_splits = {}
    for split, imgs in {'train': train_imgs, 'val': val_imgs, 'test': test_imgs}.items():
        expanded = []
        for img in imgs:
            expanded.append(img)
            expanded.extend(augment(image=img)['image'] for _ in range(AUG_PER_IMAGE))
        augmented_splits[split] = expanded

    save_images_to_split(
        split_names=augmented_splits,
        base_save_path=TARGET_BASE,
        grade_name=grade
    )

# Derive the message from GRADES so it stays correct when the config changes.
print(f"✅ Done augmenting & saving {' and '.join(GRADES)}")

# === STEP 7: ZIP Folder
zip_path = 'graded_dataset_augmented.zip'

# Walk the output tree and add every file to the archive, stored under its
# path relative to TARGET_BASE.
with zipfile.ZipFile(zip_path, 'w') as archive:
    for dirpath, _subdirs, filenames in os.walk(TARGET_BASE):
        for filename in filenames:
            source_file = os.path.join(dirpath, filename)
            archive_name = os.path.relpath(source_file, TARGET_BASE)
            archive.write(source_file, archive_name)

print(f"📦 Zipped dataset ready: {zip_path}")




✅ Done augmenting & saving Grade_2 and Grade_5
📦 Zipped dataset ready: graded_dataset_augmented.zip
