In [1]:
# Colab-only setup: mount Google Drive at /content/drive so the project folders
# referenced later in the notebook (under /content/drive/MyDrive) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm

In [2]:
# Root of the project on the mounted Drive. Every other path is derived from
# it, so relocating the project only requires editing this one line.
project_root = '/content/drive/MyDrive/capstone_periodontal'

# Source image folders, one per image category ("pano" and "peri").
pano_path = f'{project_root}/data_all_fin/pano'
peri_path = f'{project_root}/data_all_fin/peri'

# Destination root; the augmented images are written into per-category
# sub-folders of this directory.
output_path = f'{project_root}/augmented_datasets'

In [3]:
# Count the source images per folder using the same case-insensitive extension
# filter that process_folder applies, so these totals match the number of files
# that will actually be augmented (a bare ".jpg" check would miss e.g. ".JPG"
# or ".png" files).
image_exts = ('.png', '.jpg', '.jpeg')
pano_count = sum(1 for file in os.listdir(pano_path) if file.lower().endswith(image_exts))
peri_count = sum(1 for file in os.listdir(peri_path) if file.lower().endswith(image_exts))

print(pano_count)
print(peri_count)

189
146


# Augment
- blur + noise
- random contrast + CLAHE
- brightness

In [8]:
# Augmentation pipeline, assembled from named stages. The stages are passed to
# A.Compose in the order listed below, and each one carries its own
# probability p.

# Contrast and brightness
contrast_ops = [
    A.RingingOvershoot(p=0.5),  # mimic kodak high contrast
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
    A.CLAHE(tile_grid_size=(4, 4), p=0.3),
]

# Blur
blur_ops = [
    A.AdvancedBlur(noise_limit=(0.9, 2.5), p=0.5),
    A.MedianBlur(blur_limit=(3, 5), p=0.5),
]

# Noise
noise_ops = [
    A.AdditiveNoise(p=0.7),
]

# Grayscale conversion: the only stage with p=1.0, and always the last one.
gray_ops = [
    A.ToGray(p=1.0),
]

transform = A.Compose(contrast_ops + blur_ops + noise_ops + gray_ops)

In [9]:
def augment_and_save(image_path, save_dir, base_name, num_augments=4):
    """Save one source image plus several augmented variants as PNG files.

    The image is read with ``cv2.imread`` and written unchanged to
    ``<save_dir>/<base_name>.png``. The module-level ``transform`` pipeline is
    then applied ``num_augments`` times to the original image, and each result
    is written to ``<save_dir>/<base_name>_aug_<i>.png``.

    Args:
        image_path: Path of the image file to read.
        save_dir: Directory that receives the output files; created if it
            does not exist yet.
        base_name: File-name stem used for every output file.
        num_augments: Number of augmented variants to generate (default 4).

    Returns:
        None. Read and write failures are reported with ``print`` rather than
        raised, so one bad file does not abort a batch run.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to read {image_path}")
        return

    # cv2.imwrite does not create missing directories; without this, every
    # write below would fail when save_dir does not exist yet.
    os.makedirs(save_dir, exist_ok=True)

    def _write(file_name, pixels):
        # cv2.imwrite signals failure through its return value, so check it
        # instead of silently dropping the output.
        save_path = os.path.join(save_dir, file_name)
        if not cv2.imwrite(save_path, pixels):
            print(f"Failed to write {save_path}")

    # Keep an un-augmented copy next to the augmented variants.
    _write(f"{base_name}.png", image)

    for i in range(num_augments):
        # Every pass starts from the original image, never from a previous
        # augmentation result.
        augmented = transform(image=image)['image']
        _write(f"{base_name}_aug_{i}.png", augmented)

In [10]:
def process_folder(input_path, output_path):
    """Run ``augment_and_save`` on every image file found in a folder.

    Only entries whose name ends in .png, .jpg or .jpeg (case-insensitive)
    are processed; sub-directories are not traversed. Each output file stem is
    the source file name without its extension.

    Args:
        input_path: Directory containing the source images.
        output_path: Directory that receives the originals and their
            augmented variants; created if it does not exist yet.
    """
    # augment_and_save writes with cv2.imwrite, which does not create missing
    # directories, so make sure the destination exists before the long loop.
    os.makedirs(output_path, exist_ok=True)

    # Filter before handing the list to tqdm so the progress total reflects the
    # images actually processed, and sort because os.listdir order is arbitrary.
    image_names = sorted(
        name for name in os.listdir(input_path)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    for filename in tqdm(image_names):
        full_path = os.path.join(input_path, filename)
        base_name = os.path.splitext(filename)[0]
        augment_and_save(full_path, output_path, base_name)

In [11]:
# Augment each image category into its own sub-folder of output_path.
for subset, source_dir in (('pano', pano_path), ('peri', peri_path)):
    process_folder(source_dir, os.path.join(output_path, subset))

100%|██████████| 189/189 [05:03<00:00,  1.61s/it]
100%|██████████| 146/146 [00:54<00:00,  2.68it/s]
