In [2]:
import os
import shutil
import random
import pandas as pd
import cv2
import numpy as np
import albumentations as A
import augraphy
from matplotlib import pyplot as plt
%matplotlib inline

# Root directory under which every dataset path used by this notebook is built.
pre_path = '/kkh/'

# Source images and the directory that receives the originals plus augmented copies.
train_kr_image_path = pre_path + 'data/train_kr'
train_kr_aug_image_path = pre_path + 'data/train_kr_aug'

meta_kr_csv_path = pre_path + 'data/meta_kr.csv'
meta_kr_df = pd.read_csv(meta_kr_csv_path)
train_kr_csv_path = pre_path + 'data/train_kr.csv'
train_kr_df = pd.read_csv(train_kr_csv_path)

# train_kr_aug.csv is written by the augmentation cell later in this notebook,
# so it does not exist before the first run. Reading it unconditionally made
# this cell fail on a fresh checkout; fall back to an empty frame with the
# same columns that the augmentation cell writes ('ID', 'target').
train_kr_aug_csv_path = pre_path + 'data/train_kr_aug.csv'
if os.path.exists(train_kr_aug_csv_path):
    train_kr_aug_df = pd.read_csv(train_kr_aug_csv_path)
else:
    train_kr_aug_df = pd.DataFrame(columns=['ID', 'target'])

# Number of augmented images to generate for each source image.
AUGMENTATIONS_PER_IMAGE = 2  # change this value to control how many augmentations are produced

ModuleNotFoundError: No module named 'albumentations'

In [2]:
def setup_directories(image_dir=None, csv_path=None):
    """Reset the augmentation outputs so a run starts from a clean state.

    Deletes and recreates the augmented-image directory, then deletes the
    augmented CSV file if it exists.

    Args:
        image_dir: Directory to wipe and recreate. Defaults to the
            module-level ``train_kr_aug_image_path``.
        csv_path: CSV file to delete. Defaults to the module-level
            ``train_kr_aug_csv_path``.
    """
    if image_dir is None:
        image_dir = train_kr_aug_image_path
    if csv_path is None:
        csv_path = train_kr_aug_csv_path

    # Remove any previous output so stale augmented files cannot linger.
    if os.path.exists(image_dir):
        shutil.rmtree(image_dir)
    os.makedirs(image_dir, exist_ok=True)

    # The previous version passed the DataFrame (train_kr_aug_df) here instead
    # of the file path, which makes os.path.exists raise TypeError.
    if os.path.exists(csv_path):
        os.remove(csv_path)

# Image augmentation function
def augment_image(image):
    """Run one image through Albumentations and then the Augraphy default pipeline.

    Both pipelines are rebuilt on every call.

    Args:
        image: Image array handed to the Albumentations pipeline via
            ``image=``. The caller in this notebook passes an RGB array read
            with OpenCV (presumably HxWx3 uint8 -- confirm).

    Returns:
        The augmented image produced by the Augraphy pipeline.
    """
    # Albumentations stage: each transform is applied with probability 0.5.
    albu_aug = A.Compose([
        A.RandomBrightnessContrast(p=0.5),
        A.RandomGamma(p=0.5),
        A.GaussNoise(p=0.5),
        A.Rotate(limit=10, p=0.5),
    ])

    # Augraphy stage.
    aug_pipeline = augraphy.default_pipeline()

    # Apply Albumentations; the result dict carries the image under 'image'.
    albu_result = albu_aug(image=image)['image']

    # Apply Augraphy.
    aug_result = aug_pipeline.augment(albu_result)

    # AugraphyPipeline.augment() returns a dict of pipeline results in Augraphy
    # releases where only pipeline(image) returns the bare array; the final
    # image is stored under "output". Unwrap it so callers always get an image
    # they can pass to cv2.
    if isinstance(aug_result, dict):
        aug_result = aug_result['output']

    return aug_result

In [6]:
setup_directories()

# Read the CSV file listing the training images and their labels.
df = pd.read_csv(train_kr_csv_path)

augmented_data = []

for _, row in df.iterrows():
    img_path = os.path.join(train_kr_image_path, row['ID'])
    target = row['target']

    # Read the original image. cv2.imread returns None instead of raising when
    # the file is missing or unreadable, so fail here with a clear message.
    image_bgr = cv2.imread(img_path)
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Save the original image. It is written straight from the BGR array that
    # was loaded, which yields the same pixels as converting to RGB and back.
    original_out_path = os.path.join(train_kr_aug_image_path, row['ID'])
    if not cv2.imwrite(original_out_path, image_bgr):
        raise OSError(f"Could not write image: {original_out_path}")
    augmented_data.append({'ID': row['ID'], 'target': target})

    # The augmentation function is fed RGB, as before.
    image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Generate and save the augmented images.
    for i in range(AUGMENTATIONS_PER_IMAGE):
        augmented_image = augment_image(image)
        aug_filename = f"{os.path.splitext(row['ID'])[0]}_aug_{i+1}.jpg"
        aug_out_path = os.path.join(train_kr_aug_image_path, aug_filename)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(aug_out_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)):
            raise OSError(f"Could not write image: {aug_out_path}")
        augmented_data.append({'ID': aug_filename, 'target': target})

# Save the augmented data (originals plus augmentations) to a CSV file.
augmented_df = pd.DataFrame(augmented_data)
augmented_df.to_csv(train_kr_aug_csv_path, index=False)

In [None]:
# Pick up to 3 random images and show each next to its augmented versions.
all_ids = df['ID'].tolist()
# random.sample raises ValueError when asked for more items than exist.
sample_images = random.sample(all_ids, min(3, len(all_ids)))

if not sample_images:
    print('No images available to display.')
else:
    n_cols = AUGMENTATIONS_PER_IMAGE + 1
    # squeeze=False keeps `axes` two-dimensional even with a single row or a
    # single column (e.g. AUGMENTATIONS_PER_IMAGE == 0), so axes[i, j] always works.
    fig, axes = plt.subplots(
        len(sample_images),
        n_cols,
        figsize=(5 * n_cols, 5 * len(sample_images)),
        squeeze=False,
    )

    for i, img_name in enumerate(sample_images):
        # Original image
        original_path = os.path.join(train_kr_image_path, img_name)
        original = cv2.imread(original_path)
        if original is None:  # cv2.imread returns None on failure
            raise FileNotFoundError(f"Could not read image: {original_path}")
        original = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)

        # Show the original image
        axes[i, 0].imshow(original)
        axes[i, 0].set_title('Original')
        axes[i, 0].axis('off')

        # Show the augmented images
        for j in range(AUGMENTATIONS_PER_IMAGE):
            aug_path = os.path.join(
                train_kr_aug_image_path,
                f"{os.path.splitext(img_name)[0]}_aug_{j+1}.jpg",
            )
            aug_img = cv2.imread(aug_path)
            if aug_img is None:
                raise FileNotFoundError(f"Could not read image: {aug_path}")
            aug_img = cv2.cvtColor(aug_img, cv2.COLOR_BGR2RGB)

            axes[i, j+1].imshow(aug_img)
            axes[i, j+1].set_title(f'Augmented {j+1}')
            axes[i, j+1].axis('off')

    plt.tight_layout()
    plt.show()