In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Library

In [2]:
import os
import random
import glob
from tqdm import tqdm
from collections import defaultdict
import numpy as np
import shutil
import matplotlib.pyplot as plt
import cv2
import imageio.v2 as iio  # Import imageio.v2 instead of imageio
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model

# 1. Data Preprocessing

## 1) 데이터 수 확인

In [3]:
import os

def count_files_in_directory(directory_path):
    return len([f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))])

directory_path = '/content/drive/MyDrive/eardrum/data/'

folders = ['Normal', 'AOM', 'CSOM', 'Earwax', 'OE']

for folder in folders:
  path = directory_path + folder
  print(folder, ':', count_files_in_directory(path))

Normal : 534
AOM : 119
CSOM : 63
Earwax : 140
OE : 41


- 파일 format 확인

In [4]:
def unique_file_formats_in_directory(directory_path):
    file_formats = {os.path.splitext(f)[1].lower() for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))}
    return file_formats

for folder in folders:
    path = directory_path + folder
    unique_formats = unique_file_formats_in_directory(path)
    print(folder, ":", unique_formats)

Normal : {'.tiff'}
AOM : {'.tiff'}
CSOM : {'.tiff'}
Earwax : {'.tiff'}
OE : {'.tiff'}


## 2) 데이터 분할 -> ※ 다시 실행 하지 말것!!!

In [None]:
import os
import random
from collections import defaultdict

# Define the categories and their corresponding image counts
categories = {
    "Normal": 534,
    "AOM": 119,
    "CSOM": 63,
    "Earwax": 140,
    "OE": 41,
    "Tympanoskleros": 28
}

# Define the desired proportions for train, validation, and test sets
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Calculate the total number of images in the dataset
total_images = sum(categories.values())
print(total_images)

925


In [None]:
import os
import numpy as np
import shutil

# Defining your directories
base_dir = '/content/drive/MyDrive/eardrum/data/'
base_dirs = [os.path.join(base_dir, d) for d in os.listdir(base_dir)]

train_dir = '/content/drive/MyDrive/eardrum/new_data/train'
val_dir = '/content/drive/MyDrive/eardrum/new_data/val'
test_dir = '/content/drive/MyDrive/eardrum/new_data/test'

In [None]:
base_dirs

['/content/drive/MyDrive/eardrum/data/Earwax',
 '/content/drive/MyDrive/eardrum/data/Normal',
 '/content/drive/MyDrive/eardrum/data/CSOM',
 '/content/drive/MyDrive/eardrum/data/AOM',
 '/content/drive/MyDrive/eardrum/data/OE',
 '/content/drive/MyDrive/eardrum/data/Tympanoskleros']

In [None]:
# Ratio of splitting
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Iterate over all directories (classes) in your base directory
for dir_path in base_dirs: # -> 1분 40초 소요
    class_name = os.path.basename(dir_path)

    # Create corresponding directories in train, val, test directories
    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

    # Get all file names and shuffle them
    files = os.listdir(dir_path)
    np.random.shuffle(files)

    # Calculate split indices
    train_idx = int(len(files) * train_ratio)
    val_idx = int(len(files) * (train_ratio + val_ratio))

    # Split file names
    train_files = files[:train_idx]
    val_files = files[train_idx:val_idx]
    test_files = files[val_idx:]

    # Move files to corresponding directories
    for file_name in train_files:
        shutil.copy(os.path.join(dir_path, file_name), os.path.join(train_dir, class_name, file_name))

    for file_name in val_files:
        shutil.copy(os.path.join(dir_path, file_name), os.path.join(val_dir, class_name, file_name))

    for file_name in test_files:
        shutil.copy(os.path.join(dir_path, file_name), os.path.join(test_dir, class_name, file_name))

In [None]:
new_data_path = '/content/drive/MyDrive/eardrum/new_data/'

folders = ['Normal', 'AOM', 'CSOM', 'Earwax', 'OE', 'Tympanoskleros']

print("Train Set:")
for folder in folders:
  path = new_data_path + 'train/'+folder
  print(folder, ':', count_files_in_directory(path))

print("\nVal Set:")
for folder in folders:
  path = new_data_path + 'val/'+folder
  print(folder, ':', count_files_in_directory(path))

print("\nTest Set:")
for folder in folders:
  path = new_data_path + 'test/'+folder
  print(folder, ':', count_files_in_directory(path))

Train Set:
Normal : 373
AOM : 83
CSOM : 44
Earwax : 98
OE : 28
Tympanoskleros : 19

Val Set:
Normal : 80
AOM : 18
CSOM : 9
Earwax : 21
OE : 6
Tympanoskleros : 4

Test Set:
Normal : 81
AOM : 18
CSOM : 10
Earwax : 21
OE : 7
Tympanoskleros : 5


## 3) ImageDataGenerator

- 임의로 뽑아서 시각화

In [6]:
train_dir = '/content/drive/MyDrive/eardrum2/data_4_png/train/OE'
# val_dir = '/content/drive/MyDrive/eardrum2/ori_data_4/val'
test_dir = '/content/drive/MyDrive/eardrum2/data_4_png/test/OE'

- image shape : (500, 500, 3)

In [21]:
from PIL import Image
def tiff_to_png(image_path):
    # 이미지 로드
    original_image = Image.open(image_path)

    # 이미지 저장 경로 설정
    base_path, ext = os.path.splitext(image_path)

    # 증강된 이미지 저장
    new_path = base_path+'.png'
    # new_path = new_path.replace('eardrum', 'eardrum2')
    # new_path = new_path.replace('ori_data_4', 'aug_data_4_png')
    original_image.save(new_path)
    # print(new_path)

### train

In [17]:
img_paths = glob.glob(train_dir+"/*.tiff")
print(len(img_paths))

34


In [18]:
# img_paths = glob.glob(train_dir+"/*/*.tiff")
# print(len(img_paths))

In [19]:
img_paths[0]

'/content/drive/MyDrive/eardrum2/data_4_png/train/OE/otitexterna_30.tiff'

In [20]:
tiff_to_png(img_paths[0])

/content/drive/MyDrive/eardrum2/data_4_png/train/OE/otitexterna_30.png


In [22]:
import os
from PIL import Image


# 이미지 경로에 있는 tiff 파일들을 png로 변환하여 새로운 경로에 저장
for img_path in tqdm(img_paths):
  tiff_to_png(img_path)

print("이미지 변환이 완료되었습니다.")

100%|██████████| 34/34 [00:20<00:00,  1.64it/s]

이미지 변환이 완료되었습니다.





### val

In [None]:
img_paths = glob.glob(val_dir+"/*/*.tiff")
print(len(img_paths))

128


In [None]:
# 이미지 경로에 있는 tiff 파일들을 png로 변환하여 새로운 경로에 저장
for img_path in tqdm(img_paths):
  tiff_to_png(img_path)

print("이미지 변환이 완료되었습니다.")

100%|██████████| 128/128 [01:08<00:00,  1.88it/s]

이미지 변환이 완료되었습니다.





### test

In [23]:
img_paths = glob.glob(test_dir+"/*.tiff")
print(len(img_paths))

7


In [None]:
# img_paths = glob.glob(test_dir+"/*/*.tiff")
# print(len(img_paths))

130


In [24]:
# 이미지 경로에 있는 tiff 파일들을 png로 변환하여 새로운 경로에 저장
for img_path in tqdm(img_paths):
  tiff_to_png(img_path)

print("이미지 변환이 완료되었습니다.")

100%|██████████| 7/7 [00:04<00:00,  1.65it/s]

이미지 변환이 완료되었습니다.





# 데이터 증강 -> 6배

In [None]:
!pwd

/content


In [None]:
!ls /content/drive/MyDrive/eardrum2/data_4_png/train/AOM

aom_101.png  aom_116.png  aom_20.png  aom_36.png  aom_49.png  aom_63.png  aom_79.png  aom_94.png
aom_102.png  aom_117.png  aom_21.png  aom_37.png  aom_50.png  aom_64.png  aom_80.png  aom_96.png
aom_104.png  aom_118.png  aom_22.png  aom_38.png  aom_53.png  aom_65.png  aom_82.png  aom_97.png
aom_108.png  aom_12.png   aom_23.png  aom_39.png  aom_55.png  aom_66.png  aom_83.png  aom_98.png
aom_10.png   aom_13.png   aom_24.png  aom_40.png  aom_56.png  aom_67.png  aom_85.png  aom_99.png
aom_110.png  aom_14.png   aom_27.png  aom_41.png  aom_57.png  aom_6.png   aom_86.png  aom_9.png
aom_111.png  aom_16.png   aom_29.png  aom_42.png  aom_59.png  aom_71.png  aom_88.png
aom_112.png  aom_17.png   aom_30.png  aom_43.png  aom_5.png   aom_72.png  aom_8.png
aom_113.png  aom_18.png   aom_32.png  aom_44.png  aom_60.png  aom_73.png  aom_90.png
aom_114.png  aom_19.png   aom_33.png  aom_47.png  aom_61.png  aom_76.png  aom_91.png
aom_115.png  aom_1.png	  aom_34.png  aom_48.png  aom_62.png  aom_77.png  aom_93.

In [None]:
train_dir = '/content/drive/MyDrive/eardrum2/data_4_png/train'

In [None]:
img_paths = glob.glob(train_dir+"/*/*.png")
print(len(img_paths))

598


In [None]:
img_paths[0]

'/content/drive/MyDrive/eardrum2/data_4_png/train/Normal/normal_527.png'

In [None]:
from PIL import Image
import os

def augment_image(image_path):
    # 이미지 로드
    original_image = Image.open(image_path)

    # 이미지 저장 경로 설정
    base_path, ext = os.path.splitext(image_path)

    # Flip 증강 (수평 및 수직)
    horizontal_flip = original_image.transpose(Image.FLIP_LEFT_RIGHT)
    vertical_flip = original_image.transpose(Image.FLIP_TOP_BOTTOM)

    # Rotate 증강 (90, 180, 270 도)
    rotate_90 = original_image.rotate(90)
    rotate_180 = original_image.rotate(180)
    rotate_270 = original_image.rotate(270)

    # 증강된 이미지 저장
    horizontal_flip.save(f'{base_path}_horizontal_flip{ext}')
    vertical_flip.save(f'{base_path}_vertical_flip{ext}')
    rotate_90.save(f'{base_path}_rotate_90{ext}')
    rotate_180.save(f'{base_path}_rotate_180{ext}')
    rotate_270.save(f'{base_path}_rotate_270{ext}')

In [None]:
for img_path in tqdm(img_paths):
  augment_image(img_path)

100%|██████████| 598/598 [07:06<00:00,  1.40it/s]


In [None]:
import os

def count_files_in_directory(directory_path):
    return len([f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))])

directory_path = '/content/drive/MyDrive/eardrum/aug_data_4/train/'

folders = ['Normal', 'AOM', 'CSOM', 'Earwax']

for folder in folders:
  path = directory_path + folder
  print(folder, ':', count_files_in_directory(path))

Normal : 2238
AOM : 498
CSOM : 264
Earwax : 588


In [None]:
aug_img_paths = glob.glob(train_dir+"/*/*.png")
print(len(aug_img_paths))

3588
