# **Dataset Load**

**Train Dataset**
- Normal: 500
- Type1(FR50): 25
- Type2(Fan): 25

**Test Dataset**
- Normal: 100
- Type1(FR50): 100
- Type2(Fan): 100

> *cf. Normal Dataset Augmentation*  
  기존의 Normal 305장을 좌우반전 Data Augmentation 후 610장에서 샘플링하여 Train/Test Dataset 구축  
  Type1/Type2는 기존 197장, 153장에서 바로 샘플링하여 구축

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset folders under it can be read.
drive.mount('/content/drive')

import os
from PIL import Image
from sklearn.model_selection import train_test_split

# Source folders on the mounted Drive, one per image class:
#   normal_dir_path -> Normal images
#   type1_dir_path  -> Type1 (Underfill_50FR) images
#   type2_dir_path  -> Type2 (Underfill_Fan) images
normal_dir_path = '/content/drive/MyDrive/산업공학종합설계2/datasets/Normal'
type1_dir_path = '/content/drive/MyDrive/산업공학종합설계2/datasets/Underfill_50FR'
type2_dir_path = '/content/drive/MyDrive/산업공학종합설계2/datasets/Underfill_Fan'

Mounted at /content/drive


In [2]:
# Normal img: Augmentation + Sampling + Processing
# Loads every Normal image as RGB, adds a horizontally flipped copy of each,
# then draws 500 training and 100 test images from the combined pool.

normal_images_processed = []

# os.listdir() returns entries in arbitrary order; sorting keeps the input
# order stable so that random_state=42 below yields the same split every run.
for filename in sorted(os.listdir(normal_dir_path)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img_path = os.path.join(normal_dir_path, filename)
    try:
        # Close the file handle right away; convert() returns an in-memory copy.
        with Image.open(img_path) as src:
            img = src.convert('RGB')
        flipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
    except IOError:
        print(f"Error loading or processing image: {img_path}")
        continue

    normal_images_processed.append(img)  # Original image
    normal_images_processed.append(flipped_img)  # Horizontally flipped image

print(f"Total processed images (original + augmented) for 'Normal' class: {len(normal_images_processed)}")

# NOTE(review): the split is done after augmentation, so an image and its
# mirrored copy can end up on opposite sides of the train/test split.
# Consider splitting the originals first if that leakage matters.

# 500 train + 100 test = 600 images are required for the split.
if len(normal_images_processed) < 600:
    print(f"Warning: Not enough 'Normal' images ({len(normal_images_processed)}) available for the desired split (600 needed).")
else:
    normal_train_images, normal_test_images = train_test_split(
        normal_images_processed,
        train_size=500,
        test_size=100,
        random_state=42,
        shuffle=True
    )

    print(f"Number of 'Normal' images for training: {len(normal_train_images)}")
    print(f"Number of 'Normal' images for testing: {len(normal_test_images)}")

Total processed images (original + augmented) for 'Normal' class: 610
Number of 'Normal' images for training: 500
Number of 'Normal' images for testing: 100


In [3]:
# Type1 img: Sampling + Processing
# Loads every Type1 image as RGB, then draws 25 training and 100 test images.

type1_images_processed = []

# os.listdir() returns entries in arbitrary order; sorting keeps the input
# order stable so that random_state=42 below yields the same split every run.
for filename in sorted(os.listdir(type1_dir_path)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img_path = os.path.join(type1_dir_path, filename)
    try:
        # Close the file handle right away; convert() returns an in-memory copy.
        with Image.open(img_path) as src:
            type1_images_processed.append(src.convert('RGB'))
    except IOError:
        print(f"Error loading image: {img_path}")

print(f"Total processed images for 'Type1' class: {len(type1_images_processed)}")

# 25 train + 100 test = 125 images are required; the message now matches the check.
if len(type1_images_processed) < 125:
    print(f"Warning: Not enough 'Type1' images ({len(type1_images_processed)}) available for the desired split (125 needed).")
else:
    type1_train_images, type1_test_images = train_test_split(
        type1_images_processed,
        train_size=25,
        test_size=100,
        random_state=42,
        shuffle=True
    )

    print(f"Number of 'Type1' images for training: {len(type1_train_images)}")
    print(f"Number of 'Type1' images for testing: {len(type1_test_images)}")

Total processed images for 'Type1' class: 197
Number of 'Type1' images for training: 25
Number of 'Type1' images for testing: 100


In [4]:
# Type2 img: Sampling + Processing
# Loads every Type2 image as RGB, then draws 25 training and 100 test images.

type2_images_processed = []

# os.listdir() returns entries in arbitrary order; sorting keeps the input
# order stable so that random_state=42 below yields the same split every run.
for filename in sorted(os.listdir(type2_dir_path)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    img_path = os.path.join(type2_dir_path, filename)
    try:
        # Close the file handle right away; convert() returns an in-memory copy.
        with Image.open(img_path) as src:
            type2_images_processed.append(src.convert('RGB'))
    except IOError:
        print(f"Error loading image: {img_path}")

print(f"Total processed images for 'Type2' class: {len(type2_images_processed)}")

# 25 train + 100 test = 125 images are required; the message now matches the check.
if len(type2_images_processed) < 125:
    print(f"Warning: Not enough 'Type2' images ({len(type2_images_processed)}) available for the desired split (125 needed).")
else:
    type2_train_images, type2_test_images = train_test_split(
        type2_images_processed,
        train_size=25,
        test_size=100,
        random_state=42,
        shuffle=True
    )

    print(f"Number of 'Type2' images for training: {len(type2_train_images)}")
    print(f"Number of 'Type2' images for testing: {len(type2_test_images)}")

Total processed images for 'Type2' class: 153
Number of 'Type2' images for training: 25
Number of 'Type2' images for testing: 100


In [5]:
# Train Dataset: (image, label) pairs in the order normal -> type1 -> type2
train_set = []
train_set.extend((sample, 'normal') for sample in normal_train_images)
train_set.extend((sample, 'type1') for sample in type1_train_images)
train_set.extend((sample, 'type2') for sample in type2_train_images)

print(f"Total images in train_set: {len(train_set)}")

Total images in train_set: 550


In [6]:
# Test Dataset: (image, label) pairs in the order normal -> type1 -> type2
test_set = []
test_set.extend((sample, 'normal') for sample in normal_test_images)
test_set.extend((sample, 'type1') for sample in type1_test_images)
test_set.extend((sample, 'type2') for sample in type2_test_images)

print(f"Total images in test_set: {len(test_set)}")

Total images in test_set: 300


In [7]:
# Storing Train/Test Dataset

import os
import uuid

def save_dataset(dataset_list, root_dir):
    """Write every (image, label) pair to ``root_dir/<label>/<uuid>.png``.

    Args:
        dataset_list: Iterable of ``(img, label)`` tuples; ``img`` must
            provide a ``save(path)`` method and ``label`` is used as the
            sub-directory name.
        root_dir: Output directory; created if it does not exist, even when
            ``dataset_list`` is empty.
    """
    os.makedirs(root_dir, exist_ok=True)

    for image, class_name in dataset_list:
        class_dir = os.path.join(root_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)

        # A random UUID gives each file a unique name.
        unique_name = uuid.uuid4().hex + ".png"
        target_path = os.path.join(class_dir, unique_name)

        # The .png extension selects lossless PNG output, preserving resolution.
        image.save(target_path)

# Persist both splits, each under its own folder inside ./AM_Datasets
for split_items, split_dir in (
    (train_set, "./AM_Datasets/train"),
    (test_set, "./AM_Datasets/test"),
):
    save_dataset(split_items, split_dir)