#### I. 바운딩 박스 크롭 및 패딩(데이터 전처리)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
import numpy as np
import os
from sklearn.model_selection import train_test_split
import pandas as pd

# Root folder of the padded images; one sub-folder per class is read below.
data_dir = '/content/drive/MyDrive/DL/project/Teamproject/padded_images'

# Class names double as the sub-folder names under data_dir.
classes = ['tie_abnormal', 'tie_normal']

# Collect every image path together with its class label.
all_images = []
all_labels = []
for class_name in classes:
    class_dir = os.path.join(data_dir, class_name)
    # os.listdir() returns entries in arbitrary order; sorting makes the input
    # order stable so that random_state below really reproduces the same split.
    for fname in sorted(os.listdir(class_dir)):
        if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
            all_images.append(os.path.join(class_dir, fname))
            all_labels.append(class_name)

# Split into train / validation / test = 60% / 20% / 20%
# (0.25 of the remaining 80% equals 20% of the whole set).
# stratify keeps the normal/abnormal ratio the same in all three subsets.
train_images, test_images, train_labels, test_labels = train_test_split(
    all_images, all_labels, test_size=0.2, random_state=42, stratify=all_labels)
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.25, random_state=42, stratify=train_labels)
print(len(train_labels), len(val_labels), len(test_labels))

14688 4896 4897


In [None]:
import json
import os
from PIL import Image
import glob

# Top-level folders used by this cell:
#   base_json_dir  - searched for the '*.json' annotation files
#   base_image_dir - holds the '.jpg' image matching each annotation file
#   output_dir     - root under which the cropped images are written
base_json_dir = '/content/drive/My Drive/Deeplearning/Teamproject/TL_일반철도_이상'
base_image_dir = '/content/drive/My Drive/Deeplearning/Teamproject/원천_folder'
output_dir = '/content/drive/My Drive/Deeplearning/Teamproject/cropped_images'

# Create the per-class, per-status output folders.
def create_folders(categories, base_dir):
    """Create '<name>_normal' and '<name>_abnormal' folders under base_dir.

    Args:
        categories: Mapping whose values are the category names.
        base_dir: Folder in which the sub-folders are created.

    Folders that already exist are left untouched.
    """
    for cat_name in categories.values():
        for status in ('normal', 'abnormal'):
            target = os.path.join(base_dir, f'{cat_name}_{status}')
            os.makedirs(target, exist_ok=True)

# Process a single JSON annotation file.
def process_json_file(json_file_path, image_file_path, categories, output_dir):
    """Crop every labelled bounding box of one image and save the crops as JPEG.

    Args:
        json_file_path: Path of the annotation JSON. It must contain an
            'annotations' list; each entry may carry 'bbox'
            ([left, top, width, height]), 'category_id' and 'status'.
        image_file_path: Path of the image the annotations refer to.
        categories: Mapping from category id to category name.
        output_dir: Root folder; crops are written to
            '<output_dir>/<category>_<status>/'.

    An annotation is skipped when its category id is not in ``categories``,
    when its status is neither 'normal' nor 'abnormal', or when its box has no
    area left after being clamped to the image bounds. The image is only
    opened if at least one annotation is croppable.

    Raises:
        KeyError: If the JSON has no 'annotations' key.
        OSError: If the JSON or the image cannot be read, or a crop cannot be
            written.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The index runs over every annotation that has a bbox (including the ones
    # skipped below) so that output file names stay the same as before.
    annotated = [ann for ann in data['annotations'] if 'bbox' in ann and ann['bbox']]
    crop_jobs = []
    for idx, ann in enumerate(annotated):
        category = categories.get(ann.get('category_id'), 'Unknown')
        status = ann.get('status', 'unknown')
        if category == 'Unknown' or status not in ('normal', 'abnormal'):
            continue
        crop_jobs.append((idx, ann['bbox'], category, status))

    if not crop_jobs:
        return

    json_stem = os.path.splitext(os.path.basename(json_file_path))[0]

    # The context manager closes the image file even if a crop fails.
    with Image.open(image_file_path) as image:
        img_width, img_height = image.size

        for idx, bbox, category, status in crop_jobs:
            left, top, width, height = bbox

            # Clamp the box to the image. A box with zero area, or one lying
            # outside the image, would otherwise give an empty crop and make
            # save() fail with "cannot write empty image as JPEG".
            x0 = max(0, int(round(left)))
            y0 = max(0, int(round(top)))
            x1 = min(img_width, int(round(left + width)))
            y1 = min(img_height, int(round(top + height)))
            if x1 <= x0 or y1 <= y0:
                print(f'Skipped empty bounding box {bbox} in {json_file_path}')
                continue

            cropped_image = image.crop((x0, y0, x1, y1))

            # JPEG cannot store alpha or palette images; convert those to RGB.
            if cropped_image.mode not in ('L', 'RGB', 'CMYK'):
                cropped_image = cropped_image.convert('RGB')

            save_dir = os.path.join(output_dir, f'{category}_{status}')
            os.makedirs(save_dir, exist_ok=True)

            save_path = os.path.join(save_dir, f'{category}_{json_stem}_{idx}.jpg')
            cropped_image.save(save_path)
            print(f'Saved cropped image to {save_path}')

# Collect the annotation files once; sorted so every run uses the same order.
json_files = sorted(glob.glob(os.path.join(base_json_dir, '*.json')))
if not json_files:
    raise FileNotFoundError(f'No JSON files found in {base_json_dir}')

# Read the category table (id -> name) from the first annotation file.
# NOTE(review): this assumes every JSON file shares the same 'categories' list.
sample_json_file = json_files[0]
with open(sample_json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)
categories = {cat['id']: cat['name'] for cat in data['categories']}

# Create the output folders.
create_folders(categories, output_dir)

# Process every JSON file in the annotation folder.
for json_file in json_files:
    # Swap only the extension; str.replace would also rewrite a '.json' that
    # happens to appear in the middle of the file name.
    image_file_name = os.path.splitext(os.path.basename(json_file))[0] + '.jpg'
    image_file_path = os.path.join(base_image_dir, image_file_name)

    if not os.path.exists(image_file_path):
        print(f'Image file {image_file_path} does not exist.')
        continue

    # Report a bad file and carry on, so one broken annotation or image does
    # not abort the whole batch.
    try:
        process_json_file(json_file, image_file_path, categories, output_dir)
    except (OSError, ValueError, KeyError) as e:
        print(f'Failed to process {json_file}: {e}')

print('Cropping and saving images from all JSON files completed.')


스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/tie_normal/tie_일반철도_220831_광명8_2552850_2.jpg
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/tie_normal/tie_일반철도_220831_광명8_2552850_3.jpg
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/pandrol_e-clip_normal/pandrol_e-clip_일반철도_220831_광명8_2552850_4.jpg
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/pandrol_e-clip_normal/pandrol_e-clip_일반철도_220831_광명8_2552850_5.jpg
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/pandrol_e-clip_normal/pandrol_e-clip_일반철도_220831_광명8_2552850_6.jpg
Saved cropped image to /content/drive/My Drive/Deeplearning/Teamproject/cropped_images/pandrol_e-clip_normal/pandrol_e-clip_일반철도_220831_광명8_2552850_7.jpg
Save

ValueError: cannot write empty image as JPEG

In [None]:
from google.colab import drive
import os
from PIL import Image, ImageOps

# Mount Google Drive at /content/gdrive; the paths defined further down in
# this cell all live under that mount point.
drive.mount('/content/gdrive')

def pad_image(image, size):
    """Resize ``image`` to fit inside ``size`` and pad the remainder with black.

    Args:
        image: A PIL image.
        size: Target size passed straight to ``ImageOps.pad``.

    Returns:
        A new RGB image of exactly ``size``, with the resized original centred.
    """
    # The fill colour is an RGB triple, which Pillow documents as valid only
    # for multi-band images. Convert grayscale, palette and other modes to RGB
    # first so that padding cannot fail on them and every output has 3 channels.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return ImageOps.pad(image, size, method=Image.BICUBIC, color=(0, 0, 0), centering=(0.5, 0.5))

def process_directory(directory, output_directory, size):
    """Pad every image in ``directory`` and save it under the same file name.

    Files whose name already exists in ``output_directory`` are skipped, so an
    interrupted run can be resumed. A file that fails to load, pad or save is
    reported and skipped.

    Args:
        directory: Folder containing the source images.
        output_directory: Folder for the padded images; created if missing.
        size: Target size handed to ``pad_image``.

    Returns:
        The number of images padded and saved by this call; 0 when
        ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        print(f"Directory not found: {directory}")
        # Return a count here as well, so callers always receive an int.
        return 0

    os.makedirs(output_directory, exist_ok=True)

    processed_files = set(os.listdir(output_directory))
    processed_count = 0

    for filename in os.listdir(directory):
        if filename in processed_files:
            continue
        # Case-insensitive, so 'IMG.JPG' is picked up like 'img.jpg'.
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            continue

        img_path = os.path.join(directory, filename)
        try:
            # The context manager closes the source file after each image.
            with Image.open(img_path) as img:
                padded_img = pad_image(img, size)
                padded_img.save(os.path.join(output_directory, filename))
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job and one bad
            # file must not stop the rest.
            print(f"Error processing file {filename}: {e}")
        else:
            processed_count += 1

    return processed_count

# Source folders (cropped images) and destination folders (padded images),
# one pair per class.
normal_path = '/content/gdrive/MyDrive/DL/project/Teamproject/cropped_images/tie_normal'
normal_output_path = '/content/gdrive/MyDrive/DL/project/Teamproject/padded_images/tie_normal'

abnormal_path = '/content/gdrive/MyDrive/DL/project/Teamproject/cropped_images/tie_abnormal'
abnormal_output_path = '/content/gdrive/MyDrive/DL/project/Teamproject/padded_images/tie_abnormal'

# Recommended target size for every padded image.
desired_size = (224, 224)

# Print each path together with whether it currently exists.
for _label, _path in (
    ("Normal Path", normal_path),
    ("Abnormal Path", abnormal_path),
    ("Normal Output Path", normal_output_path),
    ("Abnormal Output Path", abnormal_output_path),
):
    print(f"{_label}: {_path}, Exists: {os.path.exists(_path)}")

# Pad the normal class first, then the abnormal class.
normal_processed = process_directory(normal_path, normal_output_path, desired_size)
abnormal_processed = process_directory(abnormal_path, abnormal_output_path, desired_size)

# Report how many images each call padded and saved.
for _caption, _count in (
    ('Normal images padded and saved:', normal_processed),
    ('Abnormal images padded and saved:', abnormal_processed),
):
    print(_caption, _count)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Normal Path: /content/gdrive/MyDrive/DL/project/Teamproject/cropped_images/tie_normal, Exists: True
Abnormal Path: /content/gdrive/MyDrive/DL/project/Teamproject/cropped_images/tie_abnormal, Exists: True
Normal Output Path: /content/gdrive/MyDrive/DL/project/Teamproject/padded_images/tie_normal, Exists: True
Abnormal Output Path: /content/gdrive/MyDrive/DL/project/Teamproject/padded_images/tie_abnormal, Exists: True
