In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

def precise_error_edge_detection(gray_image):
    """Return a binary edge map from thresholded Sobel gradient magnitude.

    The horizontal and vertical 3x3 Sobel derivatives are combined into a
    gradient magnitude, and every pixel whose magnitude is strictly greater
    than ``mean + 2 * std`` of the whole magnitude map is marked as an edge.

    Args:
        gray_image: Image array handed straight to ``cv2.Sobel``
            (presumably single-channel; the visible caller passes the
            CLAHE output).

    Returns:
        ``np.uint8`` array holding 255 at edge pixels and 0 elsewhere.
    """
    # First-order derivative along x, then along y.
    grad_x, grad_y = (
        cv2.Sobel(gray_image, cv2.CV_64F, dx, dy, ksize=3)
        for dx, dy in ((1, 0), (0, 1))
    )
    magnitude = np.sqrt(grad_x**2 + grad_y**2)

    # Adaptive cutoff: only gradients well above the image-wide average count.
    cutoff = np.mean(magnitude) + 2 * np.std(magnitude)
    return np.where(magnitude > cutoff, 255, 0).astype(np.uint8)

def preprocess_image(image):
    """Run the preprocessing chain and return every intermediate image.

    Each step feeds the next:

    1. BGR-to-grayscale conversion.
    2. 7x7 Gaussian blur with sigma 1 (noise reduction).
    3. Bilateral filter with d=20, sigmaColor=90, sigmaSpace=130
       (smoothing that preserves edges).
    4. CLAHE with clip limit 4.0 on a 4x4 tile grid (contrast enhancement).
    5. ``precise_error_edge_detection`` on the enhanced image.

    Args:
        image: Colour image converted with ``cv2.COLOR_BGR2GRAY``, i.e. it is
            expected in BGR channel order.

    Returns:
        Tuple ``(gray, blurred, filtered, enhanced, edges)``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (7, 7), 1)
    edge_preserved = cv2.bilateralFilter(smoothed, 20, 90, 130)

    equalizer = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(4, 4))
    contrast_boosted = equalizer.apply(edge_preserved)

    return (
        grayscale,
        smoothed,
        edge_preserved,
        contrast_boosted,
        precise_error_edge_detection(contrast_boosted),
    )

def process_and_save_images(input_folder, output_folder, keep_limit=None):
    """Preprocess every BMP in a folder and save each stage to its own subfolder.

    For each ``.bmp`` file (case-insensitive) found directly in
    ``input_folder`` the image is run through ``preprocess_image`` and the
    five stage images are written to
    ``<output_folder>/<stage>/<stage>_<filename>``, where ``stage`` is one of
    gray, blur, filter, enhance, edge. Progress is printed per file and a
    summary is printed at the end.

    Args:
        input_folder: Directory whose entries are listed with ``os.listdir``.
        output_folder: Root output directory; the stage subfolders are
            created if they do not exist.
        keep_limit: Maximum number of processed images kept in memory and
            returned. ``None`` (the default) keeps all of them. Each kept
            entry holds the original plus five stage images, so pass a small
            number when processing large batches.

    Returns:
        List of dicts with keys 'original', 'gray', 'blur', 'filter',
        'enhance', 'edge' and 'filename', one per kept image.
    """
    # Create one output folder per stage.
    stages = ['gray', 'blur', 'filter', 'enhance', 'edge']
    output_folders = {}
    for stage in stages:
        stage_folder = os.path.join(output_folder, stage)
        Path(stage_folder).mkdir(parents=True, exist_ok=True)
        output_folders[stage] = stage_folder

    # Collect the BMP files to process.
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith('.bmp')]

    processed_images = []
    # Counted separately because processed_images may be capped by keep_limit.
    success_count = 0
    print(f"\n총 처리할 이미지 개수: {len(image_files)}")
    print("\n이미지 처리 및 저장 시작...")
    print("-" * 50)

    for i, filename in enumerate(image_files, 1):
        image = cv2.imread(os.path.join(input_folder, filename))

        # cv2.imread signals an unreadable file by returning None.
        if image is None:
            print(f"[!] 에러: {filename} 파일을 읽을 수 없습니다.")
            print("-" * 50)
            continue

        # preprocess_image returns its outputs in the same order as `stages`.
        stage_images = dict(zip(stages, preprocess_image(image)))

        # Save every stage; cv2.imwrite reports failure through its return
        # value, so collect the stages that could not be written.
        stage_paths = {}
        failed_stages = []
        for stage, img in stage_images.items():
            stage_paths[stage] = os.path.join(output_folders[stage], f'{stage}_{filename}')
            if not cv2.imwrite(stage_paths[stage], img):
                failed_stages.append(stage)

        if failed_stages:
            print(f"[!] 에러: {filename} 저장 실패 ({', '.join(failed_stages)})")
            print("-" * 50)
            continue

        success_count += 1

        # Per-file log: original shape plus location and size of each output.
        print(f"[{i}/{len(image_files)}] 처리완료: {filename}")
        print(f"    원본크기: {image.shape}")
        for stage in stages:
            stage_path = stage_paths[stage]
            print(f"    {stage} 저장위치: {stage_path}")
            print(f"    {stage} 파일크기: {os.path.getsize(stage_path)/1024:.1f}KB")
        print("-" * 50)

        # Keep the images for later visualization, up to keep_limit entries.
        if keep_limit is None or len(processed_images) < keep_limit:
            processed_images.append({
                'original': image,
                **stage_images,
                'filename': filename,
            })

    print("\n처리 완료 요약:")
    print(f"성공적으로 처리된 이미지: {success_count}/{len(image_files)}")
    print(f"저장 위치: {output_folder}")

    return processed_images

def visualize_image_pairs(processed_images):
    """Plot the original and each preprocessing stage for every image.

    Every image takes two rows of a six-column grid. The first row shows the
    original, grayscale and blurred images in its first three cells; the
    second row shows the filtered, enhanced and edge images in its first
    three cells. The remaining cells of each row are left empty.

    Args:
        processed_images: List of dicts with the keys 'original', 'gray',
            'blur', 'filter', 'enhance', 'edge' and 'filename'.
    """
    columns = 6
    rows = 2 * len(processed_images)
    slots_per_image = 2 * columns

    # (1-based slot within the image's 12 slots, dict key, panel title)
    gray_panels = (
        (2, 'gray', 'Grayscale'),
        (3, 'blur', 'Blurred'),
        (7, 'filter', 'Filtered'),
        (8, 'enhance', 'Enhanced'),
        (9, 'edge', 'Edges'),
    )

    plt.figure(figsize=(20, 5 * rows))

    for position, entry in enumerate(processed_images):
        first_slot = position * slots_per_image

        # The original is stored in BGR order, so convert it for matplotlib.
        plt.subplot(rows, columns, first_slot + 1)
        plt.imshow(cv2.cvtColor(entry['original'], cv2.COLOR_BGR2RGB))
        plt.title(f"Original - {entry['filename']}")
        plt.axis('off')

        for offset, key, title in gray_panels:
            plt.subplot(rows, columns, first_slot + offset)
            plt.imshow(entry[key], cmap='gray')
            plt.title(title)
            plt.axis('off')

    plt.tight_layout()
    plt.show()

# Run the pipeline: input folder of source images and root folder for outputs
input_folder = '/home/ec2-user/SageMaker/data/Final_Data/OK/'
output_folder = '/home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/'

# Process every image and save each stage
processed_images = process_and_save_images(input_folder, output_folder)

# Visualize only the first 5 processed images
visualize_image_pairs(processed_images[:5])


총 처리할 이미지 개수: 4386

이미지 처리 및 저장 시작...
--------------------------------------------------
[1/4386] 처리완료: 1701.bmp
    원본크기: (1403, 1435, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_1701.bmp
    gray 파일크기: 1968.5KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_1701.bmp
    blur 파일크기: 1968.5KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_1701.bmp
    filter 파일크기: 1968.5KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_1701.bmp
    enhance 파일크기: 1968.5KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_1701.bmp
    edge 파일크기: 1968.5KB
--------------------------------------------------
[2/4386] 처리완료: 1464.bmp
    원본크기: (1430, 1416, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_1464.bmp
    gray 파일크기: 1978.5KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Fina

[13/4386] 처리완료: 1641.bmp
    원본크기: (1439, 1403, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_1641.bmp
    gray 파일크기: 1974.1KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_1641.bmp
    blur 파일크기: 1974.1KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_1641.bmp
    filter 파일크기: 1974.1KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_1641.bmp
    enhance 파일크기: 1974.1KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_1641.bmp
    edge 파일크기: 1974.1KB
--------------------------------------------------
[14/4386] 처리완료: 614.bmp
    원본크기: (1461, 1453, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_614.bmp
    gray 파일크기: 2078.4KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_614.bmp
    blur 파일크기: 2078.4KB
    filter 저장위치: /home/ec

[25/4386] 처리완료: 3248.bmp
    원본크기: (1451, 1397, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_3248.bmp
    gray 파일크기: 1984.8KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_3248.bmp
    blur 파일크기: 1984.8KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_3248.bmp
    filter 파일크기: 1984.8KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_3248.bmp
    enhance 파일크기: 1984.8KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_3248.bmp
    edge 파일크기: 1984.8KB
--------------------------------------------------
[26/4386] 처리완료: 885.bmp
    원본크기: (1465, 1490, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_885.bmp
    gray 파일크기: 2135.6KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_885.bmp
    blur 파일크기: 2135.6KB
    filter 저장위치: /home/ec

[37/4386] 처리완료: 4712.bmp
    원본크기: (1418, 1386, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_4712.bmp
    gray 파일크기: 1923.1KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_4712.bmp
    blur 파일크기: 1923.1KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_4712.bmp
    filter 파일크기: 1923.1KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_4712.bmp
    enhance 파일크기: 1923.1KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_4712.bmp
    edge 파일크기: 1923.1KB
--------------------------------------------------
[38/4386] 처리완료: 2300.bmp
    원본크기: (1404, 1414, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_2300.bmp
    gray 파일크기: 1942.5KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_2300.bmp
    blur 파일크기: 1942.5KB
    filter 저장위치: /home

[50/4386] 처리완료: 1037.bmp
    원본크기: (1431, 1472, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_1037.bmp
    gray 파일크기: 2058.1KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_1037.bmp
    blur 파일크기: 2058.1KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_1037.bmp
    filter 파일크기: 2058.1KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_1037.bmp
    enhance 파일크기: 2058.1KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_1037.bmp
    edge 파일크기: 2058.1KB
--------------------------------------------------
[51/4386] 처리완료: 1473.bmp
    원본크기: (1389, 1401, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_1473.bmp
    gray 파일크기: 1905.5KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_1473.bmp
    blur 파일크기: 1905.5KB
    filter 저장위치: /home

[62/4386] 처리완료: 3221.bmp
    원본크기: (1396, 1424, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_3221.bmp
    gray 파일크기: 1942.4KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_3221.bmp
    blur 파일크기: 1942.4KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_3221.bmp
    filter 파일크기: 1942.4KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_3221.bmp
    enhance 파일크기: 1942.4KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_3221.bmp
    edge 파일크기: 1942.4KB
--------------------------------------------------
[63/4386] 처리완료: 2169.bmp
    원본크기: (1407, 1406, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_2169.bmp
    gray 파일크기: 1935.7KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_2169.bmp
    blur 파일크기: 1935.7KB
    filter 저장위치: /home

[74/4386] 처리완료: 430.bmp
    원본크기: (1399, 1418, 3)
    gray 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/gray/gray_430.bmp
    gray 파일크기: 1941.1KB
    blur 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/blur/blur_430.bmp
    blur 파일크기: 1941.1KB
    filter 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/filter/filter_430.bmp
    filter 파일크기: 1941.1KB
    enhance 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/enhance/enhance_430.bmp
    enhance 파일크기: 1941.1KB
    edge 저장위치: /home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/edge/edge_430.bmp
    edge 파일크기: 1941.1KB
--------------------------------------------------


In [None]:
import os
import shutil

# Source and destination folder paths
src_folder = '/home/ec2-user/SageMaker/data/edge_NG_156'
dst_folder = '/home/ec2-user/SageMaker/data/edge_NG_143'

# Files to copy
files_to_copy = [
    'edge_971.bmp', 'edge_4439.bmp', 'edge_4447.bmp', 'edge_2659.bmp',
    'edge_2667.bmp', 'edge_2675.bmp', 'edge_883.bmp', 'edge_4563.bmp',
    'edge_4875.bmp', 'edge_1755.bmp', 'edge_4987.bmp', 'edge_2642.bmp',
    'edge_4683.bmp', 'edge_4748.bmp', 'edge_4597.bmp', 'edge_4588.bmp',
    'edge_4322.bmp', 'edge_4377.bmp', 'edge_4383.bmp', 'edge_4345.bmp',
    'edge_4343.bmp', 'edge_4612.bmp', 'edge_4347.bmp', 'edge_4362.bmp',
    'edge_3940.bmp', 'edge_4306.bmp', 'edge_4483.bmp', 'edge_4491.bmp'
]

# Copy each listed file that exists in the source folder; report the rest
copied_count = 0
for filename in files_to_copy:
    src_path = os.path.join(src_folder, filename)
    dst_path = os.path.join(dst_folder, filename)
    if os.path.exists(src_path):
        # shutil.copy2 does not create missing parent directories, so make
        # sure the destination folder exists before copying into it.
        os.makedirs(dst_folder, exist_ok=True)
        shutil.copy2(src_path, dst_path)
        print(f"복사됨: {filename}")
        copied_count += 1
    else:
        print(f"파일을 찾을 수 없음: {filename}")

print(f"\n복사 완료! 총 {copied_count}개 파일이 복사되었습니다.")

In [None]:
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def load_images_from_folder(folder_path):
    """Read every image file in a folder with OpenCV.

    Entries are taken in ``os.listdir`` order and selected by
    case-insensitive extension (.bmp, .jpg, .jpeg, .png). Files for which
    ``cv2.imread`` returns ``None`` are skipped silently.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        List of dicts ``{'image': <array from cv2.imread>, 'filename': <name>}``.
    """
    image_suffixes = ('.bmp', '.jpg', '.jpeg', '.png')
    loaded = []

    for entry in os.listdir(folder_path):
        if not entry.lower().endswith(image_suffixes):
            continue
        pixels = cv2.imread(os.path.join(folder_path, entry))
        if pixels is None:
            continue
        loaded.append({'image': pixels, 'filename': entry})

    return loaded

def visualize_plots(images, predictions, probabilities, filenames):
    """Plot the predicted-class histogram next to a t-SNE scatter.

    The left panel is a bar chart counting how many predictions equal each
    class index 0-3 (dent, torn, bubble, foreignsub). The right panel is a
    2-D t-SNE embedding (``random_state=42``) of ``probabilities``, coloured
    by predicted class.

    Args:
        images: Not used by this function.
        predictions: Compared element-wise with each class index and used as
            a boolean mask, so it is expected to be a NumPy array.
        probabilities: Matrix passed to ``TSNE.fit_transform``.
        filenames: Not used by this function.

    Returns:
        Dict with 'class_distribution' (class name -> count) and
        'clustering' (the 2-D embedding).
    """
    class_names = ['dent', 'torn', 'bubble', 'foreignsub']
    palette = ['red', 'blue', 'green', 'purple']

    # One boolean mask per class, reused for the counts and the scatter plot.
    masks = [predictions == class_idx for class_idx in range(len(class_names))]
    counts = [np.sum(mask) for mask in masks]

    plt.figure(figsize=(12, 5))

    # 1. Distribution of predicted classes
    plt.subplot(121)
    plt.bar(class_names, counts)
    plt.title('Distribution of Predicted Classes')
    plt.ylabel('Number of Images')
    plt.xticks(rotation=45)

    # 2. Cluster view of the class probabilities via t-SNE
    plt.subplot(122)
    embedding = TSNE(n_components=2, random_state=42).fit_transform(probabilities)
    for name, color, mask in zip(class_names, palette, masks):
        plt.scatter(embedding[mask, 0], embedding[mask, 1],
                    c=color, label=name, alpha=0.6)

    plt.title('Clustering Visualization')
    plt.legend()
    plt.tight_layout()
    plt.show()

    return {
        'class_distribution': dict(zip(class_names, counts)),
        'clustering': embedding,
    }




def visualize_results(images, predictions, probabilities, filenames):
    """Show every image with its predicted class and print a result table.

    Images are laid out six per row. Each tile is titled with the file name,
    the predicted category and the confidence, where confidence is the
    probability of the predicted class times 100.

    Args:
        images: List of dicts with an 'image' entry to display.
        predictions: Per-image class index into
            ['dent', 'torn', 'bubble', 'foreignsub'].
        probabilities: Per-image sequence of class probabilities, indexed by
            the predicted class index.
        filenames: Per-image file name used in titles and in the result rows.

    Returns:
        List of dicts with keys 'file', 'prediction' and 'confidence', one
        per image. Empty when ``images`` is empty, in which case nothing is
        plotted.
    """
    categories = ['dent', 'torn', 'bubble', 'foreignsub']
    n_cols = 6
    # Ceiling division with integers only, so the function does not depend
    # on `math` having been imported by some other notebook cell.
    n_rows = -(-len(images) // n_cols)

    # Collect the prediction summary for every image.
    results = []
    for i in range(len(images)):
        class_idx = predictions[i]
        results.append({
            'file': filenames[i],
            'prediction': categories[class_idx],
            'confidence': probabilities[i][class_idx] * 100
        })

    # Draw the image grid; skipped for empty input to avoid a zero-height figure.
    if results:
        plt.figure(figsize=(20, 3*n_rows))
        for i, (img_data, r) in enumerate(zip(images, results)):
            plt.subplot(n_rows, n_cols, i + 1)
            # NOTE(review): cmap only applies to 2-D arrays; images loaded
            # with cv2.imread are 3-channel BGR - confirm the inputs are
            # gray-looking edge maps where channel order does not matter.
            plt.imshow(img_data['image'], cmap='gray')

            # Title shows file name, predicted class and confidence.
            plt.title(f"{r['file']}\n{r['prediction']}\n{r['confidence']:.1f}%",
                      fontsize=8, pad=5)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

    # Print the prediction table.
    print("\n예측 결과:")
    for r in results:
        print(f"File: {r['file']:<20} Prediction: {r['prediction']:<12} Confidence: {r['confidence']:.1f}%")

    return results

In [None]:
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.semi_supervised import LabelPropagation
from sklearn.metrics.pairwise import rbf_kernel
import cv2
import math
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def create_training_data(images, defect_images):
    """Build feature vectors and (partly missing) labels for every image.

    File names are matched case-insensitively against ``defect_images``.
    Matched images get the integer id of their category (dent=0, torn=1,
    bubble=2, foreignsub=3); all other images get the label -1.

    Args:
        images: List of dicts with 'image' and 'filename' entries.
        defect_images: List of dicts with 'file' and 'category' entries
            naming the labelled examples.

    Returns:
        Tuple ``(features, labels, filenames)`` where the first two are
        NumPy arrays and the last is a list, all in the order of ``images``.

    Raises:
        ValueError: If no image file name matches any labelled example.
    """
    category_ids = {'dent': 0, 'torn': 1, 'bubble': 2, 'foreignsub': 3}

    # Lower-cased file name -> category name of the labelled examples.
    known = {entry['file'].lower(): entry['category'] for entry in defect_images}

    features, labels, filenames = [], [], []
    for record in images:
        name = record['filename']
        features.append(extract_features(record['image']))
        filenames.append(name)

        key = name.lower()
        labels.append(category_ids[known[key]] if key in known else -1)

    # Category ids are never -1, so every non-negative-one label is a match.
    labeled_count = sum(label != -1 for label in labels)
    print(f"Found {labeled_count} labeled images out of {len(images)} total images")

    if labeled_count == 0:
        raise ValueError("No labeled data found! Please check if the image filenames match with defect_images.")

    return np.array(features), np.array(labels), filenames

def extract_features(image):
    """Build a 19-element feature vector describing one image.

    The vector is laid out as:

    * edge density: mean of the Canny (50, 150) edge map divided by 255
    * standard deviation of all pixel values of the input image
    * 4 GLCM contrast values (distance 1; angles 0, 45, 90, 135 degrees)
    * 4 GLCM dissimilarity values (same distance and angles)
    * number of blobs found by ``cv2.SimpleBlobDetector``
    * 8-bin histogram (raw counts) of Sobel gradient directions over
      ``[-pi, pi]``

    Args:
        image: Image array; a 3-dimensional array is converted from BGR to
            grayscale for the texture, blob and gradient features, any other
            shape is used as is.

    Returns:
        1-D NumPy array with the features in the order listed above.
    """
    gray = image if len(image.shape) != 3 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Edge density and overall intensity spread are taken from the raw input.
    edge_density = np.mean(cv2.Canny(image, 50, 150)) / 255.0
    intensity_std = np.std(image.astype(float))

    # Texture: co-occurrence statistics at distance 1 for four directions.
    glcm = graycomatrix(gray, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])
    contrast, dissimilarity = (
        graycoprops(glcm, prop)[0] for prop in ('contrast', 'dissimilarity')
    )

    # Blob count with an intensity threshold range and a minimum area.
    blob_params = cv2.SimpleBlobDetector_Params()
    blob_params.minThreshold = 10
    blob_params.maxThreshold = 200
    blob_params.filterByArea = True
    blob_params.minArea = 20
    blob_count = len(cv2.SimpleBlobDetector_create(blob_params).detect(gray))

    # Distribution of gradient directions.
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    angle_hist = np.histogram(
        np.arctan2(grad_y, grad_x), bins=8, range=(-np.pi, np.pi)
    )[0]

    return np.array([
        edge_density,
        intensity_std,
        *contrast,
        *dissimilarity,
        blob_count,
        *angle_hist,
    ])

def train_semi_supervised(features, labels):
    """Fit a LabelPropagation model and predict a class for every sample.

    Labelled samples (label != -1) are duplicated once with Gaussian noise
    (mean 0, std 0.1) added to their features, and the model is fitted on the
    original samples plus these augmented copies. The augmented copies are
    used for training only: predictions are returned for the input samples,
    one row per row of ``features``, so callers can index the results in
    step with their own data.

    The noise comes from NumPy's global random state, so results are only
    reproducible if the caller seeds it.

    Args:
        features: 2-D array-like, one row per sample.
        labels: 1-D array-like of integer class ids, with -1 marking
            unlabelled samples.

    Returns:
        Tuple ``(predictions, probabilities)`` from ``model.predict`` and
        ``model.predict_proba`` evaluated on ``features``.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)

    # Augment only the labelled samples with a noisy copy each.
    labeled_mask = labels != -1
    if labeled_mask.any():
        labeled_features = features[labeled_mask]
        noise = np.random.normal(0, 0.1, labeled_features.shape)
        train_features = np.vstack([features, labeled_features + noise])
        train_labels = np.concatenate([labels, labels[labeled_mask]])
    else:
        # Nothing to augment; stacking an empty list would raise in np.vstack.
        train_features, train_labels = features, labels

    model = LabelPropagation(
        kernel='rbf',
        gamma=3,
        max_iter=2000,
        tol=1e-6
    )
    model.fit(train_features, train_labels)

    # Predict for the caller's samples only, not for the augmented copies.
    predictions = model.predict(features)
    probabilities = model.predict_proba(features)

    return predictions, probabilities


def compute_class_weights(labels):
    """Compute inverse-frequency class weights, ignoring unlabelled samples.

    Each class present among the labelled entries (label != -1) gets a
    weight proportional to ``1 / count``; the weights are rescaled so that
    they sum to the number of classes.

    Args:
        labels: NumPy array of class ids with -1 marking unlabelled samples.

    Returns:
        Dict mapping each class id to its weight.
    """
    classes, counts = np.unique(labels[labels != -1], return_counts=True)
    inverse_freq = 1.0 / counts
    scaled = inverse_freq / np.sum(inverse_freq) * len(classes)
    return dict(zip(classes, scaled))

def main(folder_path, defect_images):
    """Run the whole semi-supervised defect classification pipeline.

    Loads the images, extracts and standardizes their features, reduces them
    with PCA, trains the semi-supervised model, and shows both the summary
    plots and the per-image result grid.

    Args:
        folder_path: Folder passed to ``load_images_from_folder``.
        defect_images: List of dicts with 'file' and 'category' entries
            naming the labelled example images.

    Returns:
        The per-image result list returned by ``visualize_results``.
    """
    print("Loading images...")
    images = load_images_from_folder(folder_path)

    print(f"Loaded {len(images)} images")
    print("\nImage filenames in folder:")
    for img in images[:5]:  # print only the first 5 file names
        print(f"- {img['filename']}")

    print("\nLabeled images we're looking for:")
    for defect in defect_images:
        print(f"- {defect['file']} ({defect['category']})")

    print("\nExtracting features...")
    features, labels, filenames = create_training_data(images, defect_images)

    print("\nStandardizing features...")
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    print("Reducing dimensionality...")
    # PCA cannot produce more components than there are samples or features,
    # so clamp against both dimensions.
    pca = PCA(n_components=min(20, *scaled_features.shape))
    reduced_features = pca.fit_transform(scaled_features)
    print(f"Reduced features shape: {reduced_features.shape}")

    print("\nTraining semi-supervised model...")
    predictions, probabilities = train_semi_supervised(reduced_features, labels)

    # Keep exactly one prediction per loaded image; any trailing rows (for
    # example for samples appended during training) are dropped so the plots
    # count each image once.
    predictions = predictions[:len(images)]
    probabilities = probabilities[:len(images)]

    print("Visualizing plots...")
    visualize_plots(images, predictions, probabilities, filenames)

    print("Visualizing results...")
    results = visualize_results(images, predictions, probabilities, filenames)

    return results


# Script entry point: classify the images in folder_path, using the hand-labelled
# examples below as the labelled part of the semi-supervised training set.
if __name__ == "__main__":
    folder_path = '/home/ec2-user/SageMaker/data/edge_NG_143_add_28'
    defect_images = [
    # dent example images
    {'file': 'edge_830.bmp', 'category': 'dent'},
    {'file': 'edge_733.bmp', 'category': 'dent'},
    {'file': 'edge_4447.bmp', 'category': 'dent'},
    {'file': 'edge_2659.bmp', 'category': 'dent'},
    {'file': 'edge_725.bmp', 'category': 'dent'},
    {'file': 'edge_2675.bmp', 'category': 'dent'},
    
    # torn example images
    {'file': 'edge_997.bmp', 'category': 'torn'},
    {'file': 'edge_1102.bmp', 'category': 'torn'},
    {'file': 'edge_988.bmp', 'category': 'torn'},
    {'file': 'edge_856.bmp', 'category': 'torn'},
    {'file': 'edge_4987.bmp', 'category': 'torn'},
    {'file': 'edge_2642.bmp', 'category': 'torn'},
    {'file': 'edge_4683.bmp', 'category': 'torn'},
    
    # bubble example images
    {'file': 'edge_4748.bmp', 'category': 'bubble'},
    {'file': 'edge_4597.bmp', 'category': 'bubble'},
    {'file': 'edge_4588.bmp', 'category': 'bubble'},
    {'file': 'edge_4322.bmp', 'category': 'bubble'},
    {'file': 'edge_4377.bmp', 'category': 'bubble'},
    {'file': 'edge_4383.bmp', 'category': 'bubble'},
    {'file': 'edge_4345.bmp', 'category': 'bubble'},
    {'file': 'edge_4343.bmp', 'category': 'bubble'},
    {'file': 'edge_4612.bmp', 'category': 'bubble'},
    {'file': 'edge_4347.bmp', 'category': 'bubble'},
    {'file': 'edge_4362.bmp', 'category': 'bubble'},
    
    # foreignsub example images
    {'file': 'edge_3940.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4306.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4483.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4491.bmp', 'category': 'foreignsub'},
]
    main(folder_path, defect_images)

In [None]:
import pandas as pd
import re

# Accumulators for the parsed records: image numbers and predicted labels
files, predictions = [], []

# Prediction log pasted in as text, one "File: ... Prediction: ... Confidence: ..." record per line
data = '''File: edge_4362.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_1172.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_4825.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_997.bmp         Prediction: torn         Confidence: 99.6%
File: edge_856.bmp         Prediction: torn         Confidence: 100.0%
File: edge_4937.bmp        Prediction: bubble       Confidence: 50.5%
File: edge_4923.bmp        Prediction: bubble       Confidence: 62.9%
File: edge_2675.bmp        Prediction: dent         Confidence: 100.0%
File: edge_4860.bmp        Prediction: bubble       Confidence: 68.3%
File: edge_4880.bmp        Prediction: bubble       Confidence: 55.6%
File: edge_725.bmp         Prediction: dent         Confidence: 96.7%
File: edge_4936.bmp        Prediction: bubble       Confidence: 54.0%
File: edge_998.bmp         Prediction: torn         Confidence: 80.0%
File: edge_4848.bmp        Prediction: bubble       Confidence: 60.7%
File: edge_988.bmp         Prediction: torn         Confidence: 94.6%
File: edge_4862.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_4985.bmp        Prediction: bubble       Confidence: 65.3%
File: edge_4861.bmp        Prediction: bubble       Confidence: 63.9%
File: edge_4845.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4946.bmp        Prediction: bubble       Confidence: 49.7%
File: edge_886.bmp         Prediction: bubble       Confidence: 78.0%
File: edge_4892.bmp        Prediction: bubble       Confidence: 53.7%
File: edge_5194.bmp        Prediction: torn         Confidence: 61.1%
File: edge_4859.bmp        Prediction: torn         Confidence: 42.5%
File: edge_757.bmp         Prediction: torn         Confidence: 66.5%
File: edge_4563.bmp        Prediction: bubble       Confidence: 47.5%
File: edge_4905.bmp        Prediction: bubble       Confidence: 59.0%
File: edge_4894.bmp        Prediction: bubble       Confidence: 55.0%
File: edge_4906.bmp        Prediction: bubble       Confidence: 69.5%
File: edge_996.bmp         Prediction: bubble       Confidence: 66.9%
File: edge_4840.bmp        Prediction: bubble       Confidence: 60.0%
File: edge_4855.bmp        Prediction: bubble       Confidence: 62.7%
File: edge_4931.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_4929.bmp        Prediction: bubble       Confidence: 58.7%
File: edge_4832.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_4844.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_4893.bmp        Prediction: bubble       Confidence: 53.7%
File: edge_830.bmp         Prediction: dent         Confidence: 99.8%
File: edge_4748.bmp        Prediction: bubble       Confidence: 97.9%
File: edge_4947.bmp        Prediction: bubble       Confidence: 55.5%
File: edge_4841.bmp        Prediction: bubble       Confidence: 59.0%
File: edge_4913.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4907.bmp        Prediction: torn         Confidence: 40.7%
File: edge_4976.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_4920.bmp        Prediction: bubble       Confidence: 73.4%
File: edge_4872.bmp        Prediction: bubble       Confidence: 64.5%
File: edge_4824.bmp        Prediction: bubble       Confidence: 79.7%
File: edge_4987.bmp        Prediction: torn         Confidence: 94.5%
File: edge_4930.bmp        Prediction: bubble       Confidence: 49.5%
File: edge_4483.bmp        Prediction: foreignsub   Confidence: 88.5%
File: edge_4306.bmp        Prediction: foreignsub   Confidence: 99.8%
File: edge_4865.bmp        Prediction: bubble       Confidence: 58.0%
File: edge_4597.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_763.bmp         Prediction: dent         Confidence: 90.1%
File: edge_4839.bmp        Prediction: bubble       Confidence: 67.0%
File: edge_4899.bmp        Prediction: bubble       Confidence: 67.9%
File: edge_4857.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4875.bmp        Prediction: bubble       Confidence: 95.8%
File: edge_772.bmp         Prediction: dent         Confidence: 54.8%
File: edge_4863.bmp        Prediction: bubble       Confidence: 62.8%
File: edge_4928.bmp        Prediction: bubble       Confidence: 50.8%
File: edge_4383.bmp        Prediction: bubble       Confidence: 99.0%
File: edge_733.bmp         Prediction: dent         Confidence: 95.3%
File: edge_4889.bmp        Prediction: bubble       Confidence: 70.1%
File: edge_3940.bmp        Prediction: foreignsub   Confidence: 92.5%
File: edge_4831.bmp        Prediction: bubble       Confidence: 60.9%
File: edge_4810.bmp        Prediction: bubble       Confidence: 63.6%
File: edge_4888.bmp        Prediction: bubble       Confidence: 55.8%
File: edge_5970.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_971.bmp         Prediction: bubble       Confidence: 54.1%
File: edge_4927.bmp        Prediction: bubble       Confidence: 57.4%
File: edge_4925.bmp        Prediction: bubble       Confidence: 47.4%
File: edge_4912.bmp        Prediction: bubble       Confidence: 57.6%
File: edge_4868.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_748.bmp         Prediction: torn         Confidence: 65.7%
File: edge_4897.bmp        Prediction: bubble       Confidence: 72.5%
File: edge_4876.bmp        Prediction: bubble       Confidence: 55.4%
File: edge_4816.bmp        Prediction: bubble       Confidence: 81.2%
File: edge_848.bmp         Prediction: torn         Confidence: 77.3%
File: edge_4871.bmp        Prediction: bubble       Confidence: 70.4%
File: edge_4838.bmp        Prediction: bubble       Confidence: 93.0%
File: edge_4903.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_4886.bmp        Prediction: bubble       Confidence: 72.5%
File: edge_5020.bmp        Prediction: bubble       Confidence: 65.0%
File: edge_4864.bmp        Prediction: bubble       Confidence: 63.8%
File: edge_4377.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4878.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4322.bmp        Prediction: bubble       Confidence: 96.9%
File: edge_4885.bmp        Prediction: bubble       Confidence: 66.5%
File: edge_4898.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_1755.bmp        Prediction: bubble       Confidence: 49.5%
File: edge_5021.bmp        Prediction: bubble       Confidence: 69.1%
File: edge_4343.bmp        Prediction: bubble       Confidence: 93.7%
File: edge_4822.bmp        Prediction: bubble       Confidence: 64.2%
File: edge_4058.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4345.bmp        Prediction: bubble       Confidence: 95.3%
File: edge_1094.bmp        Prediction: torn         Confidence: 73.8%
File: edge_4879.bmp        Prediction: foreignsub   Confidence: 50.1%
File: edge_4945.bmp        Prediction: bubble       Confidence: 55.4%
File: edge_4833.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4447.bmp        Prediction: dent         Confidence: 100.0%
File: edge_1102.bmp        Prediction: torn         Confidence: 100.0%
File: edge_4881.bmp        Prediction: bubble       Confidence: 56.3%
File: edge_4911.bmp        Prediction: bubble       Confidence: 56.5%
File: edge_4846.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_719.bmp         Prediction: bubble       Confidence: 34.7%
File: edge_2659.bmp        Prediction: dent         Confidence: 81.2%
File: edge_4874.bmp        Prediction: bubble       Confidence: 82.1%
File: edge_1116.bmp        Prediction: torn         Confidence: 73.9%
File: edge_1164.bmp        Prediction: bubble       Confidence: 55.8%
File: edge_4491.bmp        Prediction: foreignsub   Confidence: 96.9%
File: edge_4922.bmp        Prediction: torn         Confidence: 43.4%
File: edge_4918.bmp        Prediction: bubble       Confidence: 81.9%
File: edge_2667.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_887.bmp         Prediction: torn         Confidence: 78.6%
File: edge_4873.bmp        Prediction: torn         Confidence: 47.1%
File: edge_4938.bmp        Prediction: foreignsub   Confidence: 33.6%
File: edge_4877.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4870.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_4837.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_1148.bmp        Prediction: bubble       Confidence: 46.6%
File: edge_4683.bmp        Prediction: torn         Confidence: 96.7%
File: edge_756.bmp         Prediction: foreignsub   Confidence: 70.4%
File: edge_4883.bmp        Prediction: bubble       Confidence: 55.5%
File: edge_1140.bmp        Prediction: torn         Confidence: 99.9%
File: edge_973.bmp         Prediction: bubble       Confidence: 100.0%
File: edge_4828.bmp        Prediction: bubble       Confidence: 73.2%
File: edge_831.bmp         Prediction: bubble       Confidence: 84.9%
File: edge_4919.bmp        Prediction: bubble       Confidence: 54.2%
File: edge_1027.bmp        Prediction: dent         Confidence: 45.5%
File: edge_4829.bmp        Prediction: bubble       Confidence: 64.0%
File: edge_4887.bmp        Prediction: bubble       Confidence: 68.3%
File: edge_2642.bmp        Prediction: torn         Confidence: 94.7%
File: edge_4347.bmp        Prediction: bubble       Confidence: 96.8%
File: edge_2758.bmp        Prediction: torn         Confidence: 60.8%
File: edge_4926.bmp        Prediction: torn         Confidence: 49.2%
File: edge_4821.bmp        Prediction: torn         Confidence: 64.0%
File: edge_4830.bmp        Prediction: bubble       Confidence: 60.7%
File: edge_2759.bmp        Prediction: bubble       Confidence: 67.6%
File: edge_4856.bmp        Prediction: bubble       Confidence: 57.1%
File: edge_4954.bmp        Prediction: bubble       Confidence: 56.0%
File: edge_4439.bmp        Prediction: bubble       Confidence: 81.6%
File: edge_4849.bmp        Prediction: bubble       Confidence: 59.6%
File: edge_4915.bmp        Prediction: bubble       Confidence: 62.1%
File: edge_4814.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4823.bmp        Prediction: bubble       Confidence: 61.3%
File: edge_1500.bmp        Prediction: torn         Confidence: 70.5%
File: edge_4921.bmp        Prediction: bubble       Confidence: 61.9%
File: edge_778.bmp         Prediction: bubble       Confidence: 53.4%
File: edge_4847.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_4818.bmp        Prediction: bubble       Confidence: 68.9%
File: edge_4853.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4896.bmp        Prediction: bubble       Confidence: 77.8%
File: edge_4948.bmp        Prediction: bubble       Confidence: 51.7%
File: edge_6070.bmp        Prediction: bubble       Confidence: 79.5%
File: edge_808.bmp         Prediction: bubble       Confidence: 62.2%
File: edge_4588.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4836.bmp        Prediction: bubble       Confidence: 85.6%
File: edge_1028.bmp        Prediction: torn         Confidence: 99.4%
File: edge_4852.bmp        Prediction: bubble       Confidence: 65.8%
File: edge_1179.bmp        Prediction: dent         Confidence: 55.6%
File: edge_4904.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_734.bmp         Prediction: dent         Confidence: 97.7%
File: edge_4826.bmp        Prediction: bubble       Confidence: 64.0%
File: edge_883.bmp         Prediction: bubble       Confidence: 64.6%
File: edge_4910.bmp        Prediction: bubble       Confidence: 85.8%
File: edge_4895.bmp        Prediction: bubble       Confidence: 66.7%
File: edge_4854.bmp        Prediction: bubble       Confidence: 89.2%
File: edge_4869.bmp        Prediction: bubble       Confidence: 70.3%
File: edge_4612.bmp        Prediction: bubble       Confidence: 97.9%'''  # 전체 데이터를 여기에 넣어주세요

# Parse each log record into (image number, predicted label).
# A single pattern captures both fields, so a line missing either part is
# reported and skipped instead of raising an uncaught IndexError.
record_pattern = re.compile(
    r'edge_(\d+)\.bmp.*?Prediction:\s*(.*?)\s*(?:Confidence:|$)'
)
for line in data.strip().split('\n'):
    if not line:  # skip blank lines
        continue
    match = record_pattern.search(line)
    if match is None:
        print(f"Skipping problematic line: {line}")
        continue
    number, prediction = match.groups()
    files.append(number)
    predictions.append(prediction)

# Create DataFrame (the image numbers are kept as strings)
df = pd.DataFrame({
    'number': files,
    'prediction': predictions
})

# Save to CSV without index
df.to_csv('143+28_predictions.csv', index=False)

# Print the whole table without the index column
print("Data preview:")
print(df.to_string(index=False))

In [None]:
import os
import shutil

# Source and destination folder paths
src_folder = '/home/ec2-user/SageMaker/data/NG'
dst_folder = '/home/ec2-user/SageMaker/data/NG_v2'

# Files to copy
files_to_copy = [
    '4483.bmp',
    '4306.bmp',
    '4383.bmp',
    '3940.bmp',
    '971.bmp',
    '4377.bmp',
    '4322.bmp',
    '4491.bmp',
    '2667.bmp',
    '4683.bmp',
    '2642.bmp',
    '4347.bmp',
    '4439.bmp',
    '4588.bmp',
    '883.bmp',
    '4612.bmp',
    '4362.bmp'
]

# shutil.copy2 does not create missing parent directories, so make sure the
# destination folder exists before the first copy.
os.makedirs(dst_folder, exist_ok=True)

# Copy each listed file that exists in the source folder; report the rest.
copied_count = 0
for filename in files_to_copy:
    src_path = os.path.join(src_folder, filename)
    dst_path = os.path.join(dst_folder, filename)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
        print(f"복사됨: {filename}")
        copied_count += 1
    else:
        print(f"파일을 찾을 수 없음: {filename}")

print(f"\n복사 완료! 총 {copied_count}개 파일이 복사되었습니다.")

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.semi_supervised import LabelPropagation
from sklearn.metrics.pairwise import rbf_kernel
import cv2
import math
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def load_images_from_folder(folder_path):
    """Load every supported image file found directly inside *folder_path*.

    A directory entry is considered when its lower-cased name ends with
    .bmp, .jpg, .jpeg or .png. Entries for which ``cv2.imread`` returns
    ``None`` are skipped.

    Returns:
        A list of dicts, one per loaded image, with keys ``'image'`` (the
        value returned by ``cv2.imread``) and ``'filename'`` (the entry name
        as reported by ``os.listdir``).
    """
    supported_suffixes = ('.bmp', '.jpg', '.jpeg', '.png')
    loaded = []

    for entry in os.listdir(folder_path):
        # Case-insensitive extension filter.
        if not entry.lower().endswith(supported_suffixes):
            continue

        pixels = cv2.imread(os.path.join(folder_path, entry))
        if pixels is None:
            continue

        loaded.append({'image': pixels, 'filename': entry})

    return loaded

def create_training_data(images, defect_images):
    """Build the feature matrix and label vector for semi-supervised training.

    Args:
        images: list of dicts with ``'image'`` and ``'filename'`` keys.
        defect_images: list of dicts with ``'file'`` and ``'category'`` keys
            naming the labeled examples; ``'category'`` must be one of
            'dent', 'torn', 'bubble' or 'foreignsub'.

    Returns:
        ``(features, labels, filenames)`` where ``features`` and ``labels``
        are numpy arrays and ``filenames`` is a list. A label is the category
        id (dent=0, torn=1, bubble=2, foreignsub=3) for labeled images and
        -1 for every other image.

    Raises:
        ValueError: if no image filename matches any entry of
            ``defect_images``.
    """
    class_ids = {'dent': 0, 'torn': 1, 'bubble': 2, 'foreignsub': 3}

    # Lower-cased filename -> category name, so matching ignores case.
    label_lookup = {entry['file'].lower(): entry['category'] for entry in defect_images}

    features, labels, filenames = [], [], []
    n_labeled = 0

    for record in images:
        features.append(extract_features(record['image']))
        filenames.append(record['filename'])

        name_key = record['filename'].lower()
        if name_key not in label_lookup:
            labels.append(-1)  # unlabeled sample
            continue

        labels.append(class_ids[label_lookup[name_key]])
        n_labeled += 1

    print(f"Found {n_labeled} labeled images out of {len(images)} total images")

    if n_labeled == 0:
        raise ValueError("No labeled data found! Please check if the image filenames match with defect_images.")

    return np.array(features), np.array(labels), filenames

def extract_features(image):
    """Compute a hand-crafted feature vector for one image.

    The returned 1-D numpy array is laid out, in order, as: edge density,
    pixel standard deviation, GLCM contrast values, GLCM dissimilarity
    values, blob count, and an 8-bin histogram of gradient angles.

    Args:
        image: image array as returned by ``cv2.imread``; a 3-dimensional
            array is converted with ``COLOR_BGR2GRAY``, anything else is
            used as the grayscale image directly.
    """
    # Grayscale view shared by the texture, blob and gradient features.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # 1. Edge density: mean of the Canny edge map divided by 255.
    edge_density = np.mean(cv2.Canny(image, 50, 150)) / 255.0

    # 2. Contrast: standard deviation over all pixel values of the input.
    local_std = np.std(image.astype(float))

    # 3. Texture: co-occurrence matrix at distance 1 for four angles.
    glcm_angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    glcm = graycomatrix(gray, [1], glcm_angles)
    contrast = graycoprops(glcm, 'contrast')[0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0]

    # 4. Blob count from OpenCV's simple blob detector.
    blob_params = cv2.SimpleBlobDetector_Params()
    blob_params.minThreshold = 10
    blob_params.maxThreshold = 200
    blob_params.filterByArea = True
    blob_params.minArea = 20
    blob_detector = cv2.SimpleBlobDetector_create(blob_params)
    blob_count = len(blob_detector.detect(gray))

    # 5. Gradient orientation: histogram of per-pixel Sobel gradient angles.
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    angle_hist = np.histogram(
        np.arctan2(grad_y, grad_x), bins=8, range=(-np.pi, np.pi)
    )[0]

    # Assemble the feature vector.
    return np.array([
        edge_density,
        local_std,
        *contrast,
        *dissimilarity,
        blob_count,
        *angle_hist,
    ])

def train_semi_supervised(features, labels):
    """Fit a LabelPropagation model and predict a class for every sample.

    Each labeled sample (label != -1) is duplicated once with Gaussian noise
    (mean 0, standard deviation 0.1) added to its features; the duplicates
    are used for fitting only.

    Args:
        features: 2-D array-like, one row of features per sample.
        labels: 1-D array-like of class ids, with -1 marking unlabeled
            samples.

    Returns:
        ``(predictions, probabilities)`` as returned by the model's
        ``predict`` and ``predict_proba`` for the original ``features`` rows.

    Raises:
        ValueError: if no sample carries a label.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)

    labeled_mask = labels != -1
    if not labeled_mask.any():
        # Without this guard the stacking below fails with an unrelated
        # shape-mismatch error.
        raise ValueError(
            "train_semi_supervised needs at least one labeled sample (label != -1)."
        )

    # Augmentation: one noisy copy of every labeled row.
    labeled_features = features[labeled_mask]
    noise = np.random.normal(0, 0.1, labeled_features.shape)

    train_features = np.vstack([features, labeled_features + noise])
    train_labels = np.concatenate([labels, labels[labeled_mask]])

    model = LabelPropagation(
        kernel='rbf',
        gamma=3,
        max_iter=2000,
        tol=1e-6
    )
    model.fit(train_features, train_labels)

    # Predict for the original rows only; the noisy copies are discarded.
    predictions = model.predict(features)
    probabilities = model.predict_proba(features)

    return predictions, probabilities

def compute_class_weights(labels):
    """Return inverse-frequency class weights to counter class imbalance.

    Args:
        labels: numpy array of class ids; entries equal to -1 (unlabeled)
            are ignored.

    Returns:
        Dict mapping each class id present in ``labels`` to its weight.
        Weights are proportional to ``1 / count`` and scaled so that they
        sum to the number of classes.
    """
    classes, counts = np.unique(labels[labels != -1], return_counts=True)
    inverse_freq = 1.0 / counts
    # Rescale so the weights add up to the number of classes.
    scaled = inverse_freq / np.sum(inverse_freq) * len(classes)
    return dict(zip(classes, scaled))

def visualize_results(images, predictions, probabilities, filenames, original_folder_path='/home/ec2-user/SageMaker/sein/NG_2'):
    """Show each classified image next to its original and collect results.

    For every sample whose prediction is not -1, the original image is
    looked up in ``original_folder_path`` under the filename with every
    ``'edge_'`` occurrence removed. Samples whose original cannot be loaded
    are reported and skipped.

    Args:
        images: list of dicts with an ``'image'`` key (the edge image).
        predictions: per-sample category ids (0=dent, 1=torn, 2=bubble,
            3=foreignsub); -1 means "no prediction".
        probabilities: per-sample class-probability rows.
        filenames: per-sample edge-image filenames.
        original_folder_path: folder holding the original images.

    Returns:
        List of dicts with keys ``'filename'``, ``'category'`` and
        ``'confidence'`` (a percentage), one per displayed sample.
    """
    categories = {0: 'dent', 1: 'torn', 2: 'bubble', 3: 'foreignsub'}

    results = []

    for img_data, pred, prob, filename in zip(images, predictions, probabilities, filenames):
        if pred == -1:  # nothing was predicted for this sample
            continue

        # Original image path (drop the 'edge_' marker from the name).
        original_filename = filename.replace('edge_', '')
        original_path = os.path.join(original_folder_path, original_filename)

        original_img = cv2.imread(original_path)
        if original_img is None:
            print(f"Warning: Cannot load original image {original_filename}")
            continue

        category = categories[pred]
        # Probability columns follow the model's class order, which differs
        # from the category ids whenever a category has no labeled sample,
        # so indexing the row by `pred` can hit the wrong column or run out
        # of range. The predicted class is the arg-max column, hence its
        # probability is the row maximum.
        confidence = np.max(prob) * 100

        plt.figure(figsize=(12, 6))

        # Left panel: original image.
        plt.subplot(1, 2, 1)
        plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
        plt.title('Original Image')
        plt.axis('off')

        # Right panel: edge-processed image with the prediction as title.
        plt.subplot(1, 2, 2)
        plt.imshow(cv2.cvtColor(img_data['image'], cv2.COLOR_BGR2RGB))
        plt.title(f'Detected: {category}\nConfidence: {confidence:.2f}%')
        plt.axis('off')

        results.append({
            'filename': original_filename,
            'category': category,
            'confidence': confidence
        })

        plt.tight_layout()
        plt.show()
        plt.close()

        # Textual summary for this sample.
        print(f"\nResults for {original_filename}:")
        print(f"Detected defect: {category}")
        print(f"Confidence: {confidence:.2f}%")
        print("-" * 50)

    return results

def main(edge_folder_path, original_folder_path, defect_images):
    """Run the full pipeline: load, featurize, reduce, classify, visualize.

    Args:
        edge_folder_path: folder containing the edge-processed images.
        original_folder_path: folder containing the original images shown
            next to each result.
        defect_images: list of dicts with ``'file'`` and ``'category'`` keys
            naming the labeled examples.

    Returns:
        The list of result dicts produced by ``visualize_results``.
    """
    print("Loading images...")
    images = load_images_from_folder(edge_folder_path)

    print(f"Loaded {len(images)} images")
    print("\nImage filenames in folder:")
    for img in images[:5]:  # only a short sample of the folder contents
        print(f"- {img['filename']}")

    print("\nLabeled images we're looking for:")
    for defect in defect_images:
        print(f"- {defect['file']} ({defect['category']})")

    print("\nExtracting features...")
    features, labels, filenames = create_training_data(images, defect_images)

    print("\nStandardizing features...")
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    print("Reducing dimensionality...")
    # PCA cannot keep more components than min(n_samples, n_features), so
    # cap by both dimensions; capping by feature count alone fails when the
    # folder holds fewer images than there are features.
    n_components = min(20, *scaled_features.shape)
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(scaled_features)
    print(f"Reduced features shape: {reduced_features.shape}")

    print("\nTraining semi-supervised model...")
    predictions, probabilities = train_semi_supervised(reduced_features, labels)

    print("\nVisualizing results...")
    results = visualize_results(images, predictions, probabilities, filenames, original_folder_path)

    return results

if __name__ == "__main__":
    edge_folder_path = '/home/ec2-user/SageMaker/data/edge_NG_143_add_28'
    original_folder_path = '/home/ec2-user/SageMaker/data/NG_v2'

    # Hand-labeled example images, grouped by defect category. Each number
    # is the id inside the "edge_<id>.bmp" filename.
    labeled_ids = {
        'dent': [830, 733, 4447, 2659, 725, 2675],
        'torn': [997, 1102, 988, 856, 4987, 2642, 4683],
        'bubble': [
            4748, 4597, 4588, 4322, 4377, 4383,
            4345, 4343, 4612, 4347, 4362,
        ],
        'foreignsub': [3940, 4306, 4483, 4491],
    }

    # Expand the table into the list of {'file', 'category'} records that
    # main() expects, keeping category and id order.
    defect_images = [
        {'file': f'edge_{image_id}.bmp', 'category': category}
        for category, image_ids in labeled_ids.items()
        for image_id in image_ids
    ]

    results = main(edge_folder_path, original_folder_path, defect_images)