In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

def precise_error_edge_detection(gray_image):
    sobelx = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
    
    magnitude = np.sqrt(sobelx**2 + sobely**2)
    
    threshold = np.mean(magnitude) + 2 * np.std(magnitude)
    binary_edges = (magnitude > threshold).astype(np.uint8) * 255
    
    return binary_edges

def preprocess_image(image):
    # 1. Grayscale 변환
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # 2. Gaussian Blur (노이즈 감소)
    blurred = cv2.GaussianBlur(gray, (7, 7), 1)
    
    # 3. Bilateral Filter (엣지 보존)
    filtered = cv2.bilateralFilter(blurred, 20, 90, 130)
    
    # 4. CLAHE (대비 개선)
    clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(4, 4))
    enhanced = clahe.apply(filtered)
    
    # 5. Precise Error Edge Detection
    edges = precise_error_edge_detection(enhanced)
    return edges

def process_and_save_images(input_folder, output_folder):
    # 출력 폴더 생성
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    
    # BMP 파일 목록 가져오기
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith('.bmp')]
    
    print(f"총 처리할 이미지 개수: {len(image_files)}")
    
    for i, filename in enumerate(image_files, 1):
        input_path = os.path.join(input_folder, filename)
        image = cv2.imread(input_path)
        
        if image is not None:
            # 이미지 전처리
            edges = preprocess_image(image)
            
            # 최종 결과 저장
            output_path = os.path.join(output_folder, f'edge_{filename}')
            cv2.imwrite(output_path, edges)
            
            # 진행 상태 출력
            print(f"[{i}/{len(image_files)}] 처리완료: {filename}")
        else:
            print(f"[!] 에러: {filename} 파일을 읽을 수 없습니다.")
    
    print("모든 이미지 처리가 완료되었습니다.")

# 실행 코드
input_folder = '/home/ec2-user/SageMaker/data/Final_Data/OK/'
output_folder = '/home/ec2-user/SageMaker/data/Final_Data/OK_processed_v2/'

# 이미지 처리 및 저장
processed_images = process_and_save_images(input_folder, output_folder)



총 처리할 이미지 개수: 4386
[1/4386] 처리완료: 1701.bmp
[2/4386] 처리완료: 1464.bmp
[3/4386] 처리완료: 1529.bmp
[4/4386] 처리완료: 4428.bmp
[5/4386] 처리완료: 3931.bmp
[6/4386] 처리완료: 1397.bmp
[7/4386] 처리완료: 4897.bmp
[8/4386] 처리완료: 1158.bmp
[9/4386] 처리완료: 4668.bmp
[10/4386] 처리완료: 3496.bmp
[11/4386] 처리완료: 563.bmp
[12/4386] 처리완료: 4140.bmp
[13/4386] 처리완료: 1641.bmp
[14/4386] 처리완료: 614.bmp
[15/4386] 처리완료: 3421.bmp
[16/4386] 처리완료: 4173.bmp
[17/4386] 처리완료: 396.bmp
[18/4386] 처리완료: 1637.bmp
[19/4386] 처리완료: 3352.bmp
[20/4386] 처리완료: 4030.bmp
[21/4386] 처리완료: 4762.bmp
[22/4386] 처리완료: 3552.bmp
[23/4386] 처리완료: 3807.bmp
[24/4386] 처리완료: 4689.bmp
[25/4386] 처리완료: 3248.bmp
[26/4386] 처리완료: 885.bmp
[27/4386] 처리완료: 2081.bmp
[28/4386] 처리완료: 2799.bmp
[29/4386] 처리완료: 4949.bmp
[30/4386] 처리완료: 273.bmp
[31/4386] 처리완료: 1794.bmp
[32/4386] 처리완료: 4208.bmp
[33/4386] 처리완료: 4437.bmp
[34/4386] 처리완료: 3120.bmp
[35/4386] 처리완료: 3810.bmp
[36/4386] 처리완료: 3761.bmp
[37/4386] 처리완료: 4712.bmp
[38/4386] 처리완료: 2300.bmp
[39/4386] 처리완료: 167.bmp
[40/4386] 처리완료: 1989.

[323/4386] 처리완료: 4038.bmp
[324/4386] 처리완료: 2005.bmp
[325/4386] 처리완료: 2516.bmp
[326/4386] 처리완료: 1543.bmp
[327/4386] 처리완료: 224.bmp
[328/4386] 처리완료: 2440.bmp
[329/4386] 처리완료: 4637.bmp
[330/4386] 처리완료: 4537.bmp
[331/4386] 처리완료: 574.bmp
[332/4386] 처리완료: 632.bmp
[333/4386] 처리완료: 2082.bmp
[334/4386] 처리완료: 5091.bmp
[335/4386] 처리완료: 4774.bmp
[336/4386] 처리완료: 4639.bmp
[337/4386] 처리완료: 2751.bmp
[338/4386] 처리완료: 1438.bmp
[339/4386] 처리완료: 3992.bmp
[340/4386] 처리완료: 3855.bmp
[341/4386] 처리완료: 1462.bmp
[342/4386] 처리완료: 1972.bmp
[343/4386] 처리완료: 4210.bmp
[344/4386] 처리완료: 1518.bmp
[345/4386] 처리완료: 597.bmp
[346/4386] 처리완료: 551.bmp
[347/4386] 처리완료: 376.bmp
[348/4386] 처리완료: 4147.bmp
[349/4386] 처리완료: 2277.bmp
[350/4386] 처리완료: 905.bmp
[351/4386] 처리완료: 1376.bmp
[352/4386] 처리완료: 1086.bmp
[353/4386] 처리완료: 65.bmp
[354/4386] 처리완료: 3055.bmp
[355/4386] 처리완료: 4468.bmp
[356/4386] 처리완료: 4616.bmp
[357/4386] 처리완료: 2638.bmp
[358/4386] 처리완료: 4405.bmp
[359/4386] 처리완료: 4583.bmp
[360/4386] 처리완료: 5085.bmp
[361/4386] 처리완료: 5076

[642/4386] 처리완료: 831.bmp
[643/4386] 처리완료: 3391.bmp
[644/4386] 처리완료: 1673.bmp
[645/4386] 처리완료: 3836.bmp
[646/4386] 처리완료: 20.bmp
[647/4386] 처리완료: 1654.bmp
[648/4386] 처리완료: 1722.bmp
[649/4386] 처리완료: 1456.bmp
[650/4386] 처리완료: 5037.bmp
[651/4386] 처리완료: 3881.bmp
[652/4386] 처리완료: 2421.bmp
[653/4386] 처리완료: 4487.bmp
[654/4386] 처리완료: 1130.bmp
[655/4386] 처리완료: 1231.bmp
[656/4386] 처리완료: 4976.bmp
[657/4386] 처리완료: 2744.bmp
[658/4386] 처리완료: 3174.bmp
[659/4386] 처리완료: 1574.bmp
[660/4386] 처리완료: 1790.bmp
[661/4386] 처리완료: 1974.bmp
[662/4386] 처리완료: 2528.bmp
[663/4386] 처리완료: 35.bmp
[664/4386] 처리완료: 1351.bmp
[665/4386] 처리완료: 3101.bmp
[666/4386] 처리완료: 2006.bmp
[667/4386] 처리완료: 3941.bmp
[668/4386] 처리완료: 2730.bmp
[669/4386] 처리완료: 3631.bmp
[670/4386] 처리완료: 1764.bmp
[671/4386] 처리완료: 2468.bmp
[672/4386] 처리완료: 1193.bmp
[673/4386] 처리완료: 2557.bmp
[674/4386] 처리완료: 1746.bmp
[675/4386] 처리완료: 1097.bmp
[676/4386] 처리완료: 5065.bmp
[677/4386] 처리완료: 3562.bmp
[678/4386] 처리완료: 1630.bmp
[679/4386] 처리완료: 1956.bmp
[680/4386] 처리완료: 

[960/4386] 처리완료: 2089.bmp
[961/4386] 처리완료: 2879.bmp
[962/4386] 처리완료: 3993.bmp
[963/4386] 처리완료: 1890.bmp
[964/4386] 처리완료: 2166.bmp
[965/4386] 처리완료: 3305.bmp
[966/4386] 처리완료: 1853.bmp
[967/4386] 처리완료: 4141.bmp
[968/4386] 처리완료: 3744.bmp
[969/4386] 처리완료: 4024.bmp
[970/4386] 처리완료: 4910.bmp
[971/4386] 처리완료: 1244.bmp
[972/4386] 처리완료: 1567.bmp
[973/4386] 처리완료: 2340.bmp
[974/4386] 처리완료: 286.bmp
[975/4386] 처리완료: 564.bmp
[976/4386] 처리완료: 1092.bmp
[977/4386] 처리완료: 2791.bmp
[978/4386] 처리완료: 2258.bmp
[979/4386] 처리완료: 2413.bmp
[980/4386] 처리완료: 1924.bmp
[981/4386] 처리완료: 1207.bmp
[982/4386] 처리완료: 4644.bmp
[983/4386] 처리완료: 3546.bmp
[984/4386] 처리완료: 3658.bmp
[985/4386] 처리완료: 1815.bmp
[986/4386] 처리완료: 4152.bmp
[987/4386] 처리완료: 5025.bmp
[988/4386] 처리완료: 541.bmp
[989/4386] 처리완료: 1292.bmp
[990/4386] 처리완료: 434.bmp
[991/4386] 처리완료: 4917.bmp
[992/4386] 처리완료: 4857.bmp
[993/4386] 처리완료: 693.bmp
[994/4386] 처리완료: 4294.bmp
[995/4386] 처리완료: 1906.bmp
[996/4386] 처리완료: 369.bmp
[997/4386] 처리완료: 3106.bmp
[998/4386] 처리완료: 2

[1268/4386] 처리완료: 1136.bmp
[1269/4386] 처리완료: 2334.bmp
[1270/4386] 처리완료: 800.bmp
[1271/4386] 처리완료: 3777.bmp
[1272/4386] 처리완료: 2680.bmp
[1273/4386] 처리완료: 4274.bmp
[1274/4386] 처리완료: 4107.bmp
[1275/4386] 처리완료: 4510.bmp
[1276/4386] 처리완료: 370.bmp
[1277/4386] 처리완료: 1216.bmp
[1278/4386] 처리완료: 1004.bmp
[1279/4386] 처리완료: 1548.bmp
[1280/4386] 처리완료: 950.bmp
[1281/4386] 처리완료: 4935.bmp
[1282/4386] 처리완료: 3123.bmp
[1283/4386] 처리완료: 2453.bmp
[1284/4386] 처리완료: 4212.bmp
[1285/4386] 처리완료: 438.bmp
[1286/4386] 처리완료: 4723.bmp
[1287/4386] 처리완료: 3534.bmp
[1288/4386] 처리완료: 858.bmp
[1289/4386] 처리완료: 2986.bmp
[1290/4386] 처리완료: 3540.bmp
[1291/4386] 처리완료: 882.bmp
[1292/4386] 처리완료: 2968.bmp
[1293/4386] 처리완료: 940.bmp
[1294/4386] 처리완료: 847.bmp
[1295/4386] 처리완료: 2540.bmp
[1296/4386] 처리완료: 4410.bmp
[1297/4386] 처리완료: 5022.bmp
[1298/4386] 처리완료: 2285.bmp
[1299/4386] 처리완료: 3427.bmp
[1300/4386] 처리완료: 384.bmp
[1301/4386] 처리완료: 4076.bmp
[1302/4386] 처리완료: 1070.bmp
[1303/4386] 처리완료: 2573.bmp
[1304/4386] 처리완료: 1410.bmp
[1305/4386

[1575/4386] 처리완료: 928.bmp
[1576/4386] 처리완료: 1391.bmp
[1577/4386] 처리완료: 3896.bmp
[1578/4386] 처리완료: 303.bmp
[1579/4386] 처리완료: 3776.bmp
[1580/4386] 처리완료: 1881.bmp
[1581/4386] 처리완료: 3521.bmp
[1582/4386] 처리완료: 1168.bmp
[1583/4386] 처리완료: 1936.bmp
[1584/4386] 처리완료: 4777.bmp
[1585/4386] 처리완료: 3722.bmp
[1586/4386] 처리완료: 3922.bmp
[1587/4386] 처리완료: 2296.bmp
[1588/4386] 처리완료: 1486.bmp
[1589/4386] 처리완료: 2344.bmp
[1590/4386] 처리완료: 2813.bmp
[1591/4386] 처리완료: 36.bmp
[1592/4386] 처리완료: 1248.bmp
[1593/4386] 처리완료: 4481.bmp
[1594/4386] 처리완료: 1855.bmp
[1595/4386] 처리완료: 1125.bmp
[1596/4386] 처리완료: 3648.bmp
[1597/4386] 처리완료: 2997.bmp
[1598/4386] 처리완료: 4416.bmp
[1599/4386] 처리완료: 1952.bmp
[1600/4386] 처리완료: 5048.bmp
[1601/4386] 처리완료: 1008.bmp
[1602/4386] 처리완료: 600.bmp
[1603/4386] 처리완료: 2374.bmp
[1604/4386] 처리완료: 2781.bmp
[1605/4386] 처리완료: 1965.bmp
[1606/4386] 처리완료: 2071.bmp
[1607/4386] 처리완료: 311.bmp
[1608/4386] 처리완료: 2522.bmp
[1609/4386] 처리완료: 1300.bmp
[1610/4386] 처리완료: 1970.bmp
[1611/4386] 처리완료: 2304.bmp
[1612/4

[1881/4386] 처리완료: 3247.bmp
[1882/4386] 처리완료: 4180.bmp
[1883/4386] 처리완료: 2847.bmp
[1884/4386] 처리완료: 1476.bmp
[1885/4386] 처리완료: 4297.bmp
[1886/4386] 처리완료: 4300.bmp
[1887/4386] 처리완료: 5028.bmp
[1888/4386] 처리완료: 4219.bmp
[1889/4386] 처리완료: 2664.bmp
[1890/4386] 처리완료: 4198.bmp
[1891/4386] 처리완료: 550.bmp
[1892/4386] 처리완료: 788.bmp
[1893/4386] 처리완료: 2701.bmp
[1894/4386] 처리완료: 2131.bmp
[1895/4386] 처리완료: 697.bmp
[1896/4386] 처리완료: 1326.bmp
[1897/4386] 처리완료: 1492.bmp
[1898/4386] 처리완료: 1005.bmp
[1899/4386] 처리완료: 414.bmp
[1900/4386] 처리완료: 333.bmp
[1901/4386] 처리완료: 1552.bmp
[1902/4386] 처리완료: 1720.bmp
[1903/4386] 처리완료: 761.bmp
[1904/4386] 처리완료: 2944.bmp
[1905/4386] 처리완료: 2860.bmp
[1906/4386] 처리완료: 4882.bmp
[1907/4386] 처리완료: 3041.bmp
[1908/4386] 처리완료: 1094.bmp
[1909/4386] 처리완료: 4525.bmp
[1910/4386] 처리완료: 2420.bmp
[1911/4386] 처리완료: 4599.bmp
[1912/4386] 처리완료: 3850.bmp
[1913/4386] 처리완료: 4190.bmp
[1914/4386] 처리완료: 3343.bmp
[1915/4386] 처리완료: 485.bmp
[1916/4386] 처리완료: 3738.bmp
[1917/4386] 처리완료: 22.bmp
[1918/4386

[2187/4386] 처리완료: 1061.bmp
[2188/4386] 처리완료: 821.bmp
[2189/4386] 처리완료: 1181.bmp
[2190/4386] 처리완료: 3933.bmp
[2191/4386] 처리완료: 4165.bmp
[2192/4386] 처리완료: 1870.bmp
[2193/4386] 처리완료: 2136.bmp
[2194/4386] 처리완료: 4634.bmp
[2195/4386] 처리완료: 1287.bmp
[2196/4386] 처리완료: 3218.bmp
[2197/4386] 처리완료: 1982.bmp
[2198/4386] 처리완료: 462.bmp
[2199/4386] 처리완료: 5019.bmp
[2200/4386] 처리완료: 1586.bmp
[2201/4386] 처리완료: 4614.bmp
[2202/4386] 처리완료: 565.bmp
[2203/4386] 처리완료: 1830.bmp
[2204/4386] 처리완료: 4960.bmp
[2205/4386] 처리완료: 1937.bmp
[2206/4386] 처리완료: 3697.bmp
[2207/4386] 처리완료: 3606.bmp
[2208/4386] 처리완료: 2882.bmp
[2209/4386] 처리완료: 1844.bmp
[2210/4386] 처리완료: 3160.bmp
[2211/4386] 처리완료: 1820.bmp
[2212/4386] 처리완료: 1909.bmp
[2213/4386] 처리완료: 3688.bmp
[2214/4386] 처리완료: 1878.bmp
[2215/4386] 처리완료: 3574.bmp
[2216/4386] 처리완료: 11.bmp
[2217/4386] 처리완료: 2940.bmp
[2218/4386] 처리완료: 4927.bmp
[2219/4386] 처리완료: 3317.bmp
[2220/4386] 처리완료: 2837.bmp
[2221/4386] 처리완료: 657.bmp
[2222/4386] 처리완료: 223.bmp
[2223/4386] 처리완료: 5097.bmp
[2224/43

[2494/4386] 처리완료: 2148.bmp
[2495/4386] 처리완료: 45.bmp
[2496/4386] 처리완료: 3171.bmp
[2497/4386] 처리완료: 4815.bmp
[2498/4386] 처리완료: 5050.bmp
[2499/4386] 처리완료: 3492.bmp
[2500/4386] 처리완료: 1386.bmp
[2501/4386] 처리완료: 3032.bmp
[2502/4386] 처리완료: 1441.bmp
[2503/4386] 처리완료: 1224.bmp
[2504/4386] 처리완료: 1063.bmp
[2505/4386] 처리완료: 2682.bmp
[2506/4386] 처리완료: 4545.bmp
[2507/4386] 처리완료: 1642.bmp
[2508/4386] 처리완료: 2921.bmp
[2509/4386] 처리완료: 4044.bmp
[2510/4386] 처리완료: 2476.bmp
[2511/4386] 처리완료: 1258.bmp
[2512/4386] 처리완료: 4950.bmp
[2513/4386] 처리완료: 2670.bmp
[2514/4386] 처리완료: 1672.bmp
[2515/4386] 처리완료: 1917.bmp
[2516/4386] 처리완료: 4707.bmp
[2517/4386] 처리완료: 4315.bmp
[2518/4386] 처리완료: 412.bmp
[2519/4386] 처리완료: 4635.bmp
[2520/4386] 처리완료: 1839.bmp
[2521/4386] 처리완료: 3134.bmp
[2522/4386] 처리완료: 2993.bmp
[2523/4386] 처리완료: 3469.bmp
[2524/4386] 처리완료: 727.bmp
[2525/4386] 처리완료: 4734.bmp
[2526/4386] 처리완료: 1296.bmp
[2527/4386] 처리완료: 3964.bmp
[2528/4386] 처리완료: 2214.bmp
[2529/4386] 처리완료: 1199.bmp
[2530/4386] 처리완료: 1401.bmp
[2531

[2802/4386] 처리완료: 348.bmp
[2803/4386] 처리완료: 1717.bmp
[2804/4386] 처리완료: 3231.bmp
[2805/4386] 처리완료: 790.bmp
[2806/4386] 처리완료: 209.bmp
[2807/4386] 처리완료: 1625.bmp
[2808/4386] 처리완료: 3766.bmp
[2809/4386] 처리완료: 5029.bmp
[2810/4386] 처리완료: 3936.bmp
[2811/4386] 처리완료: 4742.bmp
[2812/4386] 처리완료: 2561.bmp
[2813/4386] 처리완료: 998.bmp
[2814/4386] 처리완료: 1959.bmp
[2815/4386] 처리완료: 4089.bmp
[2816/4386] 처리완료: 2794.bmp
[2817/4386] 처리완료: 1178.bmp
[2818/4386] 처리완료: 4986.bmp
[2819/4386] 처리완료: 4087.bmp
[2820/4386] 처리완료: 425.bmp
[2821/4386] 처리완료: 4189.bmp
[2822/4386] 처리완료: 151.bmp
[2823/4386] 처리완료: 4381.bmp
[2824/4386] 처리완료: 4790.bmp
[2825/4386] 처리완료: 2896.bmp
[2826/4386] 처리완료: 3224.bmp
[2827/4386] 처리완료: 54.bmp
[2828/4386] 처리완료: 4482.bmp
[2829/4386] 처리완료: 1437.bmp
[2830/4386] 처리완료: 465.bmp
[2831/4386] 처리완료: 3081.bmp
[2832/4386] 처리완료: 3366.bmp
[2833/4386] 처리완료: 1062.bmp
[2834/4386] 처리완료: 850.bmp
[2835/4386] 처리완료: 2126.bmp
[2836/4386] 처리완료: 1254.bmp
[2837/4386] 처리완료: 4083.bmp
[2838/4386] 처리완료: 2422.bmp
[2839/4386]

[3108/4386] 처리완료: 2311.bmp
[3109/4386] 처리완료: 511.bmp
[3110/4386] 처리완료: 853.bmp
[3111/4386] 처리완료: 652.bmp
[3112/4386] 처리완료: 4842.bmp
[3113/4386] 처리완료: 1973.bmp
[3114/4386] 처리완료: 1990.bmp
[3115/4386] 처리완료: 352.bmp
[3116/4386] 처리완료: 377.bmp
[3117/4386] 처리완료: 3500.bmp
[3118/4386] 처리완료: 3774.bmp
[3119/4386] 처리완료: 1780.bmp
[3120/4386] 처리완료: 4211.bmp
[3121/4386] 처리완료: 2988.bmp
[3122/4386] 처리완료: 2742.bmp
[3123/4386] 처리완료: 2350.bmp
[3124/4386] 처리완료: 3284.bmp
[3125/4386] 처리완료: 3130.bmp
[3126/4386] 처리완료: 4845.bmp
[3127/4386] 처리완료: 3715.bmp
[3128/4386] 처리완료: 3960.bmp
[3129/4386] 처리완료: 1738.bmp
[3130/4386] 처리완료: 833.bmp
[3131/4386] 처리완료: 3456.bmp
[3132/4386] 처리완료: 447.bmp
[3133/4386] 처리완료: 276.bmp
[3134/4386] 처리완료: 736.bmp
[3135/4386] 처리완료: 2094.bmp
[3136/4386] 처리완료: 580.bmp
[3137/4386] 처리완료: 4713.bmp
[3138/4386] 처리완료: 4213.bmp
[3139/4386] 처리완료: 3581.bmp
[3140/4386] 처리완료: 728.bmp
[3141/4386] 처리완료: 4905.bmp
[3142/4386] 처리완료: 2510.bmp
[3143/4386] 처리완료: 3485.bmp
[3144/4386] 처리완료: 4019.bmp
[3145/4386] 

[3415/4386] 처리완료: 1521.bmp
[3416/4386] 처리완료: 3110.bmp
[3417/4386] 처리완료: 3177.bmp
[3418/4386] 처리완료: 1109.bmp
[3419/4386] 처리완료: 4394.bmp
[3420/4386] 처리완료: 391.bmp
[3421/4386] 처리완료: 2996.bmp
[3422/4386] 처리완료: 2016.bmp
[3423/4386] 처리완료: 4068.bmp
[3424/4386] 처리완료: 2692.bmp
[3425/4386] 처리완료: 1601.bmp
[3426/4386] 처리완료: 2726.bmp
[3427/4386] 처리완료: 713.bmp
[3428/4386] 처리완료: 808.bmp
[3429/4386] 처리완료: 2874.bmp
[3430/4386] 처리완료: 3739.bmp
[3431/4386] 처리완료: 961.bmp
[3432/4386] 처리완료: 2823.bmp
[3433/4386] 처리완료: 1751.bmp
[3434/4386] 처리완료: 110.bmp
[3435/4386] 처리완료: 1591.bmp
[3436/4386] 처리완료: 4951.bmp
[3437/4386] 처리완료: 3626.bmp
[3438/4386] 처리완료: 1688.bmp
[3439/4386] 처리완료: 951.bmp
[3440/4386] 처리완료: 869.bmp
[3441/4386] 처리완료: 5030.bmp
[3442/4386] 처리완료: 2040.bmp
[3443/4386] 처리완료: 724.bmp
[3444/4386] 처리완료: 5071.bmp
[3445/4386] 처리완료: 4090.bmp
[3446/4386] 처리완료: 4721.bmp
[3447/4386] 처리완료: 3794.bmp
[3448/4386] 처리완료: 502.bmp
[3449/4386] 처리완료: 4824.bmp
[3450/4386] 처리완료: 3733.bmp
[3451/4386] 처리완료: 927.bmp
[3452/4386]

[3722/4386] 처리완료: 4057.bmp
[3723/4386] 처리완료: 1501.bmp
[3724/4386] 처리완료: 3897.bmp
[3725/4386] 처리완료: 1805.bmp
[3726/4386] 처리완료: 4476.bmp
[3727/4386] 처리완료: 3047.bmp
[3728/4386] 처리완료: 4181.bmp
[3729/4386] 처리완료: 1324.bmp
[3730/4386] 처리완료: 1342.bmp
[3731/4386] 처리완료: 4436.bmp
[3732/4386] 처리완료: 3775.bmp
[3733/4386] 처리완료: 3568.bmp
[3734/4386] 처리완료: 669.bmp
[3735/4386] 처리완료: 2464.bmp
[3736/4386] 처리완료: 1422.bmp
[3737/4386] 처리완료: 1927.bmp
[3738/4386] 처리완료: 4062.bmp
[3739/4386] 처리완료: 2774.bmp
[3740/4386] 처리완료: 3846.bmp
[3741/4386] 처리완료: 3264.bmp
[3742/4386] 처리완료: 2841.bmp
[3743/4386] 처리완료: 3831.bmp
[3744/4386] 처리완료: 2063.bmp
[3745/4386] 처리완료: 618.bmp
[3746/4386] 처리완료: 1079.bmp
[3747/4386] 처리완료: 2532.bmp
[3748/4386] 처리완료: 318.bmp
[3749/4386] 처리완료: 2509.bmp
[3750/4386] 처리완료: 2060.bmp
[3751/4386] 처리완료: 2594.bmp
[3752/4386] 처리완료: 2056.bmp
[3753/4386] 처리완료: 4690.bmp
[3754/4386] 처리완료: 1424.bmp
[3755/4386] 처리완료: 4401.bmp
[3756/4386] 처리완료: 3982.bmp
[3757/4386] 처리완료: 2967.bmp
[3758/4386] 처리완료: 4618.bmp
[375

[4028/4386] 처리완료: 3133.bmp
[4029/4386] 처리완료: 945.bmp
[4030/4386] 처리완료: 734.bmp
[4031/4386] 처리완료: 252.bmp
[4032/4386] 처리완료: 85.bmp
[4033/4386] 처리완료: 2588.bmp
[4034/4386] 처리완료: 2083.bmp
[4035/4386] 처리완료: 518.bmp
[4036/4386] 처리완료: 3878.bmp
[4037/4386] 처리완료: 137.bmp
[4038/4386] 처리완료: 3590.bmp
[4039/4386] 처리완료: 4235.bmp
[4040/4386] 처리완료: 4415.bmp
[4041/4386] 처리완료: 3526.bmp
[4042/4386] 처리완료: 926.bmp
[4043/4386] 처리완료: 4709.bmp
[4044/4386] 처리완료: 4312.bmp
[4045/4386] 처리완료: 4984.bmp
[4046/4386] 처리완료: 2871.bmp
[4047/4386] 처리완료: 958.bmp
[4048/4386] 처리완료: 4271.bmp
[4049/4386] 처리완료: 1733.bmp
[4050/4386] 처리완료: 3678.bmp
[4051/4386] 처리완료: 4197.bmp
[4052/4386] 처리완료: 1787.bmp
[4053/4386] 처리완료: 1658.bmp
[4054/4386] 처리완료: 3555.bmp
[4055/4386] 처리완료: 889.bmp
[4056/4386] 처리완료: 2433.bmp
[4057/4386] 처리완료: 2206.bmp
[4058/4386] 처리완료: 497.bmp
[4059/4386] 처리완료: 2157.bmp
[4060/4386] 처리완료: 4864.bmp
[4061/4386] 처리완료: 1133.bmp
[4062/4386] 처리완료: 966.bmp
[4063/4386] 처리완료: 3772.bmp
[4064/4386] 처리완료: 4259.bmp
[4065/4386] 처

[4336/4386] 처리완료: 1325.bmp
[4337/4386] 처리완료: 4886.bmp
[4338/4386] 처리완료: 4001.bmp
[4339/4386] 처리완료: 5096.bmp
[4340/4386] 처리완료: 292.bmp
[4341/4386] 처리완료: 3431.bmp
[4342/4386] 처리완료: 3619.bmp
[4343/4386] 처리완료: 4042.bmp
[4344/4386] 처리완료: 4505.bmp
[4345/4386] 처리완료: 4803.bmp
[4346/4386] 처리완료: 1265.bmp
[4347/4386] 처리완료: 43.bmp
[4348/4386] 처리완료: 4974.bmp
[4349/4386] 처리완료: 2194.bmp
[4350/4386] 처리완료: 2762.bmp
[4351/4386] 처리완료: 241.bmp
[4352/4386] 처리완료: 509.bmp
[4353/4386] 처리완료: 2013.bmp
[4354/4386] 처리완료: 364.bmp
[4355/4386] 처리완료: 3616.bmp
[4356/4386] 처리완료: 1775.bmp
[4357/4386] 처리완료: 4159.bmp
[4358/4386] 처리완료: 76.bmp
[4359/4386] 처리완료: 3040.bmp
[4360/4386] 처리완료: 588.bmp
[4361/4386] 처리완료: 609.bmp
[4362/4386] 처리완료: 3015.bmp
[4363/4386] 처리완료: 895.bmp
[4364/4386] 처리완료: 1602.bmp
[4365/4386] 처리완료: 2200.bmp
[4366/4386] 처리완료: 4926.bmp
[4367/4386] 처리완료: 1856.bmp
[4368/4386] 처리완료: 1782.bmp
[4369/4386] 처리완료: 3025.bmp
[4370/4386] 처리완료: 17.bmp
[4371/4386] 처리완료: 599.bmp
[4372/4386] 처리완료: 3949.bmp
[4373/4386] 처리완

In [None]:
import os
import shutil

# 소스 폴더와 대상 폴더 경로
src_folder = '/home/ec2-user/SageMaker/data/edge_NG_156'
dst_folder = '/home/ec2-user/SageMaker/data/edge_NG_143'

# 복사할 파일 리스트
files_to_copy = [
    'edge_971.bmp', 'edge_4439.bmp', 'edge_4447.bmp', 'edge_2659.bmp', 
    'edge_2667.bmp', 'edge_2675.bmp', 'edge_883.bmp', 'edge_4563.bmp',
    'edge_4875.bmp', 'edge_1755.bmp', 'edge_4987.bmp', 'edge_2642.bmp',
    'edge_4683.bmp', 'edge_4748.bmp', 'edge_4597.bmp', 'edge_4588.bmp',
    'edge_4322.bmp', 'edge_4377.bmp', 'edge_4383.bmp', 'edge_4345.bmp',
    'edge_4343.bmp', 'edge_4612.bmp', 'edge_4347.bmp', 'edge_4362.bmp',
    'edge_3940.bmp', 'edge_4306.bmp', 'edge_4483.bmp', 'edge_4491.bmp'
]

# 파일 복사
copied_count = 0
for filename in files_to_copy:
    src_path = os.path.join(src_folder, filename)
    dst_path = os.path.join(dst_folder, filename)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
        print(f"복사됨: {filename}")
        copied_count += 1
    else:
        print(f"파일을 찾을 수 없음: {filename}")

print(f"\n복사 완료! 총 {copied_count}개 파일이 복사되었습니다.")

In [None]:
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def load_images_from_folder(folder_path):
    """폴더에서 이미지를 로드합니다."""
    images = []
    valid_extensions = ['.bmp', '.jpg', '.jpeg', '.png']
    
    for filename in os.listdir(folder_path):
        if any(filename.lower().endswith(ext) for ext in valid_extensions):
            image_path = os.path.join(folder_path, filename)
            img = cv2.imread(image_path)
            if img is not None:
                images.append({
                    'image': img,
                    'filename': filename
                })
    
    return images

def visualize_plots(images, predictions, probabilities, filenames):
    """결과 시각화 함수"""
    # 클래스별 예측 개수 시각화
    plt.figure(figsize=(12, 5))
    
    # 1. 클래스별 분포 그래프
    plt.subplot(121)
    classes = ['dent', 'torn', 'bubble', 'foreignsub']
    class_counts = [np.sum(predictions == i) for i in range(len(classes))]
    
    plt.bar(classes, class_counts)
    plt.title('Distribution of Predicted Classes')
    plt.ylabel('Number of Images')
    plt.xticks(rotation=45)
    
    # 2. t-SNE를 사용한 군집 시각화
    plt.subplot(122)
    tsne = TSNE(n_components=2, random_state=42)
    features_2d = tsne.fit_transform(probabilities)
    
    colors = ['red', 'blue', 'green', 'purple']
    for i, class_name in enumerate(classes):
        mask = predictions == i
        plt.scatter(features_2d[mask, 0], features_2d[mask, 1], 
                   c=colors[i], label=class_name, alpha=0.6)
    
    plt.title('Clustering Visualization')
    plt.legend()
    plt.tight_layout()
    plt.show()
    
    return {
        'class_distribution': dict(zip(classes, class_counts)),
        'clustering': features_2d
    }




def visualize_results(images, predictions, probabilities, filenames):
    """결과를 시각화합니다. 한 줄에 6개의 이미지를 표시합니다."""
    categories = ['dent', 'torn', 'bubble', 'foreignsub']
    n_images = len(images)
    n_cols = 6
    n_rows = math.ceil(n_images / n_cols)
    
    # 결과를 저장할 리스트 생성
    results = []
    
    # 이미지 그리드 생성
    fig = plt.figure(figsize=(20, 3*n_rows))
    for i, img_data in enumerate(images):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(img_data['image'], cmap='gray')
        
        # 예측 결과 및 신뢰도
        pred_category = categories[predictions[i]]
        prob = probabilities[i][predictions[i]] * 100
        
        # 결과 저장
        results.append({
            'file': filenames[i],
            'prediction': pred_category,
            'confidence': prob
        })
        
        # 타이틀에 파일명과 예측 결과 표시
        plt.title(f"{filenames[i]}\n{pred_category}\n{prob:.1f}%", 
                 fontsize=8, pad=5)
        plt.axis('off')
    
    plt.tight_layout()
    plt.show()
    
    # 예측 결과 출력
    print("\n예측 결과:")
    for r in results:
        print(f"File: {r['file']:<20} Prediction: {r['prediction']:<12} Confidence: {r['confidence']:.1f}%")
    
    return results

In [None]:
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.semi_supervised import LabelPropagation
from sklearn.metrics.pairwise import rbf_kernel
import cv2
import math
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def create_training_data(images, defect_images):
    """학습 데이터를 생성합니다."""
    features = []
    labels = []
    filenames = []
    categories = {'dent': 0, 'torn': 1, 'bubble': 2, 'foreignsub': 3}
    
    # 레이블이 있는 데이터와 없는 데이터 구분
    labeled_files = {d['file'].lower(): d['category'] for d in defect_images}
    
    labeled_count = 0
    for img_data in images:
        features.append(extract_features(img_data['image']))
        filenames.append(img_data['filename'])
        
        # 파일명을 소문자로 변환하여 비교
        if img_data['filename'].lower() in labeled_files:
            labels.append(categories[labeled_files[img_data['filename'].lower()]])
            labeled_count += 1
        else:
            labels.append(-1)
    
    print(f"Found {labeled_count} labeled images out of {len(images)} total images")
    
    if labeled_count == 0:
        raise ValueError("No labeled data found! Please check if the image filenames match with defect_images.")
    
    return np.array(features), np.array(labels), filenames

def extract_features(image):
    """특징 추출 함수 개선"""
    features = []
    
    # 1. 엣지 검출 강화
    edges = cv2.Canny(image, 50, 150)
    edge_density = np.mean(edges) / 255.0
    
    # 2. 로컬 컨트라스트 특징
    local_std = np.std(image.astype(float))
    
    # 3. 텍스처 특징 추출
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
    glcm = graycomatrix(gray, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])
    contrast = graycoprops(glcm, 'contrast')[0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0]
    
    # 4. 블롭 검출
    params = cv2.SimpleBlobDetector_Params()
    params.minThreshold = 10
    params.maxThreshold = 200
    params.filterByArea = True
    params.minArea = 20
    detector = cv2.SimpleBlobDetector_create(params)
    keypoints = detector.detect(gray)
    blob_count = len(keypoints)
    
    # 5. 엣지 방향성 분석
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    edge_angles = np.arctan2(sobely, sobelx)
    angle_hist, _ = np.histogram(edge_angles, bins=8, range=(-np.pi, np.pi))
    
    # 특징 벡터 구성
    features.extend([
        edge_density,
        local_std,
        *contrast,
        *dissimilarity,
        blob_count,
        *angle_hist
    ])
    
    return np.array(features)

def train_semi_supervised(features, labels):
    """반지도 학습 모델 개선"""
    # 데이터 증강
    augmented_features = []
    augmented_labels = []
    for i, (feat, label) in enumerate(zip(features, labels)):
        if label != -1:  # 레이블이 있는 데이터만 증강
            noise = np.random.normal(0, 0.1, feat.shape)
            augmented_features.append(feat + noise)
            augmented_labels.append(label)
    
    features = np.vstack([features, augmented_features])
    labels = np.concatenate([labels, augmented_labels])
    
    # 모델 파라미터 조정
    model = LabelPropagation(
        kernel='rbf',
        gamma=3,  # gamma 값 조정
        max_iter=2000,
        tol=1e-6
    )
    
    # 모델 학습
    model.fit(features, labels)
    
    # 예측 및 확률 계산
    predictions = model.predict(features)
    probabilities = model.predict_proba(features)
    
    return predictions, probabilities


def compute_class_weights(labels):
    """클래스 불균형 처리를 위한 가중치 계산"""
    unique_labels = np.unique(labels[labels != -1])
    class_counts = np.array([np.sum(labels == label) for label in unique_labels])
    weights = 1.0 / class_counts
    weights = weights / np.sum(weights) * len(unique_labels)
    return dict(zip(unique_labels, weights))

def main(folder_path, defect_images):
    """메인 실행 함수"""
    print("Loading images...")
    images = load_images_from_folder(folder_path)
    
    print(f"Loaded {len(images)} images")
    print("\nImage filenames in folder:")
    for img in images[:5]:  # 처음 5개 파일명만 출력
        print(f"- {img['filename']}")
    
    print("\nLabeled images we're looking for:")
    for defect in defect_images:
        print(f"- {defect['file']} ({defect['category']})")
    
    print("\nExtracting features...")
    features, labels, filenames = create_training_data(images, defect_images)
    
    print("\nStandardizing features...")
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    
    print("Reducing dimensionality...")
    pca = PCA(n_components=min(20, scaled_features.shape[1]))
    reduced_features = pca.fit_transform(scaled_features)
    print(f"Reduced features shape: {reduced_features.shape}")
    
    print("\nTraining semi-supervised model...")
    predictions, probabilities = train_semi_supervised(reduced_features, labels)
    
    print("Visualizing plots...")
    results = visualize_plots(images, predictions, probabilities, filenames)
    
    print("Visualizing results...")
    results = visualize_results(images, predictions, probabilities, filenames)
    
    return results


if __name__ == "__main__":
    folder_path = '/home/ec2-user/SageMaker/data/edge_NG_143_add_28'
    defect_images = [
    # dent 예시 이미지들
    {'file': 'edge_830.bmp', 'category': 'dent'},
    {'file': 'edge_733.bmp', 'category': 'dent'},
    {'file': 'edge_4447.bmp', 'category': 'dent'},
    {'file': 'edge_2659.bmp', 'category': 'dent'},
    {'file': 'edge_725.bmp', 'category': 'dent'},
    {'file': 'edge_2675.bmp', 'category': 'dent'},
    
    # torn 예시 이미지들
    {'file': 'edge_997.bmp', 'category': 'torn'},
    {'file': 'edge_1102.bmp', 'category': 'torn'},
    {'file': 'edge_988.bmp', 'category': 'torn'},
    {'file': 'edge_856.bmp', 'category': 'torn'},
    {'file': 'edge_4987.bmp', 'category': 'torn'},
    {'file': 'edge_2642.bmp', 'category': 'torn'},
    {'file': 'edge_4683.bmp', 'category': 'torn'},
    
    # bubble 예시 이미지들
    {'file': 'edge_4748.bmp', 'category': 'bubble'},
    {'file': 'edge_4597.bmp', 'category': 'bubble'},
    {'file': 'edge_4588.bmp', 'category': 'bubble'},
    {'file': 'edge_4322.bmp', 'category': 'bubble'},
    {'file': 'edge_4377.bmp', 'category': 'bubble'},
    {'file': 'edge_4383.bmp', 'category': 'bubble'},
    {'file': 'edge_4345.bmp', 'category': 'bubble'},
    {'file': 'edge_4343.bmp', 'category': 'bubble'},
    {'file': 'edge_4612.bmp', 'category': 'bubble'},
    {'file': 'edge_4347.bmp', 'category': 'bubble'},
    {'file': 'edge_4362.bmp', 'category': 'bubble'},
    
    # foreignsub 예시 이미지들
    {'file': 'edge_3940.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4306.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4483.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4491.bmp', 'category': 'foreignsub'},
]
    main(folder_path, defect_images)

In [None]:
import pandas as pd
import re

# Create lists to store the data
files = []
predictions = []

# Get the actual data from the document_content
data = '''File: edge_4362.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_1172.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_4825.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_997.bmp         Prediction: torn         Confidence: 99.6%
File: edge_856.bmp         Prediction: torn         Confidence: 100.0%
File: edge_4937.bmp        Prediction: bubble       Confidence: 50.5%
File: edge_4923.bmp        Prediction: bubble       Confidence: 62.9%
File: edge_2675.bmp        Prediction: dent         Confidence: 100.0%
File: edge_4860.bmp        Prediction: bubble       Confidence: 68.3%
File: edge_4880.bmp        Prediction: bubble       Confidence: 55.6%
File: edge_725.bmp         Prediction: dent         Confidence: 96.7%
File: edge_4936.bmp        Prediction: bubble       Confidence: 54.0%
File: edge_998.bmp         Prediction: torn         Confidence: 80.0%
File: edge_4848.bmp        Prediction: bubble       Confidence: 60.7%
File: edge_988.bmp         Prediction: torn         Confidence: 94.6%
File: edge_4862.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_4985.bmp        Prediction: bubble       Confidence: 65.3%
File: edge_4861.bmp        Prediction: bubble       Confidence: 63.9%
File: edge_4845.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4946.bmp        Prediction: bubble       Confidence: 49.7%
File: edge_886.bmp         Prediction: bubble       Confidence: 78.0%
File: edge_4892.bmp        Prediction: bubble       Confidence: 53.7%
File: edge_5194.bmp        Prediction: torn         Confidence: 61.1%
File: edge_4859.bmp        Prediction: torn         Confidence: 42.5%
File: edge_757.bmp         Prediction: torn         Confidence: 66.5%
File: edge_4563.bmp        Prediction: bubble       Confidence: 47.5%
File: edge_4905.bmp        Prediction: bubble       Confidence: 59.0%
File: edge_4894.bmp        Prediction: bubble       Confidence: 55.0%
File: edge_4906.bmp        Prediction: bubble       Confidence: 69.5%
File: edge_996.bmp         Prediction: bubble       Confidence: 66.9%
File: edge_4840.bmp        Prediction: bubble       Confidence: 60.0%
File: edge_4855.bmp        Prediction: bubble       Confidence: 62.7%
File: edge_4931.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_4929.bmp        Prediction: bubble       Confidence: 58.7%
File: edge_4832.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_4844.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_4893.bmp        Prediction: bubble       Confidence: 53.7%
File: edge_830.bmp         Prediction: dent         Confidence: 99.8%
File: edge_4748.bmp        Prediction: bubble       Confidence: 97.9%
File: edge_4947.bmp        Prediction: bubble       Confidence: 55.5%
File: edge_4841.bmp        Prediction: bubble       Confidence: 59.0%
File: edge_4913.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4907.bmp        Prediction: torn         Confidence: 40.7%
File: edge_4976.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_4920.bmp        Prediction: bubble       Confidence: 73.4%
File: edge_4872.bmp        Prediction: bubble       Confidence: 64.5%
File: edge_4824.bmp        Prediction: bubble       Confidence: 79.7%
File: edge_4987.bmp        Prediction: torn         Confidence: 94.5%
File: edge_4930.bmp        Prediction: bubble       Confidence: 49.5%
File: edge_4483.bmp        Prediction: foreignsub   Confidence: 88.5%
File: edge_4306.bmp        Prediction: foreignsub   Confidence: 99.8%
File: edge_4865.bmp        Prediction: bubble       Confidence: 58.0%
File: edge_4597.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_763.bmp         Prediction: dent         Confidence: 90.1%
File: edge_4839.bmp        Prediction: bubble       Confidence: 67.0%
File: edge_4899.bmp        Prediction: bubble       Confidence: 67.9%
File: edge_4857.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4875.bmp        Prediction: bubble       Confidence: 95.8%
File: edge_772.bmp         Prediction: dent         Confidence: 54.8%
File: edge_4863.bmp        Prediction: bubble       Confidence: 62.8%
File: edge_4928.bmp        Prediction: bubble       Confidence: 50.8%
File: edge_4383.bmp        Prediction: bubble       Confidence: 99.0%
File: edge_733.bmp         Prediction: dent         Confidence: 95.3%
File: edge_4889.bmp        Prediction: bubble       Confidence: 70.1%
File: edge_3940.bmp        Prediction: foreignsub   Confidence: 92.5%
File: edge_4831.bmp        Prediction: bubble       Confidence: 60.9%
File: edge_4810.bmp        Prediction: bubble       Confidence: 63.6%
File: edge_4888.bmp        Prediction: bubble       Confidence: 55.8%
File: edge_5970.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_971.bmp         Prediction: bubble       Confidence: 54.1%
File: edge_4927.bmp        Prediction: bubble       Confidence: 57.4%
File: edge_4925.bmp        Prediction: bubble       Confidence: 47.4%
File: edge_4912.bmp        Prediction: bubble       Confidence: 57.6%
File: edge_4868.bmp        Prediction: bubble       Confidence: 59.8%
File: edge_748.bmp         Prediction: torn         Confidence: 65.7%
File: edge_4897.bmp        Prediction: bubble       Confidence: 72.5%
File: edge_4876.bmp        Prediction: bubble       Confidence: 55.4%
File: edge_4816.bmp        Prediction: bubble       Confidence: 81.2%
File: edge_848.bmp         Prediction: torn         Confidence: 77.3%
File: edge_4871.bmp        Prediction: bubble       Confidence: 70.4%
File: edge_4838.bmp        Prediction: bubble       Confidence: 93.0%
File: edge_4903.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_4886.bmp        Prediction: bubble       Confidence: 72.5%
File: edge_5020.bmp        Prediction: bubble       Confidence: 65.0%
File: edge_4864.bmp        Prediction: bubble       Confidence: 63.8%
File: edge_4377.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4878.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4322.bmp        Prediction: bubble       Confidence: 96.9%
File: edge_4885.bmp        Prediction: bubble       Confidence: 66.5%
File: edge_4898.bmp        Prediction: bubble       Confidence: 71.5%
File: edge_1755.bmp        Prediction: bubble       Confidence: 49.5%
File: edge_5021.bmp        Prediction: bubble       Confidence: 69.1%
File: edge_4343.bmp        Prediction: bubble       Confidence: 93.7%
File: edge_4822.bmp        Prediction: bubble       Confidence: 64.2%
File: edge_4058.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4345.bmp        Prediction: bubble       Confidence: 95.3%
File: edge_1094.bmp        Prediction: torn         Confidence: 73.8%
File: edge_4879.bmp        Prediction: foreignsub   Confidence: 50.1%
File: edge_4945.bmp        Prediction: bubble       Confidence: 55.4%
File: edge_4833.bmp        Prediction: bubble       Confidence: 60.4%
File: edge_4447.bmp        Prediction: dent         Confidence: 100.0%
File: edge_1102.bmp        Prediction: torn         Confidence: 100.0%
File: edge_4881.bmp        Prediction: bubble       Confidence: 56.3%
File: edge_4911.bmp        Prediction: bubble       Confidence: 56.5%
File: edge_4846.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_719.bmp         Prediction: bubble       Confidence: 34.7%
File: edge_2659.bmp        Prediction: dent         Confidence: 81.2%
File: edge_4874.bmp        Prediction: bubble       Confidence: 82.1%
File: edge_1116.bmp        Prediction: torn         Confidence: 73.9%
File: edge_1164.bmp        Prediction: bubble       Confidence: 55.8%
File: edge_4491.bmp        Prediction: foreignsub   Confidence: 96.9%
File: edge_4922.bmp        Prediction: torn         Confidence: 43.4%
File: edge_4918.bmp        Prediction: bubble       Confidence: 81.9%
File: edge_2667.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_887.bmp         Prediction: torn         Confidence: 78.6%
File: edge_4873.bmp        Prediction: torn         Confidence: 47.1%
File: edge_4938.bmp        Prediction: foreignsub   Confidence: 33.6%
File: edge_4877.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4870.bmp        Prediction: bubble       Confidence: 60.5%
File: edge_4837.bmp        Prediction: bubble       Confidence: 60.2%
File: edge_1148.bmp        Prediction: bubble       Confidence: 46.6%
File: edge_4683.bmp        Prediction: torn         Confidence: 96.7%
File: edge_756.bmp         Prediction: foreignsub   Confidence: 70.4%
File: edge_4883.bmp        Prediction: bubble       Confidence: 55.5%
File: edge_1140.bmp        Prediction: torn         Confidence: 99.9%
File: edge_973.bmp         Prediction: bubble       Confidence: 100.0%
File: edge_4828.bmp        Prediction: bubble       Confidence: 73.2%
File: edge_831.bmp         Prediction: bubble       Confidence: 84.9%
File: edge_4919.bmp        Prediction: bubble       Confidence: 54.2%
File: edge_1027.bmp        Prediction: dent         Confidence: 45.5%
File: edge_4829.bmp        Prediction: bubble       Confidence: 64.0%
File: edge_4887.bmp        Prediction: bubble       Confidence: 68.3%
File: edge_2642.bmp        Prediction: torn         Confidence: 94.7%
File: edge_4347.bmp        Prediction: bubble       Confidence: 96.8%
File: edge_2758.bmp        Prediction: torn         Confidence: 60.8%
File: edge_4926.bmp        Prediction: torn         Confidence: 49.2%
File: edge_4821.bmp        Prediction: torn         Confidence: 64.0%
File: edge_4830.bmp        Prediction: bubble       Confidence: 60.7%
File: edge_2759.bmp        Prediction: bubble       Confidence: 67.6%
File: edge_4856.bmp        Prediction: bubble       Confidence: 57.1%
File: edge_4954.bmp        Prediction: bubble       Confidence: 56.0%
File: edge_4439.bmp        Prediction: bubble       Confidence: 81.6%
File: edge_4849.bmp        Prediction: bubble       Confidence: 59.6%
File: edge_4915.bmp        Prediction: bubble       Confidence: 62.1%
File: edge_4814.bmp        Prediction: bubble       Confidence: 58.3%
File: edge_4823.bmp        Prediction: bubble       Confidence: 61.3%
File: edge_1500.bmp        Prediction: torn         Confidence: 70.5%
File: edge_4921.bmp        Prediction: bubble       Confidence: 61.9%
File: edge_778.bmp         Prediction: bubble       Confidence: 53.4%
File: edge_4847.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_4818.bmp        Prediction: bubble       Confidence: 68.9%
File: edge_4853.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4896.bmp        Prediction: bubble       Confidence: 77.8%
File: edge_4948.bmp        Prediction: bubble       Confidence: 51.7%
File: edge_6070.bmp        Prediction: bubble       Confidence: 79.5%
File: edge_808.bmp         Prediction: bubble       Confidence: 62.2%
File: edge_4588.bmp        Prediction: bubble       Confidence: 100.0%
File: edge_4836.bmp        Prediction: bubble       Confidence: 85.6%
File: edge_1028.bmp        Prediction: torn         Confidence: 99.4%
File: edge_4852.bmp        Prediction: bubble       Confidence: 65.8%
File: edge_1179.bmp        Prediction: dent         Confidence: 55.6%
File: edge_4904.bmp        Prediction: bubble       Confidence: 60.3%
File: edge_734.bmp         Prediction: dent         Confidence: 97.7%
File: edge_4826.bmp        Prediction: bubble       Confidence: 64.0%
File: edge_883.bmp         Prediction: bubble       Confidence: 64.6%
File: edge_4910.bmp        Prediction: bubble       Confidence: 85.8%
File: edge_4895.bmp        Prediction: bubble       Confidence: 66.7%
File: edge_4854.bmp        Prediction: bubble       Confidence: 89.2%
File: edge_4869.bmp        Prediction: bubble       Confidence: 70.3%
File: edge_4612.bmp        Prediction: bubble       Confidence: 97.9%'''  # 전체 데이터를 여기에 넣어주세요

# Process each line
for line in data.strip().split('\n'):
    if line:  # 빈 줄 건너뛰기
        try:
            # Extract number from filename using regex and prediction
            number = re.search(r'edge_(\d+)\.bmp', line).group(1)
            prediction = line.split('Prediction:')[1].split('Confidence:')[0].strip()
            
            # Append to lists
            files.append(number)
            predictions.append(prediction)
        except AttributeError:
            print(f"Skipping problematic line: {line}")
            continue

# Create DataFrame
df = pd.DataFrame({
    'number': files,
    'prediction': predictions
})

# Save to CSV without index
df.to_csv('143+28_predictions.csv', index=False)

# Display first few rows without index
print("Data preview:")
print(df.to_string(index=False))

In [None]:
import os
import shutil

# 소스 폴더와 대상 폴더 경로
src_folder = '/home/ec2-user/SageMaker/data/NG'
dst_folder = '/home/ec2-user/SageMaker/data/NG_v2'

# 복사할 파일 리스트
files_to_copy = [
    '4483.bmp',
    '4306.bmp',
    '4383.bmp',
    '3940.bmp',
    '971.bmp',
    '4377.bmp',
    '4322.bmp',
    '4491.bmp',
    '2667.bmp',
    '4683.bmp',
    '2642.bmp',
    '4347.bmp',
    '4439.bmp',
    '4588.bmp',
    '883.bmp',
    '4612.bmp',
    '4362.bmp'
]

# 파일 복사
copied_count = 0
for filename in files_to_copy:
    src_path = os.path.join(src_folder, filename)
    dst_path = os.path.join(dst_folder, filename)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
        print(f"복사됨: {filename}")
        copied_count += 1
    else:
        print(f"파일을 찾을 수 없음: {filename}")

print(f"\n복사 완료! 총 {copied_count}개 파일이 복사되었습니다.")

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.semi_supervised import LabelPropagation
from sklearn.metrics.pairwise import rbf_kernel
import cv2
import math
from skimage.feature import graycomatrix, graycoprops
from sklearn.manifold import TSNE

def load_images_from_folder(folder_path):
    """폴더에서 이미지를 로드합니다."""
    images = []
    valid_extensions = ['.bmp', '.jpg', '.jpeg', '.png']
    
    for filename in os.listdir(folder_path):
        if any(filename.lower().endswith(ext) for ext in valid_extensions):
            image_path = os.path.join(folder_path, filename)
            img = cv2.imread(image_path)
            if img is not None:
                images.append({
                    'image': img,
                    'filename': filename
                })
    
    return images

def create_training_data(images, defect_images):
    """학습 데이터를 생성합니다."""
    features = []
    labels = []
    filenames = []
    categories = {'dent': 0, 'torn': 1, 'bubble': 2, 'foreignsub': 3}
    
    # 레이블이 있는 데이터와 없는 데이터 구분
    labeled_files = {d['file'].lower(): d['category'] for d in defect_images}
    
    labeled_count = 0
    for img_data in images:
        features.append(extract_features(img_data['image']))
        filenames.append(img_data['filename'])
        
        # 파일명을 소문자로 변환하여 비교
        if img_data['filename'].lower() in labeled_files:
            labels.append(categories[labeled_files[img_data['filename'].lower()]])
            labeled_count += 1
        else:
            labels.append(-1)
    
    print(f"Found {labeled_count} labeled images out of {len(images)} total images")
    
    if labeled_count == 0:
        raise ValueError("No labeled data found! Please check if the image filenames match with defect_images.")
    
    return np.array(features), np.array(labels), filenames

def extract_features(image):
    """특징 추출 함수"""
    features = []
    
    # 1. 엣지 검출 강화
    edges = cv2.Canny(image, 50, 150)
    edge_density = np.mean(edges) / 255.0
    
    # 2. 로컬 컨트라스트 특징
    local_std = np.std(image.astype(float))
    
    # 3. 텍스처 특징 추출
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
    glcm = graycomatrix(gray, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])
    contrast = graycoprops(glcm, 'contrast')[0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0]
    
    # 4. 블롭 검출
    params = cv2.SimpleBlobDetector_Params()
    params.minThreshold = 10
    params.maxThreshold = 200
    params.filterByArea = True
    params.minArea = 20
    detector = cv2.SimpleBlobDetector_create(params)
    keypoints = detector.detect(gray)
    blob_count = len(keypoints)
    
    # 5. 엣지 방향성 분석
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    edge_angles = np.arctan2(sobely, sobelx)
    angle_hist, _ = np.histogram(edge_angles, bins=8, range=(-np.pi, np.pi))
    
    # 특징 벡터 구성
    features.extend([
        edge_density,
        local_std,
        *contrast,
        *dissimilarity,
        blob_count,
        *angle_hist
    ])
    
    return np.array(features)

def train_semi_supervised(features, labels):
    """반지도 학습 모델"""
    # 원본 feature 크기 저장
    original_size = len(features)
    
    # 데이터 증강
    augmented_features = []
    augmented_labels = []
    for i, (feat, label) in enumerate(zip(features, labels)):
        if label != -1:  # 레이블이 있는 데이터만 증강
            noise = np.random.normal(0, 0.1, feat.shape)
            augmented_features.append(feat + noise)
            augmented_labels.append(label)
    
    # 학습용 데이터 구성
    train_features = np.vstack([features, augmented_features])
    train_labels = np.concatenate([labels, augmented_labels])
    
    # 모델 파라미터 조정
    model = LabelPropagation(
        kernel='rbf',
        gamma=3,
        max_iter=2000,
        tol=1e-6
    )
    
    # 모델 학습
    model.fit(train_features, train_labels)
    
    # 원본 데이터에 대해서만 예측
    predictions = model.predict(features)
    probabilities = model.predict_proba(features)
    
    return predictions, probabilities

def compute_class_weights(labels):
    """클래스 불균형 처리를 위한 가중치 계산"""
    unique_labels = np.unique(labels[labels != -1])
    class_counts = np.array([np.sum(labels == label) for label in unique_labels])
    weights = 1.0 / class_counts
    weights = weights / np.sum(weights) * len(unique_labels)
    return dict(zip(unique_labels, weights))

def visualize_results(images, predictions, probabilities, filenames, original_folder_path='/home/ec2-user/SageMaker/sein/NG_2'):
    """불량품 탐지 결과 시각화"""
    # 카테고리 매핑
    categories = {0: 'dent', 1: 'torn', 2: 'bubble', 3:'foreignsub' }
    
    # 결과를 저장할 리스트
    results = []
    
    # 각 이미지에 대해 결과 시각화
    for idx, (image, pred, prob, filename) in enumerate(zip(images, predictions, probabilities, filenames)):
        if pred != -1:  # 예측된 이미지만 처리
            # 원본 이미지 경로 (edge_ 접두사 제거)
            original_filename = filename.replace('edge_', '')
            original_path = os.path.join(original_folder_path, original_filename)
            
            # 원본 이미지 로드
            original_img = cv2.imread(original_path)
            if original_img is None:
                print(f"Warning: Cannot load original image {original_filename}")
                continue
                
            # 결과 시각화를 위한 figure 생성
            plt.figure(figsize=(12, 6))
            
            # 원본 이미지 표시
            plt.subplot(1, 2, 1)
            plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
            plt.title('Original Image')
            plt.axis('off')
            
            # 엣지 처리된 이미지 표시
            plt.subplot(1, 2, 2)
            plt.imshow(cv2.cvtColor(image['image'], cv2.COLOR_BGR2RGB))
            
            # 예측 결과 및 확률 표시
            category = categories[pred]
            confidence = prob[pred] * 100
            plt.title(f'Detected: {category}\nConfidence: {confidence:.2f}%')
            plt.axis('off')
            
            # 결과 저장
            results.append({
                'filename': original_filename,
                'category': category,
                'confidence': confidence
            })
            
            plt.tight_layout()
            plt.show()
            plt.close()
            
            # 상세 정보 출력
            print(f"\nResults for {original_filename}:")
            print(f"Detected defect: {category}")
            print(f"Confidence: {confidence:.2f}%")
            print("-" * 50)
    
    return results

def main(edge_folder_path, original_folder_path, defect_images):
    """메인 실행 함수"""
    print("Loading images...")
    images = load_images_from_folder(edge_folder_path)
    
    print(f"Loaded {len(images)} images")
    print("\nImage filenames in folder:")
    for img in images[:5]:
        print(f"- {img['filename']}")
    
    print("\nLabeled images we're looking for:")
    for defect in defect_images:
        print(f"- {defect['file']} ({defect['category']})")
    
    print("\nExtracting features...")
    features, labels, filenames = create_training_data(images, defect_images)
    
    print("\nStandardizing features...")
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    
    print("Reducing dimensionality...")
    pca = PCA(n_components=min(20, scaled_features.shape[1]))
    reduced_features = pca.fit_transform(scaled_features)
    print(f"Reduced features shape: {reduced_features.shape}")
    
    print("\nTraining semi-supervised model...")
    predictions, probabilities = train_semi_supervised(reduced_features, labels)
    
    print("\nVisualizing results...")
    results = visualize_results(images, predictions, probabilities, filenames, original_folder_path)
    
    return results

if __name__ == "__main__":
    edge_folder_path = '/home/ec2-user/SageMaker/data/edge_NG_143_add_28'
    original_folder_path = '/home/ec2-user/SageMaker/data/NG_v2'
    
    defect_images = [
    # dent 예시 이미지들
    {'file': 'edge_830.bmp', 'category': 'dent'},
    {'file': 'edge_733.bmp', 'category': 'dent'},
    {'file': 'edge_4447.bmp', 'category': 'dent'},
    {'file': 'edge_2659.bmp', 'category': 'dent'},
    {'file': 'edge_725.bmp', 'category': 'dent'},
    {'file': 'edge_2675.bmp', 'category': 'dent'},
    
    # torn 예시 이미지들
    {'file': 'edge_997.bmp', 'category': 'torn'},
    {'file': 'edge_1102.bmp', 'category': 'torn'},
    {'file': 'edge_988.bmp', 'category': 'torn'},
    {'file': 'edge_856.bmp', 'category': 'torn'},
    {'file': 'edge_4987.bmp', 'category': 'torn'},
    {'file': 'edge_2642.bmp', 'category': 'torn'},
    {'file': 'edge_4683.bmp', 'category': 'torn'},
    
    # bubble 예시 이미지들
    {'file': 'edge_4748.bmp', 'category': 'bubble'},
    {'file': 'edge_4597.bmp', 'category': 'bubble'},
    {'file': 'edge_4588.bmp', 'category': 'bubble'},
    {'file': 'edge_4322.bmp', 'category': 'bubble'},
    {'file': 'edge_4377.bmp', 'category': 'bubble'},
    {'file': 'edge_4383.bmp', 'category': 'bubble'},
    {'file': 'edge_4345.bmp', 'category': 'bubble'},
    {'file': 'edge_4343.bmp', 'category': 'bubble'},
    {'file': 'edge_4612.bmp', 'category': 'bubble'},
    {'file': 'edge_4347.bmp', 'category': 'bubble'},
    {'file': 'edge_4362.bmp', 'category': 'bubble'},
    
    # foreignsub 예시 이미지들
    {'file': 'edge_3940.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4306.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4483.bmp', 'category': 'foreignsub'},
    {'file': 'edge_4491.bmp', 'category': 'foreignsub'},
]
    
    results = main(edge_folder_path, original_folder_path, defect_images)