In [2]:
!pip install opencv-python
import boto3
import cv2
import numpy as np
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt



In [3]:
# Create the S3 client (no credentials or region are passed explicitly here).
s3 = boto3.client('s3')

# Bucket and folder information: the bucket that is read from, and the key
# prefix that the functions in the following cells pass to `list_objects_v2`.
source_bucket = "smwu-cv-data"
source_folder = "data/다이캐스팅/g2/dataset_2023-10-17/NG/5"

In [4]:
def calculate_white_black_ratio(image_bytes):
    """Return the share of pure-white and pure-black pixels in an encoded image.

    A pixel counts as white when every colour channel equals 255 and as black
    when every colour channel equals 0. The ratios are taken over the number
    of pixels (height x width), not over the number of channel samples, so a
    saturated colour such as pure red is neither white nor black.

    Args:
        image_bytes: Raw bytes of an image file that Pillow can decode.

    Returns:
        tuple[float, float]: ``(white_ratio, black_ratio)``, each in ``[0, 1]``.
        ``(0.0, 0.0)`` is returned when the decoded array holds no pixels.
    """
    # Decode entirely in memory (Pillow -> NumPy).
    image = np.asarray(Image.open(BytesIO(image_bytes)))

    if image.ndim >= 3:
        colour = image
        # NOTE(review): 2- and 4-channel arrays are assumed to be LA / RGBA, so
        # the trailing channel is treated as alpha and ignored; otherwise an
        # opaque alpha of 255 would prevent any pixel from counting as black.
        # CMYK data (also 4 channels) is not distinguished here - confirm the
        # dataset never contains it.
        if image.shape[-1] in (2, 4):
            colour = image[..., :-1]
        # A pixel matches only if all of its colour channels match.
        white_mask = np.all(colour == 255, axis=-1)
        black_mask = np.all(colour == 0, axis=-1)
    else:
        # Single-channel image: every array element is one pixel.
        # NOTE(review): palette ("P") images decode to palette indices rather
        # than intensities and are not converted here - confirm none are used.
        white_mask = image == 255
        black_mask = image == 0

    # The masks have exactly one entry per pixel.
    total_pixels = white_mask.size
    if total_pixels == 0:
        # Avoid a 0/0 division (NaN plus a runtime warning) on empty input.
        return 0.0, 0.0

    white_ratio = np.count_nonzero(white_mask) / total_pixels
    black_ratio = np.count_nonzero(black_mask) / total_pixels

    return white_ratio, black_ratio

In [5]:
def process_images_and_calculate_ratios():
    """Print the white/black pixel ratio of every image in the source folder.

    Lists all objects under ``source_folder`` in ``source_bucket``, downloads
    each key ending in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) and
    prints the ratios returned by ``calculate_white_black_ratio``.

    Prints "No files found in the source folder." when the listing contains no
    objects at all. Returns ``None``.
    """
    # Anchor the prefix on a folder boundary so ".../NG/5" does not also match
    # sibling folders such as ".../NG/50/".
    prefix = (source_folder.rstrip('/') + '/') if source_folder else ''
    image_suffixes = ('.jpg', '.jpeg', '.png')

    # A single list_objects_v2 call returns at most 1,000 keys; the paginator
    # follows the continuation tokens so larger folders are fully processed.
    paginator = s3.get_paginator('list_objects_v2')
    found_any_object = False

    for page in paginator.paginate(Bucket=source_bucket, Prefix=prefix):
        # Pages of an empty listing carry no 'Contents' entry.
        for obj in page.get('Contents', []):
            found_any_object = True
            key = obj['Key']

            # Only image files are processed.
            if not key.lower().endswith(image_suffixes):
                continue
            print(f"Processing: {key}")

            # Download the image from S3 into memory.
            file_obj = s3.get_object(Bucket=source_bucket, Key=key)
            image_bytes = file_obj['Body'].read()

            # Compute and report the white / black ratios.
            white_ratio, black_ratio = calculate_white_black_ratio(image_bytes)
            print(f"White Ratio: {white_ratio:.2%}, Black Ratio: {black_ratio:.2%}")

    if not found_any_object:
        print("No files found in the source folder.")

In [6]:
def calculate_average_ratios():
    """Print per-image white/black ratios and their averages for the source folder.

    Lists all objects under ``source_folder`` in ``source_bucket``, downloads
    each key ending in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive),
    prints the ratios returned by ``calculate_white_black_ratio`` and finally
    prints the mean of each ratio over all processed images (0 when no image
    was processed).

    Prints "No files found in the source folder." and skips the averages when
    the listing contains no objects at all. Returns ``None``.
    """
    white_ratios = []
    black_ratios = []

    # Anchor the prefix on a folder boundary so ".../NG/5" does not also match
    # sibling folders such as ".../NG/50/", which would skew the averages.
    prefix = (source_folder.rstrip('/') + '/') if source_folder else ''
    image_suffixes = ('.jpg', '.jpeg', '.png')

    # A single list_objects_v2 call returns at most 1,000 keys; the paginator
    # follows the continuation tokens so the averages cover the whole folder.
    paginator = s3.get_paginator('list_objects_v2')
    found_any_object = False

    for page in paginator.paginate(Bucket=source_bucket, Prefix=prefix):
        # Pages of an empty listing carry no 'Contents' entry.
        for obj in page.get('Contents', []):
            found_any_object = True
            key = obj['Key']

            # Only image files are processed.
            if not key.lower().endswith(image_suffixes):
                continue
            print(f"Processing: {key}")

            # Download the image from S3 into memory.
            file_obj = s3.get_object(Bucket=source_bucket, Key=key)
            image_bytes = file_obj['Body'].read()

            # Compute, record and report the white / black ratios.
            white_ratio, black_ratio = calculate_white_black_ratio(image_bytes)
            white_ratios.append(white_ratio)
            black_ratios.append(black_ratio)
            print(f"White Ratio: {white_ratio:.2%}, Black Ratio: {black_ratio:.2%}")

    if not found_any_object:
        print("No files found in the source folder.")
        return

    # Averages over all processed images; fall back to 0 when there were none.
    avg_white_ratio = np.mean(white_ratios) if white_ratios else 0
    avg_black_ratio = np.mean(black_ratios) if black_ratios else 0

    print(f"\nAverage White Ratio: {avg_white_ratio:.2%}")
    print(f"Average Black Ratio: {avg_black_ratio:.2%}")


Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-202550529.png
White Ratio: 12.69%, Black Ratio: 0.05%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-204503291.png
White Ratio: 12.57%, Black Ratio: 0.06%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210609601.png
White Ratio: 11.22%, Black Ratio: 0.05%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210630856.png
White Ratio: 12.39%, Black Ratio: 0.05%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210652009.png
White Ratio: 10.19%, Black Ratio: 0.06%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210713263.png
White Ratio: 10.79%, Black Ratio: 0.06%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210734417.png
White Ratio: 12.21%, Black Ratio: 0.04%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210755671.png
White Ratio: 11.13%, Black Ratio: 0.07%
Processing: data/다이캐스팅/g2/dataset_2023-10-17/NG/5/20231013-210816824.png
White Ratio: 10.95%, Bl