# 주어진 Crop 영역을 새로운 이미지 파일로 만들기 위한 작업

- crop이 없는 경우 기존 이미지 사용
- crop이 있는 경우 crop image로 대체

- 구글 드라이브 파일 저장 문제로 인해
- crop image 저장하고,
- 파일 특성을 나타내는 DataFrame 생성
    - 경로
    - 이미지 type - raw, crop, cannot read


## data prepration

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cv2
from google.colab.patches import cv2_imshow

from tqdm import tqdm

import os
import shutil
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
def load_image_path(dir):
    food_middle_list = sorted(os.listdir(dir))

    food_list = [] # 음식이름
    food_path_list = [] # 음식이름 경로

    for food_middle in food_middle_list:
        middle_path = os.path.join(dir,food_middle)
        food_name_list = os.listdir(middle_path)

        for food in food_name_list:
            path = os.path.join(middle_path,food)
            food_path_list.append(path)

            name = path.split('/')[-1]
            food_list.append(name)

    return food_list, food_path_list


In [4]:
data_dir = '/content/drive/MyDrive/project3/data/traindata/raw_image'
food_list, food_path_list = load_image_path(data_dir)
food_path_list = sorted(food_path_list)

print(len(food_list))
print(food_list)
print(food_path_list)

150
['곱창구이', '닭갈비', '더덕구이', '고등어구이', '떡갈비', '갈치구이', '불고기', '삼겹살', '장어구이', '조개구이', '황태구이', '훈제오리', '조기구이', '갈비구이', '떡국_만두국', '계란국', '무국', '미역국', '북엇국', '시래기국', '콩나물국', '육개장', '젓갈', '과메기', '양념치킨', '콩자반', '편육', '피자', '후라이드치킨', '갓김치', '깍두기', '배추김치', '백김치', '부추김치', '무생채', '열무김치', '오이소박이', '나박김치', '총각김치', '파김치', '숙주나물', '고사리나물', '가지볶음', '미역줄기볶음', '시금치나물', '애호박볶음', '경단', '송편', '꿀떡', '만두', '물냉면', '막국수', '라면', '비빔냉면', '수제비', '열무국수', '잔치국수', '짜장면', '짬뽕', '칼국수', '콩국수', '쫄면', '잡채', '도토리묵', '꽈리고추무침', '도라지무침', '콩나물무침', '홍어무침', '회무침', '김밥', '김치볶음밥', '누룽지', '비빔밥', '유부초밥', '잡곡밥', '알밥', '주먹밥', '새우볶음밥'

## Create Boxed Images

In [5]:
import os
import cv2
import concurrent.futures

## Crop영역 정보를 dictionary로
def crop_area_dict(crop_file_path):
    crop_areas = {}
    with open(crop_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        for line in lines:
            key, value = line.strip().split('=')
            try:
                coordinates = list(map(int, value.split(',')))
                crop_areas[key] = coordinates
            # crop coordinate가 오류가 발생하는 경우 coordinate를 None으로
            except ValueError:
                crop_areas[key] = None
    return crop_areas

# 이미지 처리 - Crop coordinate를 통해 image crop 후
def process_image(filename, raw_image_path, crop_image_path, crop_areas):
    file_name_no_ext = os.path.splitext(filename)[0]
    image_path = os.path.join(raw_image_path, filename)
    image = cv2.imread(image_path)

    # 이미지 불러오기 실패하는 경우 cannot read
    if image is None:
        return {'file_path': image_path, 'type': 'cannot read'}

    # Crop coordinate를 통해 image crop 후 저장
    coordinates = crop_areas.get(file_name_no_ext)
    if coordinates:
        x, y, w, h = coordinates
        crop_img = image[y:y+h, x:x+w]
        output_crop_image_path = os.path.join(crop_image_path, f"{file_name_no_ext}_crop.jpg")
        cv2.imwrite(output_crop_image_path, crop_img)
        return {'file_path': output_crop_image_path, 'type': 'crop'}

    # crop coordinate 정보가 없는 경우 raw 데이터 사용
    else:
        return {'file_path': image_path, 'type': 'raw'}

# 위 두 함수를 결합해서 사용
def make_new_image(raw_image_path):
    crop_file_name = 'crop_area.properties'
    crop_file_path = os.path.join(raw_image_path, crop_file_name)

    crop_image_path = raw_image_path.replace('raw_image', 'crop_image')
    os.makedirs(crop_image_path, exist_ok=True)

    crop_areas = crop_area_dict(crop_file_path)

    img_extensions = ['.jpg', '.jpeg', '.JPG', '.JPEG', '.png', '.PNG', '.bmp', '.BMP']
    file_type = []

    # 병렬연산 처리
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_image, filename, raw_image_path, crop_image_path, crop_areas)
            for filename in os.listdir(raw_image_path)
            if any(filename.lower().endswith(ext) for ext in img_extensions)
        ]
        for future in concurrent.futures.as_completed(futures):
            file_type.append(future.result())

    return file_type



In [None]:
import time

for i in range(15):
    start_index = 0 + 15 * i
    end_index = 15 + 15 * i
    subset_food_path = food_path_list[start_index:end_index]
    for food_path in tqdm(subset_food_path):
        file_types = make_new_image(food_path)
        file_type = pd.DataFrame(file_types)
        food_name = food_path.split('/')[-1]
        file_type.to_csv(f'/content/drive/MyDrive/project3/data/traindata/read_file/{food_name}.csv', index=False)
        time.sleep(30)
    time.sleep(250)

100%|██████████| 15/15 [15:56<00:00, 63.77s/it]
100%|██████████| 15/15 [14:30<00:00, 58.02s/it]
100%|██████████| 15/15 [14:07<00:00, 56.52s/it]
100%|██████████| 15/15 [15:25<00:00, 61.73s/it]
100%|██████████| 15/15 [15:23<00:00, 61.56s/it]
100%|██████████| 15/15 [16:07<00:00, 64.53s/it]
100%|██████████| 15/15 [15:09<00:00, 60.64s/it]
100%|██████████| 15/15 [15:00<00:00, 60.04s/it]
100%|██████████| 15/15 [15:10<00:00, 60.69s/it]
100%|██████████| 15/15 [15:17<00:00, 61.17s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [19]:
## 한과 / 약식에러 발생
# 약식 폴더있는 properties file이
# Img_145 즉 한과로 표현됨

# 약식에 있는 crop들의 key값을 Img_145에서 Img_144로 변경
yak_crop = crop_area_dict('/content/drive/MyDrive/project3/data/traindata/raw_image/한과/약식/crop_area.properties')

yak_crop_updated = {}

for key, value in yak_crop.items():
    if key.startswith('Img_145_'):
        new_key = key.replace('Img_145_', 'Img_144_', 1)
        yak_crop_updated[new_key] = value
    else:
        yak_crop_updated[key] = value


yak_crop_updated

{'Img_144_0000': [95, 519, 433, 297],
 'Img_144_0001': [19, 260, 449, 478],
 'Img_144_0006': [0, 157, 245, 281],
 'Img_144_0014': [8, 282, 617, 595],
 'Img_144_0023': [25, 505, 606, 487],
 'Img_144_0024': [135, 159, 461, 362],
 'Img_144_0034': [0, 28, 193, 272],
 'Img_144_0048': [33, 300, 561, 561],
 'Img_144_0052': [103, 65, 580, 392],
 'Img_144_0058': [0, 98, 600, 643],
 'Img_144_0060': [307, 96, 412, 392],
 'Img_144_0065': [0, 161, 242, 259],
 'Img_144_0070': [297, 342, 803, 392],
 'Img_144_0073': [4, 151, 217, 149],
 'Img_144_0074': [0, 71, 640, 569],
 'Img_144_0079': [0, 214, 651, 253],
 'Img_144_0080': [22, 23, 312, 298],
 'Img_144_0088': [474, 0, 606, 653],
 'Img_144_0090': [0, 1093, 785, 568],
 'Img_144_0093': [144, 178, 387, 243],
 'Img_144_0102': [0, 265, 583, 448],
 'Img_144_0106': [267, 142, 154, 141],
 'Img_144_0109': [67, 16, 343, 327],
 'Img_144_0119': [47, 67, 370, 394],
 'Img_144_0120': [57, 147, 324, 270],
 'Img_144_0129': [74, 182, 323, 355],
 'Img_144_0133': [52, 79

In [23]:
# 약식만 수동으로 다시 돌린다.

raw_image_path = '/content/drive/MyDrive/project3/data/traindata/raw_image/한과/약식/'
crop_image_path = raw_image_path.replace('raw_image', 'crop_image')
os.makedirs(crop_image_path, exist_ok=True)

crop_areas = yak_crop_updated

img_extensions = ['.jpg', '.jpeg', '.JPG', '.JPEG', '.png', '.PNG', '.bmp', '.BMP']
yak_type = []

for filename in os.listdir(raw_image_path):
    if os.path.splitext(filename)[1] in img_extensions:
        file_name_no_ext = os.path.splitext(filename)[0]
        image_path = os.path.join(raw_image_path, filename)
        image = cv2.imread(image_path)

        # Check if the image was read correctly
        if image is None:
            yak_type.append({'file_path': image_path, 'type': 'cannot read'})
            continue

        # Get crop coordinates
        coordinates = crop_areas.get(file_name_no_ext)
        if coordinates:
            x, y, w, h = coordinates
            crop_img = image[y:y+h, x:x+w]
            output_crop_image_path = os.path.join(crop_image_path, f"{file_name_no_ext}_crop.jpg")
            cv2.imwrite(output_crop_image_path, crop_img)
            yak_type.append({'file_path': output_crop_image_path, 'type': 'crop'})
        else:
            # Use raw image if no crop coordinates are available
            yak_type.append({'file_path': image_path, 'type': 'raw'})


yak_type = pd.DataFrame(yak_type)
yak_type.to_csv(f'/content/drive/MyDrive/project3/data/traindata/read_file/약식.csv', index=False)