# SEM Image

In [None]:
from semimg import SemImage

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image, ImageFilter
from IPython.display import display
import cv2

from tqdm.notebook import tqdm
from IPython.display import clear_output
from pprint import pprint
import json
import os
import sys
import importlib
import shutil
import warnings

warnings.filterwarnings(action='ignore')

%matplotlib inline

In [None]:
# Re-execute the already-imported semimg module so edits to its source are
# picked up without restarting the kernel, then rebind SemImage to the class
# defined by the reloaded module.
SemImage = importlib.reload(sys.modules['semimg']).SemImage

## Dataset

In [None]:
# Load the main table. SEM_No is read as object (not parsed as a number) so
# its text is kept exactly as written in the CSV.
posi_mh_main = pd.read_csv(
    filepath_or_buffer='dataset/posi_mh_main.csv',
    dtype={'SEM_No': object},
)

## SEM Image

In [None]:
# Name of the image folder under dataset/image/.
IMAGE_DIR = 'RCore(MH).CSV'
# Directory of the "Main" images; the trailing slash is kept so a filename
# can be appended directly.
IMG_PATH = 'dataset/image/' + IMAGE_DIR + '/Main/'

### e.g. Hole

In [None]:
# Rows of the main table whose group label is exactly 'HOLE'.
is_hole = posi_mh_main['group'].eq('HOLE')
grp_hole = posi_mh_main[is_hole]

In [None]:
# Normal image
# Built from IMG_PATH (defined above, already ends with '/') instead of
# spelling the directory out again, so the location is maintained in one place.
normal_img = Image.open(f'{IMG_PATH}00441.JPG')
# Last expression of the cell: the 500x500 result is what the notebook shows;
# it is not assigned, so normal_img still refers to the opened image.
normal_img.resize((500, 500))

In [None]:
# Error image
# Built from IMG_PATH (defined above, already ends with '/') instead of
# spelling the directory out again, so the location is maintained in one place.
error_img = Image.open(f'{IMG_PATH}00442.JPG')
# Last expression of the cell: the 500x500 result is what the notebook shows;
# it is not assigned, so error_img still refers to the opened image.
error_img.resize((500, 500))

## Group SEM Image Filename with its Type

In [None]:
# Group the main table by its 'group' label; used below to pull out the rows
# of one group at a time.
sem_group = posi_mh_main.groupby(by='group')

In [None]:
# Distinct group labels as a plain list, in order of first appearance.
group_list = pd.unique(posi_mh_main['group']).tolist()

In [None]:
# Map each group label to the list of image filenames ("<SEM_No>.JPG") of the
# rows that belong to it.
data_container = {}

for grp_type in group_list:
    grp = sem_group.get_group(grp_type)
    # Iterate the SEM_No column directly rather than doing one label-based
    # .loc lookup per row: same values in the same order, far fewer pandas
    # calls, and no dependence on the index labels being unique.
    sem_img_filename = [f"{sem_no}.JPG" for sem_no in grp['SEM_No']]
    data_container[grp_type] = sem_img_filename

In [None]:
# Persist the group -> filenames mapping so later cells can reload it from disk.
with open('dataset/data_container.json', 'w') as json_file:
    json.dump(obj=data_container, fp=json_file)

In [None]:
# Read the saved group -> filenames mapping back from disk.
with open('dataset/data_container.json') as json_file:
    sem_img_container = json.load(fp=json_file)

## Crop

### Test Single Sem Image

In [None]:
# Pick a single SEM image (by number) to try the crop steps on before the
# batch run further down.
sem_no = 276
sem_img = SemImage(sem_no=sem_no)

In [None]:
# Settings forwarded to SemImage.get_contour; presumably a binarization
# threshold and a morphology kernel size - TODO confirm against semimg.
thresh = 90
kernel_size = (3, 3)
contours = sem_img.get_contour(thresh=thresh, kernel_size=kernel_size)

In [None]:
# save=False for this trial; presumably nothing is written to disk - TODO
# confirm in semimg.
save = False
sem_img.crop(contours=contours, save=save)
# Show the result; bbox=True presumably overlays the crop bounding boxes -
# TODO confirm in semimg.
sem_img.plot(bbox=True, figsize=(10, 10))

### Crop for each SEM Image

- DOT 타입에 대해서만 진행

In [None]:
# Reload the group -> filenames mapping and keep only the 'DOT' entry,
# the one group processed by the batch crop below.
with open('dataset/data_container.json') as json_file:
    img_matching = json.load(fp=json_file)

dot = img_matching['DOT']

In [None]:
# Turn each filename into its integer SEM number: take the text before the
# first '.' and parse it as an int (e.g. '00441.JPG' -> 441).
target_sem_no = [int(filename.partition('.')[0]) for filename in dot]
# Total number of images, used for the progress message in the batch loop.
cnt_dot = len(target_sem_no)

In [None]:
# Batch crop: run transform -> get_contour -> crop on every target image.
# The settings are the same for every image, so they are assigned once here
# instead of being re-assigned on each iteration.
thresh = 90
kernel_size = (3, 3)
morph_type = 'open'
design_threshold = 0.06
save = True

for i, sem_no in enumerate(target_sem_no):
    print(f'[{i + 1}/{cnt_dot}] Processing {sem_no:05d}.JPG ...')
    sem = SemImage(sem_no=sem_no)

    transform = sem.transform(
        thresh=thresh,
        kernel_size=kernel_size,
        morph_type=morph_type,
    )
    # NOTE(review): the single-image test cell calls
    # get_contour(thresh=..., kernel_size=...) while this loop passes the
    # transform result positionally - confirm both forms match the current
    # SemImage API.
    contours = sem.get_contour(transform)
    sem.crop(contours=contours, design_threshold=design_threshold, save=save)
    # Clear the cell output so only the latest progress line stays visible.
    clear_output(wait=True)

## Organize

- Pattern이 큰 SEM 이미지는 Error가 안나기 때문에 모델 학습시 제외시킬 예정

In [None]:
# Rows of the main table whose group label is exactly "DOT".
dot_type = posi_mh_main[posi_mh_main['group'] == "DOT"]

In [None]:
# Split the DOT rows on TARGET at 150: values up to and including 150 are
# "small", values above 150 are "big". Rows with a missing TARGET satisfy
# neither comparison and land in neither frame.
small_pattern = dot_type[dot_type['TARGET'] <= 150]
big_pattern = dot_type[dot_type['TARGET'] > 150]

In [None]:
# Create the two destination folders for the organize step.
# makedirs(..., exist_ok=True) keeps this cell re-runnable: an existing folder
# is not an error, and a missing 'cropped' parent is created as well.
os.makedirs('cropped/small_pattern', exist_ok=True)
os.makedirs('cropped/big_pattern', exist_ok=True)

In [None]:
# Destination folder name -> SEM numbers that belong in it.
pattern_mapping = dict(
    small_pattern=small_pattern['SEM_No'].values,
    big_pattern=big_pattern['SEM_No'].values,
)
# Move cropped/<SEM_No> into cropped/<pattern>/<SEM_No> for every listed image.
# Presumably cropped/<SEM_No> is what the crop step wrote - verify against semimg.
for pattern in pattern_mapping:
    for i in pattern_mapping[pattern]:
        shutil.move(f'cropped/{i}', f'cropped/{pattern}/{i}')