<a href="https://colab.research.google.com/github/eeseohyun/project/blob/main/AI_05_%EC%9D%B4%EC%84%9C%ED%98%84_Section4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**필요한 패키지 불러오기**

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import os


import tensorflow as tf
from tensorflow import keras
import cv2
from keras_preprocessing.image import ImageDataGenerator

import glob
# xml 파일을 다루기 위한 모듈
from xml.etree import ElementTree

In [None]:
!pip install opencv-python

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Root folder of the dataset on the mounted Drive.
# NOTE(review): `dir` shadows the Python builtin of the same name. It is kept because
# later cells read this variable; rename it everywhere at once if it is ever changed.
dir = '/content/drive/MyDrive/Colab Notebooks/face mask detection'

In [None]:
# Sub-folders of the dataset root that hold the XML annotations and the images.
annotations_dir, images_dir = (
    os.path.join(dir, subfolder) for subfolder in ('annotations', 'images')
)

In [None]:
# Entry names found in each data folder (os.listdir returns them in arbitrary order).
annotations_files, images_files = (
    os.listdir(folder) for folder in (annotations_dir, images_dir)
)

In [None]:
# Report how many entries each data folder contains.
annotation_count = len(annotations_files)
image_count = len(images_files)
print('Total annotaions file :', annotation_count)
print('Total images file :', image_count)

**데이터 추출**

In [None]:
# Parse every annotation XML into one row per annotated object.
# Each row carries the bounding box, the class label, the file stem and the image size.
information = {'xmin': [], 'ymin': [], 'xmax': [], 'ymax': [], 'label': [], 'file': [], 'width': [], 'height': []}


def _int_child(parent, tag):
    """Return the text of child `tag` of `parent` rounded to an int, or None when absent."""
    text = parent.findtext(tag) if parent is not None else None
    return int(round(float(text))) if text is not None else None


# NOTE: files are visited in glob order on purpose; later cells depend on the resulting
# row order (positional train/test split), so the list is not sorted here.
# NOTE(review): tags are matched by exact name, which assumes the XML files carry no
# namespace prefix - confirm against the dataset.
for annotation in glob.glob(annotations_dir + '/*.xml'):
    root = ElementTree.parse(annotation).getroot()

    # File name without directory and extension (portable form of split('/')[-1][0:-4]).
    file_stem = os.path.splitext(os.path.basename(annotation))[0]

    # The image size is read once per file, so a value from a previous file can never
    # leak into the rows of this one.
    size = next(root.iter('size'), None)
    width = _int_child(size, 'width')
    height = _int_child(size, 'height')

    for obj in root.iter('object'):
        name = obj.findtext('name')
        box = obj.find('bndbox')
        corners = {key: _int_child(box, key) for key in ('xmin', 'ymin', 'xmax', 'ymax')}

        # Append a row only when it is complete. Appending fields independently would
        # leave the column lists with different lengths and pair labels with the wrong
        # boxes as soon as one object lacks a field.
        if name is None or None in corners.values():
            continue

        information['label'].append(name)
        information['width'].append(width)
        information['height'].append(height)
        information['file'].append(file_stem)
        for key, value in corners.items():
            information[key].append(value)

In [None]:
# Turn the column-wise dict into a DataFrame and preview the first 20 rows.
annotations_info_df = pd.DataFrame.from_dict(information)
annotations_info_df.head(n = 20)

**Feature Engineering**

In [None]:
# Derive the annotation (.xml) and image (.png) file names from the shared file stem.
for column, extension in (('annotation_file', '.xml'), ('image_file', '.png')):
    annotations_info_df[column] = annotations_info_df['file'] + extension

# Rename the label "mask_weared_incorrect" to "mask_incorrectly_worn".
annotations_info_df['label'] = annotations_info_df['label'].replace(
    'mask_weared_incorrect', 'mask_incorrectly_worn'
)

In [None]:
annotations_info_df.head(20)

**Label이 올바른지 확인**
```ex> Image_001```

In [None]:
# Image file name of the first annotation row, used below to check the labels visually.
annotations_info_df['image_file'].iloc[0]

In [None]:
# Full path of the image that belongs to the first annotation row.
first_image_name = annotations_info_df['image_file'].iloc[0]
image_001_path = os.path.join(images_dir, first_image_name)
image_001_path

In [None]:
# Load the first image from disk.
image_001 = cv2.imread(image_001_path)
# cv2.imread reports failure by returning None instead of raising, so fail loudly here
# rather than at the first later use of the array.
if image_001 is None:
    raise FileNotFoundError(f'Could not read image: {image_001_path}')
image_001

In [None]:
# Helper that displays an image with matplotlib.
def render_image(image):
    """Show `image` in a new 12x8 matplotlib figure."""
    figure_size = (12, 8)
    plt.figure(figsize = figure_size)
    plt.imshow(image)
    plt.show()
    
# Helper that converts an image from BGR to RGB channel order.
def convert_to_RGB(image):
    """Return `image` converted with cv2.COLOR_BGR2RGB."""
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_image

In [None]:
# Show the image twice: as loaded, then after the BGR -> RGB conversion.
for frame in (image_001, convert_to_RGB(image_001)):
    render_image(frame)

In [None]:
# Shape of Image_001
image_001.shape

In [None]:
# Full path of the annotation file that belongs to the first row.
first_annotation_name = annotations_info_df['annotation_file'].iloc[0]
annotation_001_path = os.path.join(annotations_dir, first_annotation_name)
annotation_001_path

**이미지 자르기**


한 이미지에 여러 개의 라벨 존재(=한 이미지에서 두 명 이상의 사람이 존재)

따라서, 한 사람으로만 구성된 여러 개의 이미지로 이미지를 잘라내도록 한다. 

경계 상자 내에서 이미지를 자르도록 ```xmin, ymin, xmax, ymax 값```을 정의해줌

In [None]:
# Use the first annotation row as a cropping example.
x_min, y_min, x_max, y_max = (
    annotations_info_df[column].iloc[0] for column in ('xmin', 'ymin', 'xmax', 'ymax')
)

# The array is indexed [rows, columns], i.e. [y range, x range].
cropped_001 = image_001[y_min:y_max, x_min:x_max]

# Render the crop as loaded, then again after the BGR -> RGB conversion.
render_image(cropped_001)
render_image(convert_to_RGB(cropped_001))

**자른 이미지에 대한 새로운 디렉토리 생성**

In [None]:
# Create the folder that will receive the cropped images, inside the dataset root.
directory = 'cropped_images'
parent_directory = dir
path = os.path.join(parent_directory, directory)
try:
    os.mkdir(path)
except FileExistsError:
    # Re-running the cell is fine. Any other OSError (missing parent folder, no write
    # permission, ...) now propagates instead of being silently ignored.
    pass

In [None]:
# Seed the cropped-image name column with the bare file stem; a later cell turns it
# into the final '.png' file name.
annotations_info_df['cropped_image_file'] = annotations_info_df['file']
annotations_info_df

In [None]:
# Crop each annotated bounding box out of its source image and save it as '<file>-<row>.png'.

# Build every output name in one column assignment from the untouched 'file' column.
# The former per-row `df[col].iloc[i] = ...` was chained indexing, which pandas may apply
# to a temporary copy, and it appended the suffix again each time the cell was re-run.
annotations_info_df['cropped_image_file'] = [
    f'{stem}-{i}.png' for i, stem in enumerate(annotations_info_df['file'])
]

crop_columns = ['image_file', 'cropped_image_file', 'xmin', 'ymin', 'xmax', 'ymax']
loaded_path, image = None, None
for image_file, cropped_image_filename, xmin, ymin, xmax, ymax in (
        annotations_info_df[crop_columns].itertuples(index = False, name = None)):
    image_filepath = os.path.join(images_dir, image_file)

    # Decode the source image only when the path changes, so adjacent rows that refer to
    # the same image reuse the already loaded array.
    if image_filepath != loaded_path:
        image = cv2.imread(image_filepath)
        # cv2.imread returns None on failure instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_filepath}')
        loaded_path = image_filepath

    # The array is indexed [rows, columns], i.e. [y range, x range].
    cropped_image = image[ymin : ymax, xmin : xmax]

    # cv2.imwrite returns False when the file could not be written.
    cropped_image_directory = os.path.join(path, cropped_image_filename)
    if not cv2.imwrite(cropped_image_directory, cropped_image):
        raise OSError(f'Could not write cropped image: {cropped_image_directory}')

In [None]:
annotations_info_df

In [None]:
# List the output folder to check that the cropped images were saved.
cropped_images_files = os.listdir(path)
cropped_images_files

In [None]:
print('There are {} cropped images in total.'.format(len(cropped_images_files)))

0번 인덱스의 이미지(Image_001)를 다시 가져와서 잘린 이미지를 표시

In [None]:
# Image_001 File Name
annotations_info_df['cropped_image_file'].iloc[0]

In [None]:
# Full path of the cropped image that belongs to the first row.
first_cropped_name = annotations_info_df['cropped_image_file'].iloc[0]
cropped_001_0_path = os.path.join(path, first_cropped_name)
cropped_001_0_path

In [None]:
# Read the first cropped image back from disk.
# NOTE(review): cv2.imread returns None when the file cannot be read - confirm the
# path exists if the next cells fail.
cropped_001_0 = cv2.imread(cropped_001_0_path)
cropped_001_0

In [None]:
# Display the cropped image after converting it from BGR to RGB.
render_image(convert_to_RGB(cropped_001_0))

In [None]:
# Shape of Cropped Image_001
cropped_001_0.shape

**훈련/테스트 데이터 나누기**
*   테스트 데이터 : 앞쪽 800개 행 (목표 비율 약 25%)
*   훈련 데이터 : 나머지 행 (목표 비율 약 75%)

In [None]:
# Hold out the first 800 rows for testing; every remaining row is used for training.
# NOTE(review): 800 is a fixed row count, so the actual test share depends on the frame
# length, and rows cropped from one source image can fall on both sides of the cut.
holdout_rows = 800
test_df = annotations_info_df.iloc[:holdout_rows]
train_df = annotations_info_df.iloc[holdout_rows:]


train_df.shape, test_df.shape

In [None]:
train_df.head()

In [None]:
# Distinct class labels present in the training rows, in order of first appearance.
classes = train_df['label'].unique().tolist()
classes

**Exploratory Data Analysis (EDA)**

*   마스크를 잘 착용하였는가?
*   마스크를 착용하지 않았는가?
*   마스크를 잘못 착용하였는가?

In [None]:
train_df

In [None]:
train_df[train_df['file'] == 'maksssksksss139']['label'].unique()

In [None]:
# Load the source image 'maksssksksss139.png' for the annotation overlay drawn below.
image_139_path = os.path.join(images_dir, 'maksssksksss139.png')
image_139 = cv2.imread(image_139_path)
image_139

In [None]:
# Convert the image from BGR to RGB and display it.
image_139_rgb = convert_to_RGB(image_139)
render_image(image_139_rgb)

In [None]:
# Training rows that were cropped from image 'maksssksksss139'.
is_image_139 = train_df['file'] == 'maksssksksss139'
image_139_df = train_df.loc[is_image_139]
image_139_df

In [None]:
# Group the bounding boxes of image 139 by label.
with_mask_list, without_mask_list, incorrectly_worn_list = [], [], []

# Labels with a dedicated list; any other label goes to the "incorrectly worn" list.
list_for_label = {'with_mask': with_mask_list, 'without_mask': without_mask_list}

box_columns = (
    image_139_df[column].to_numpy()
    for column in ('label', 'xmin', 'ymin', 'xmax', 'ymax')
)
for label, x_min, y_min, x_max, y_max in zip(*box_columns):
    target_list = list_for_label.get(label, incorrectly_worn_list)
    target_list.append([x_min, y_min, x_max, y_max])

found_objects_dict = {
    'With Mask': with_mask_list,
    'Without Mask': without_mask_list,
    'Incorrectly Worn': incorrectly_worn_list,
}
found_objects_dict

In [None]:
# Draw every bounding box and a short caption onto the RGB copy of image 139.
default_style = ((0, 255, 0), 'Mask')  # green
special_styles = {
    'Without Mask': ((255, 0, 0), 'No Mask'),  # red
    'Incorrectly Worn': ((255, 255, 0), 'Incorrect'),  # yellow
}

for key, boxes in found_objects_dict.items():
    color, text = special_styles.get(key, default_style)
    for x_min, y_min, x_max, y_max in boxes:
        cv2.rectangle(image_139_rgb, (x_min, y_min), (x_max, y_max), color = color, thickness = 2)
        # The caption is placed slightly up and to the left of the box's top-left corner.
        cv2.putText(image_139_rgb, org = (x_min - 8, y_min - 3), text = text,
                    fontFace = cv2.FONT_HERSHEY_SIMPLEX, fontScale = 0.5, color = color)

In [None]:
render_image(image_139_rgb)

In [None]:
# Number of training rows per label.
train_df['label'].value_counts()

In [None]:
# Label frequencies as a two-column frame ('label', 'count'), most frequent first.
# Naming the index and the value column explicitly gives the same columns on every pandas
# version. The former rename map {'index': 'label', 'label': 'count'} produced two
# 'count' columns on pandas >= 2.0, where reset_index() already yields 'label'/'count'.
sorted_label_df = (
    train_df['label']
    .value_counts()
    .rename_axis('label')
    .reset_index(name = 'count')
)
sorted_label_df

In [None]:
# Visualise the label distribution as a horizontal bar chart.
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer matplotlib releases,
# so use whichever of the two names this installation provides.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.figure(figsize = (8, 6))
# 'h' is the documented seaborn spelling for a horizontal orientation.
barplot = sns.barplot(x = 'count', y = 'label', data = sorted_label_df, orient = 'h',
                      palette = ['skyblue', 'red', 'yellow'])
plt.title('Distribution of Labels', fontsize = 20, fontweight = 'bold')
plt.xlabel('Count', fontsize = 15, fontweight = 'bold')
plt.ylabel('Label', fontsize = 15, fontweight = 'bold')

# Annotate each bar with its count and its share of all rows.
total_count = sorted_label_df['count'].sum()
for p in barplot.patches:
    bar_length = p.get_width()  # for a horizontal bar the patch width is the count
    percentage = round(bar_length * 100 / total_count, 2)
    plt.text(x = bar_length + 15, y = p.get_y() + 0.55 * p.get_height(),
             s = f'{int(bar_length)}\n({percentage} %)')

plt.show()

In [None]:
# Read the cropped image of the first training row and inspect its array shape.
cropped_image_path = os.path.join(path, train_df['cropped_image_file'].iloc[0])
cropped_image = cv2.imread(cropped_image_path)
cropped_image.shape

In [None]:
# Collect the first two shape entries of every cropped training image.
# NOTE(review): for an image array, shape[0] is the number of rows (height) and shape[1]
# the number of columns (width), so `image_width` receives heights and `image_height`
# receives widths. Later cells consume both lists in exactly this order, so it is kept.
image_width, image_height = [], []
for cropped_name in train_df['cropped_image_file']:
    cropped_image_path = os.path.join(path, cropped_name)
    cropped_image = cv2.imread(cropped_image_path)
    first_dim, second_dim = cropped_image.shape[:2]
    image_width.append(first_dim)
    image_height.append(second_dim)

In [None]:
# Histogram with a KDE curve of the values collected in `image_width`.
width_axes = sns.histplot(image_width, kde = True)
width_axes.set_title('Image Width Distribution', fontsize = 16, fontweight = 'bold')
width_axes.set_xlabel('Image Width', fontweight = 'bold')
width_axes.set_ylabel('Count', fontweight = 'bold')
plt.show()

In [None]:
# Histogram with a KDE curve of the values collected in `image_height`.
height_axes = sns.histplot(image_height, kde = True)
height_axes.set_title('Image Height Distribution', fontsize = 16, fontweight = 'bold')
height_axes.set_xlabel('Image Height', fontweight = 'bold')
height_axes.set_ylabel('Count', fontweight = 'bold')
plt.show()

In [None]:
# Print min / max / mean / median for both collected size lists.
for heading, values in (('IMAGE WIDTH', image_width), ('IMAGE HEIGHT', image_height)):
    print(heading)
    print(f'Min: {min(values)}')
    print(f'Max: {max(values)}')
    print(f'Mean: {np.mean(values)}')
    print(f'Median: {np.median(values)}')

In [None]:
# Resize target for the generators: the integer median of each collected size list.
image_target_size = tuple(
    int(np.median(values)) for values in (image_width, image_height)
)
image_target_size

**Image Data Generator**

In [None]:
# One generator object feeds both subsets: it rescales pixels to [0, 1] and reserves
# 25 % of `train_df` for validation.
train_image_generator = ImageDataGenerator(rescale = 1. / 255., validation_split = 0.25)

# Options shared by the training and the validation flow.
shared_flow_options = dict(
    dataframe = train_df,
    directory = path,
    x_col = 'cropped_image_file',
    y_col = 'label',
    batch_size = 32,
    seed = 42,
    shuffle = True,
    class_mode = 'categorical',
    target_size = image_target_size,
)

train_generator = train_image_generator.flow_from_dataframe(
    subset = 'training', **shared_flow_options
)

valid_generator = train_image_generator.flow_from_dataframe(
    subset = 'validation', **shared_flow_options
)

In [None]:
# Generator for the held-out test rows: only rescales pixels to [0, 1].
test_image_generator = ImageDataGenerator(rescale = 1. / 255.)

# Build the flow from `test_image_generator`. The original built it from
# `train_image_generator`, leaving the dedicated test generator unused.
test_generator = test_image_generator.flow_from_dataframe(
    dataframe = test_df,
    directory = path,
    x_col = 'cropped_image_file',
    y_col = 'label',
    batch_size = 32,
    seed = 42,
    # Keep the test rows in frame order so predictions line up with `test_df`.
    shuffle = False,
    class_mode = 'categorical',
    target_size = image_target_size
)

In [None]:
# Print the three generator objects.
for generator in (train_generator, valid_generator, test_generator):
    print(generator)

**Modelling**

In [None]:
# Model input shape: the two median sizes followed by 3 colour channels.
input_shape = [int(np.median(values)) for values in (image_width, image_height)] + [3]

In [None]:
# Small CNN: two conv-conv-pool stages followed by a softmax classifier.
conv_options = dict(filters = 10, kernel_size = 3, activation = 'relu')
pool_options = dict(pool_size = 2, padding = 'valid')

model = keras.models.Sequential()
model.add(keras.layers.Conv2D(input_shape = input_shape, **conv_options))
model.add(keras.layers.Conv2D(**conv_options))
model.add(keras.layers.MaxPool2D(**pool_options))
model.add(keras.layers.Conv2D(**conv_options))
model.add(keras.layers.Conv2D(**conv_options))
model.add(keras.layers.MaxPool2D(**pool_options))
model.add(keras.layers.Flatten())
# One output unit per class label.
model.add(keras.layers.Dense(units = len(classes), activation = 'softmax'))

In [None]:
# Compile with categorical cross-entropy and Adam; track accuracy and recall.
tracked_metrics = ['accuracy', keras.metrics.Recall()]
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = keras.optimizers.Adam(),
    metrics = tracked_metrics,
)

# Train for 10 epochs, running through each generator once per epoch.
history = model.fit(
    train_generator,
    epochs = 10,
    steps_per_epoch = len(train_generator),
    validation_data = valid_generator,
    validation_steps = len(valid_generator),
)

In [None]:
result = pd.DataFrame(history.history)
result

In [None]:
# Plot every column of the training history frame as a line chart.
result.plot()