https://www.kaggle.com/ayushimishra2809/face-mask-detection

<h2> import libraries </h2>

In [11]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import cv2
import matplotlib.patches as patches
import tensorflow as tf
from keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential

In [12]:
pip install mtcnn



In [13]:
from mtcnn.mtcnn import MTCNN   # face detection algorithm

<h2> loading datasets </h2>

In [15]:
# Folder that holds the dataset's `images/` and `annotations/` directories.
# Set the MASK_DATA_DIR environment variable to point at the dataset on the
# current machine; when it is unset the original hard-coded location is used.
DATA_DIR=os.environ.get("MASK_DATA_DIR","/Medical mask/Medical mask/Medical Mask")
images=os.path.join(DATA_DIR,"images")
annotations=os.path.join(DATA_DIR,"annotations")
# Both CSV files are read relative to the current working directory.
train=pd.read_csv("train.csv")
submission=pd.read_csv("submission.csv")

In [16]:
# Number of rows in the training annotations, followed by the first rows.
print(train.shape[0])
train.head()

15412


Unnamed: 0,name,x1,x2,y1,y2,classname
0,2756.png,69,126,294,392,face_with_mask
1,2756.png,505,10,723,283,face_with_mask
2,2756.png,75,252,264,390,mask_colorful
3,2756.png,521,136,711,277,mask_colorful
4,6098.jpg,360,85,728,653,face_no_mask


In [17]:
# Number of rows in the submission template, followed by the first rows.
print(submission.shape[0])
submission.head()

8142


Unnamed: 0,name,x1,x2,y1,y2,classname
0,1800.jpg,,,,,
1,1800.jpg,,,,,
2,1800.jpg,,,,,
3,1799.jpg,,,,,
4,1799.jpg,,,,,


In [18]:
# Count the entries found in the images directory.
image_entries=os.listdir(images)
len(image_entries)

FileNotFoundError: ignored

In [19]:
# Alphabetically sorted listings of the image and annotation directories.
a=sorted(os.listdir(images))
b=sorted(os.listdir(annotations))
# Annotation count first, then image count.
print(len(b),len(a))

FileNotFoundError: ignored

In [None]:
# The first 1698 sorted file names become the test set; everything after that
# position is used for training.
split_at=1698
test_images,train_images=a[:split_at],a[split_at:]

In [None]:
# Display the file name of the first test image.
test_images[0]

In [None]:
# Load the first test image from disk and render it.
sample_path=os.path.join(images,test_images[0])
img=plt.imread(sample_path)
plt.imshow(img)
plt.show()

In [None]:
# Keep only the two face-level classes and order the rows by file name.
# Filtering and sorting are chained so `train` is rebound to a fresh DataFrame;
# sorting a filtered slice in place can raise pandas' SettingWithCopyWarning.
options=['face_with_mask','face_no_mask']
train=train[train['classname'].isin(options)].sort_values('name',axis=0)

In [None]:
# One [x1, x2, y1, y2] list per row, taken from the four coordinate columns in
# a single vectorized step instead of re-slicing the frame with iloc per row.
bbox=train[["x1","x2","y1","y2"]].values.tolist()
train["bbox"]=bbox
def get_boxes(id):
    """Return the bounding boxes stored in `train` for one image.

    id: image file name; it is converted with str() before being compared
        against the `name` column.
    Returns a list holding the `bbox` value of every matching row, in row
    order (an empty list when no row matches).
    """
    same_image=train["name"]==str(id)
    return list(train.loc[same_image,"bbox"])
# Draw the annotated boxes of the fourth training image on top of the image.
image=train_images[3]
boxes=get_boxes(image)
print(boxes)

img=plt.imread(os.path.join(images,image))

fig,ax = plt.subplots(1)
ax.imshow(img)
# Each box is used as (left, top, right, bottom): the first two values anchor
# the rectangle, the last two give its far corner.
for left,top,right,bottom in boxes:
    outline = patches.Rectangle((left,top),right-left,bottom-top,linewidth=2,edgecolor='r',facecolor='none')
    ax.add_patch(outline)
plt.show()

In [None]:
# Same overlay for the sixth training image.
image=train_images[5]
boxes=get_boxes(image)

img=plt.imread(os.path.join(images,image))

fig,ax = plt.subplots(1)
ax.imshow(img)
# Each box is used as (left, top, right, bottom): the first two values anchor
# the rectangle, the last two give its far corner.
for left,top,right,bottom in boxes:
    outline = patches.Rectangle((left,top),right-left,bottom-top,linewidth=2,edgecolor='r',facecolor='none')
    ax.add_patch(outline)
plt.show()

In [None]:
# Bar chart of the number of rows per class.
# value_counts() orders its result by frequency, not by a fixed label order,
# so the counts are re-indexed by the labels before plotting; otherwise a bar
# could be drawn under the wrong class name.
class_labels=['face_with_mask','face_no_mask']
class_counts=train.classname.value_counts().reindex(class_labels,fill_value=0)
plt.bar(class_labels,class_counts.values)

<h2> creating training data </h2>

In [None]:
img_size=50  # side length, in pixels, of the square crops produced below
data=[]      # filled by create_data() with [grayscale crop, class name] pairs
# NOTE(review): `path` is not referenced by the code in this cell; images are
# read from the `images` directory instead - confirm whether it is still needed.
path='/kaggle/input/face-mask-detection-dataset/Medical mask/Medical mask/Medical Mask/images/'
def create_data():
    """Append one [resized grayscale crop, class name] pair to `data` per row of `train`.

    For every row the image named in the `name` column is read from the
    `images` directory in grayscale, cropped to the row's box and resized to
    img_size x img_size. Rows whose image cannot be read, or whose box selects
    no pixels, are reported and skipped instead of aborting the whole run.

    Columns are addressed by name (name, x1, x2, y1, y2, classname) rather than
    by position, so extra or reordered columns do not shift the values.
    """
    columns=(train[column] for column in ("name","x1","x2","y1","y2","classname"))
    for name,x1,x2,y1,y2,classname in zip(*columns):
        # cv2.imread returns None (no exception) when the file cannot be read.
        img_array=cv2.imread(os.path.join(images,name),cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            print("skipping",name,"- image could not be read")
            continue
        # Rows are sliced with x2:y2 and columns with x1:y1, the same way the
        # boxes are used elsewhere in this notebook.
        crop_image=img_array[x2:y2,x1:x1+(y1-x1)]
        if crop_image.size==0:
            print("skipping",name,"- box selects no pixels")
            continue
        # cv2.resize(source, (width, height)) -> fixed-size crop
        new_img_array=cv2.resize(crop_image,(img_size,img_size))
        data.append([new_img_array,classname])
# Fill `data` from the rows of `train`.
create_data()

In [None]:
# Render the first preprocessed crop.
first_crop=data[0][0]
plt.imshow(first_crop)

In [None]:
# Separate the [crop, class name] pairs into parallel feature and label lists.
x=[pair[0] for pair in data]
y=[pair[1] for pair in data]
from sklearn.preprocessing import LabelEncoder
# Convert the class-name strings into integer ids the model can work with.
lbl=LabelEncoder()
y=lbl.fit_transform(y)

In [None]:
# array 재구성
x = np.array(x).reshape(-1,50,50,1)
# 모든 값이 0~1사이에 있도록 정규화
x = tf.keras.utils.normalize(x,axis=1)
# one-hot incoding : 10진 정수 -> 2진 바이너리 형식으로 변경
# (파라미터로 값 크기만큼 0으로 된 배열을 만들고 파라미터 값 위치에만 1(hot)을 넣어줌)
# ex) to_categorial(4);; array([0,0,0,0,1])
from keras.utils import to_categorical
y = to_categorical(y)

<h2> model fitting </h2>

In [None]:
from keras.layers import LSTM
# Build the network as a Sequential model: the layers below are stacked in the
# order they are listed.
#
# Notes on the arguments used:
#   Conv2D(filters, kernel_size, ...) - convolution layer
#     filters      : number of convolution filters (feature extractors)
#     kernel_size  : (rows, cols) of each filter
#     input_shape  : shape of one sample, without the sample count
#                    (rows, cols, channels; 1 channel = grayscale)
#     activation   : activation function (relu here)
#     strides      : step with which the filter moves over the input
#   MaxPooling2D   : keeps the largest value of every pool_size window
#   Dense(units)   : fully connected layer with `units` output nodes
model=Sequential([
    # Convolutional feature extractor
    Conv2D(100,(3,3),input_shape=x.shape[1:],activation='relu', strides=2),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    # Classifier head: hidden layer with 50 relu nodes, then dropout
    Dense(50, activation='relu'),
    Dropout(0.2),
    # Output layer: 2 nodes with softmax, one probability per class
    Dense(2, activation='softmax'),
])

In [None]:
# Adam with a decaying learning rate.
# The deprecated `lr=` / `decay=` arguments are replaced by `learning_rate=`
# plus an InverseTimeDecay schedule with decay_steps=1, which gives the same
# rule: learning_rate = 1e-3 / (1 + 1e-5 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-3, decay_steps=1, decay_rate=1e-5)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
# Compile the model.
# optimizer : decides how the network is updated from the loss
# loss      : measures how far the predictions are from the targets
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# Train the compiled model on the inputs x and targets y:
# 30 passes over the data (epochs), 5 samples per update step (batch_size).
model.fit(x,y,epochs=30,batch_size=5)

In [None]:
detector=MTCNN()
img=plt.imread(os.path.join(images,test_images[0]))
# Run face detection on the image.
detected_faces=detector.detect_faces(img)
# Draw on a copy and under its own name: the loaded image stays untouched and
# the training array `x` is no longer overwritten by this preview cell.
annotated=img.copy()
for detected_face in detected_faces:
    # The box is used as [left, top, width, height].
    left,top,width,height=detected_face['box']
    cv2.rectangle(annotated,
          (left, top),
          (left+width, top+height),
          (0,155,255),
          10)
plt.imshow(annotated)

In [None]:
img=plt.imread(os.path.join(images,test_images[3]))
# Run face detection on the image.
detected_faces=detector.detect_faces(img)
# Draw on a copy and under its own name: the loaded image stays untouched and
# the training array `x` is no longer overwritten by this preview cell.
annotated=img.copy()
for detected_face in detected_faces:
    # The box is used as [left, top, width, height].
    left,top,width,height=detected_face['box']
    cv2.rectangle(annotated,
          (left, top),
          (left+width, top+height),
          (0,155,255),
          10)
plt.imshow(annotated)

In [None]:
def _detect_boxes(image_names,read_image):
    """Run the face detector on every image and collect [file name, box] pairs.

    image_names: file names inside the `images` directory.
    read_image:  callable that loads an image from a path.
    Returns one list per image (empty when nothing was detected), in the
    order of image_names.
    """
    per_image=[]
    for image_name in image_names:
        pixels=read_image(os.path.join(images,image_name))
        per_image.append([[image_name,found['box']] for found in detector.detect_faces(pixels)])
    return per_image

# The detector is created once and shared by both passes.
detector=MTCNN()

# First pass: images loaded with matplotlib.
test_df=_detect_boxes(test_images,plt.imread)
test=[entry for per_image in test_df for entry in per_image]

# Images without any detection in the first pass. A set makes the membership
# check constant-time instead of scanning the whole list for every image.
sub=[entry[0] for entry in test]
found_names=set(sub)
rest_image=[image_name for image_name in test_images if image_name not in found_names]

# Second pass over those images, this time loaded with OpenCV.
test_df_=_detect_boxes(rest_image,cv2.imread)
test.extend(entry for per_image in test_df_ for entry in per_image)

In [None]:
# Detections whose box contains at least one negative value. Each detection is
# listed once, even when several of its values are negative.
negative=[entry for entry in test if any(value<0 for value in entry[1])]
    

In [None]:
test_data=[]
def create_test_data():
    """Classify every detected face and append [detection, prediction] to `test_data`.

    Detections listed in `negative` are skipped. Each remaining face is read
    in grayscale, cropped with its [left, top, width, height] box, resized to
    img_size x img_size and passed through the same tf.keras.utils.normalize
    call that is applied to the training array, so the model sees inputs on
    the scale it was trained on. Unreadable images and empty crops are
    reported and skipped.
    """
    for j in test:
        if j in negative:
            continue
        left,top,width,height=j[1]
        # cv2.imread returns None (no exception) when the file cannot be read.
        img=cv2.imread(os.path.join(images,j[0]),cv2.IMREAD_GRAYSCALE)
        if img is None:
            print("skipping",j[0],"- image could not be read")
            continue
        face_crop=img[top:top+height,left:left+width]
        if face_crop.size==0:
            print("skipping",j[0],"- box selects no pixels")
            continue
        new_img=cv2.resize(face_crop,(img_size,img_size))
        new_img=new_img.reshape(-1,img_size,img_size,1)
        # Same preprocessing as the training data.
        new_img=tf.keras.utils.normalize(new_img,axis=1)
        predict=model.predict(new_img)
        test_data.append([j,predict])

# Run the classifier over the detected faces, filling `test_data`.
create_test_data()  

In [None]:
# Build the submission frame: one row per classified detection.
# The detector box is used elsewhere in this notebook as
# [left, top, width, height], while the x1/x2/y1/y2 columns are used as
# (left, top, right, bottom) when cropping and drawing. The far corner is
# therefore left+width / top+height, not the raw width and height.
rows=[]
for detection,scores in test_data:
    file_name,(left,top,width,height)=detection
    rows.append({
        'name':file_name,
        'x1':left,
        'x2':top,
        'y1':left+width,
        'y2':top+height,
        'classname':np.argmax(scores),  # index of the highest class score
    })
# Columns follow the order of the submission template.
df=pd.DataFrame(rows,columns=['name','x1','x2','y1','y2','classname'])
# Map the predicted class indices back to their class names.
df['classname']=lbl.inverse_transform(df['classname'])

In [None]:
# Order the rows by file name, descending, then write them to disk.
df=df.sort_values(by='name',ascending=False)
df.to_csv('submission_1.csv')