In [18]:
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import tensorflow_addons as tfa
import pickle
import pydot
import graphviz
from PIL import Image
from tensorflow.keras.utils import plot_model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Activation, Flatten, Dropout, Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from imutils import paths
from tqdm import tqdm

In [2]:
img_dim = (180, 180, 3)

imgs = []
labels = []

def resizer(img_paths):
  """
  모델에 사용할 수 있도록 이미지 사이즈 조정
  """
  for img_path in tqdm(img_paths):
      try:
        img = cv2.imread(img_path)
        img = cv2.resize(img, (img_dim[1], img_dim[0]))

        label = img_path.split(os.path.sep)[-2]
        labels.append([label])

        imgs.append(img)
      except:
        print(f"error file is {img_path.split('.')[-1]}")

In [4]:
images = '/Users/cmblir/Python/Phill-Detector/Model/Multi-Classification/image/images/'
train_img_paths = sorted(list(paths.list_images(images)))
print(">>> img counts =", len(train_img_paths))

>>> img counts = 21465


In [5]:
resizer(train_img_paths)

 13%|█▎        | 2844/21465 [00:15<01:49, 170.39it/s]

error file is png


100%|██████████| 21465/21465 [01:58<00:00, 180.52it/s]


In [6]:
imgs = np.array(imgs, dtype= 'float32') / 255.0
imgs_test = np.array(imgs, dtype= 'float32') / 255.0
labels = np.array(labels)

In [7]:
mlb = MultiLabelBinarizer()
enc_labels = mlb.fit_transform(labels)

In [8]:
print('>>> classes name =', mlb.classes_)

>>> classes name = ['X선조영제' 'nan' '소화성궤양용제' '정신신경용제' '각성제,흥분제' '간장질환용제'
 '갑상선, 부갑상선호르몬제' '강심제' '건위소화제' '골격근이완제' '구충제' '기생성 피부질환용제' '기타의 말초신경용약'
 '기타의 비뇨생식기관 및 항문용약' '기타의 비타민제' '기타의 소화기관용약' '기타의 순환계용약'
 '기타의 신경계및 감각기관용 의약품' '기타의 알레르기용약' '기타의 외피용약' '기타의 자양강장변질제' '기타의 조제용약'
 '기타의 조직세포의 기능용의약품' '기타의 종양치료제' '기타의 중추신경용약' '기타의 항생물질제제(복합항생물질제제를 포함)'
 '기타의 혈액 및 체액용약' '기타의 호르몬제(항호르몬제를 포함)' '기타의 호흡기관용약' '기타의 화학요법제'
 '난포호르몬제 및 황체호르몬제' '뇌하수체호르몬제' '단백동화스테로이드제' '단백아미노산제제' '당뇨병용제' '동맥경화용제'
 '따로 분류되지 않고 치료를 주목적으로 하지않는 의약품' '따로 분류되지 않는 대사성 의약품' '면역혈청학적 검사용 시약'
 '모발용제(발모, 탈모, 염모, 양모제)' '무기질제제' '백신류' '부신호르몬제' '부정맥용제'
 '비뇨생식기관용제(성병예방제포함)' '비타민 A 및 D제' '비타민 B1제' '비타민 B제(비타민 B1을 제외)'
 '비타민 C 및 P제' '비타민 E 및 K제' '비타민제' '설화제' '소화기관용약' '아편알카로이드계 제제' '안과용제'
 '이뇨제' '이담제' '이비과용제' '자격요법제(비특이성면역억제제를 포함)' '자궁수축제' '자율신경제' '장기제제'
 '저함량 비타민 및 미네랄 제제' '정장제' '제산제' '종합대사성제제' '주로 곰팡이, 원충에 작용하는 것'
 '주로 그람양성, 음성균, 리케치아, 비루스에 작용하는 것' '주로 그람양성, 음성균에 작용하는 것'
 '주로 그람양성균, 리케치아, 비루스에 작용하는 것' '주로 그람양성균에 작용하는 것' '

In [9]:
(X_train, X_test, y_train, y_test) = train_test_split(imgs, enc_labels, test_size = 0.2, random_state=42)

In [10]:
# 모델링
class Classifier:
	def build(width, height, depth, classes):
		model = Sequential()
		input_shape = (height, width, depth)
		
		model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
		model.add(MaxPooling2D(pool_size=(3, 3)))
		model.add(Dropout(0.25))
  
		model.add(Conv2D(64, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
	
		model.add(Conv2D(64, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
		model.add(MaxPooling2D(pool_size=(2, 2)))
		model.add(Dropout(0.25))
 
		model.add(Conv2D(128, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
	
		model.add(Conv2D(128, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
		model.add(MaxPooling2D(pool_size=(2, 2)))
		model.add(Dropout(0.25))
        
		model.add(Conv2D(256, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
	
		model.add(Conv2D(256, (3, 3), padding='same'))
		model.add(Activation('relu'))
		model.add(BatchNormalization(axis=-1))
		model.add(MaxPooling2D(pool_size=(2, 2)))
		model.add(Dropout(0.25))

		model.add(Flatten())
		model.add(Dense(2048))
		model.add(Activation('relu'))
		model.add(BatchNormalization())
		model.add(Dropout(0.5))
  
		model.add(Dense(classes))
		model.add(Activation('softmax'))
		return model

In [11]:
# 모델 설정하기
model = Classifier.build(
    width=img_dim[1], height=img_dim[0], depth=img_dim[2],
    classes=len(mlb.classes_)
)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-10-15 23:30:39.858789: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-10-15 23:30:39.860390: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [16]:
plot_model(model, to_file='model_shapes.png', show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [19]:
# 학습 및 성능 측정하기
model.compile(loss = CategoricalCrossentropy(from_logits=False),
optimizer=Adam(learning_rate=1e-3, decay= 1e-3 / 200),
metrics = ['accuracy'])

In [20]:
tqdm_callback = tfa.callbacks.TQDMProgressBar()

In [23]:
# M! GPU 사용여부 확인
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [24]:
history = model.fit(
    X_train, y_train, batch_size = 32,
validation_data = (X_test, y_test),
epochs = 200, verbose = 1,
callbacks = [tqdm_callback]
)

Training:   0%|           0/200 ETA: ?s,  ?epochs/s

Epoch 1/200


0/537           ETA: ?s - 

Epoch 1/200
119/537 [=====>........................] - ETA: 3:12 - loss: 3.6026 - accuracy: 0.1161

In [None]:
fig, loss_ax = plt.subplots()
acc_ax = loss_ax.twinx()

loss_ax.plot(history.history['loss'], 'y', label = 'train loss')
loss_ax.plot(history.history['val_loss'], 'r', label = 'val loss')

acc_ax.plot(history.history['accuracy'], 'b', label = 'train accuracy')
acc_ax.plot(history.history['val_accuracy'], 'g', label = 'valid accuracy')

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
# 7 훈련 과정 시각화 (손실)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
test_image_paths = sorted(
    list(
        paths.list_images("/content/drive/Shareddrives/DVC/AI 모델/DVC 이미지/TEST_images/")
    )
)
print(">>> test image path =", test_image_paths)

print(">>> class index =",  mlb.classes_)

for image_path in test_image_paths:
    test_image = cv2.imread(image_path)

    test_image = cv2.resize(
        test_image, (180, 180)
    )
    cv2_imshow(test_image)

    test_image = test_image.astype("float") / 255.0
    test_image = np.expand_dims(test_image, axis=0)

    proba = model.predict(test_image)[0]
    print(
        np.round(proba, 3)
    )
    idx = np.argmax(proba)
    print(">>> predict class =", mlb.classes_[idx])
