**기존 Dataset을 이용해 CNN 학습**  
- 정상군 : 1266명
- 코로나19 환자 : 460명

# 라이브러리 및 패키지

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
import time
import PIL
import tensorflow as tf

#모델 생성
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import array_to_img, img_to_array, load_img, image_dataset_from_directory
from keras.callbacks import ModelCheckpoint, EarlyStopping

#모델저장
import joblib

# 데이터셋 형성 및 이미지 전처리

In [None]:
# Mount Google Drive into the Colab runtime's filesystem.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Shared settings: input image geometry and training hyperparameters.

img_height, img_width = 128, 128  # target (height, width) handed to the image loaders
batch_size = 128                  # number of images per dataset batch
seed = 42                         # seed handed to the dataset loaders
epoch = 500                       # number of epochs requested from fit()

In [None]:
# Build the train / validation / test datasets from the image folders.

train_path = "/content/drive/MyDrive/CodeStates/Section4/project/Before_GAN/train"
test_path = "/content/drive/MyDrive/CodeStates/Section4/project/Before_GAN/test"

# Arguments shared by every loader call below.
common_loader_args = dict(
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# The train folder is split 80/20; both calls use the same seed and split ratio.
train_ds = image_dataset_from_directory(
    train_path, validation_split=0.2, subset="training", **common_loader_args)
val_ds = image_dataset_from_directory(
    train_path, validation_split=0.2, subset="validation", **common_loader_args)

# The test folder is loaded whole, with no split.
test_ds = image_dataset_from_directory(test_path, **common_loader_args)

Found 1726 files belonging to 2 classes.
Using 1381 files for training.
Found 1726 files belonging to 2 classes.
Using 345 files for validation.
Found 433 files belonging to 2 classes.


In [None]:
# Show the class labels exposed by the training dataset.
# NOTE(review): this is read before train_ds is rebound to a transformed
# dataset in a later cell; the transformed object may no longer expose
# `class_names` — confirm before moving this cell.
class_names = train_ds.class_names
print(class_names)

['COVID19', 'NORMAL']


In [None]:
# Count the entries in each class folder under train_path.
# os.listdir counts every entry in the folder, so these totals cover the whole
# train directory (the part later used for validation included).
normal_dir = os.path.join(train_path, 'NORMAL/')
covid19_dir = os.path.join(train_path, 'COVID19/')

num_images_train_normal = len(os.listdir(normal_dir))
num_images_train_covid19 = len(os.listdir(covid19_dir))

print(f"Normal data 수: {num_images_train_normal}")
print(f"COVID19 data 수: {num_images_train_covid19}")

Normal data 수: 1266
COVID19 data 수: 460


In [None]:
# Input-pipeline tuning to make image loading cheaper.

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training data: cache, shuffle with a buffer of 1000, then prefetch.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation data: cache and prefetch only (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Image normalization: multiply the pixel values of all three datasets by 1/255.
# `layers.Rescaling` is the stable location of this layer; the
# `layers.experimental.preprocessing` path used previously is deprecated.
# NOTE: each dataset name is rebound to its own mapped version, so running this
# cell a second time without rebuilding the datasets applies the scaling twice.
normalization_layer = layers.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds = test_ds.map(lambda x, y: (normalization_layer(x), y))

# CNN 모델 형성

In [None]:
# Define the CNN: four 3x3 'same'-padded conv layers (the first three followed
# by 2x2 max-pooling), then a dense head ending in one sigmoid unit.
# The trailing size comments give the spatial size each conv layer works at.
model = Sequential([
    Conv2D(32, (3,3), padding='same', activation='relu',
           input_shape=(img_height, img_width, 3)),            # 128x128
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), padding='same', activation='relu'),      # 64x64
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), padding='same', activation='relu'),     # 32x32
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), padding='same', activation='relu'),     # 16x16
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),                            # single-probability output
])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 128, 128, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 64, 64, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 64, 64, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 32, 32, 64)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 16, 16, 128)     

In [None]:
# Training callbacks.
# The checkpoint file name encodes the configured epoch count and batch size.
# It is a relative path, so the file is written to the current working directory.
filename = 'checkpoint-epoch-{}-batch-{}-trial-001.h5'.format(epoch, batch_size)
checkpoint = ModelCheckpoint(filename,             # checkpoint file path
                             monitor='val_loss',   # save when val_loss improves
                             verbose=1,            # log every save decision
                             save_best_only=True,  # keep only the best model seen so far
                             mode='auto'           # infer min/max from the monitored metric
                            )

# Stop once val_loss has not improved for 50 consecutive epochs.
# restore_best_weights=True rolls the in-memory model back to the best epoch
# when early stopping fires; without it the model would keep the weights from
# `patience` epochs after the best one.
earlystopping = EarlyStopping(monitor='val_loss',         # metric to watch
                              patience=50,                # epochs to wait for an improvement
                              restore_best_weights=True,  # roll back to the best epoch on stop
                             )

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer,
# and accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 모델 훈련

In [None]:
# Train the model, validating on val_ds after every epoch.
# No batch_size is passed: train_ds is a dataset that already yields batches,
# and Keras documents that batch_size should not be specified for dataset inputs.
# The returned History object is kept so the loss/accuracy curves can be
# inspected afterwards (history.history).
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=epoch,
                    callbacks=[checkpoint, earlystopping])

Epoch 1/500
Epoch 00001: val_loss improved from inf to 0.36081, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 2/500
Epoch 00002: val_loss improved from 0.36081 to 0.12327, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 3/500
Epoch 00003: val_loss improved from 0.12327 to 0.09437, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 4/500
Epoch 00004: val_loss improved from 0.09437 to 0.03855, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 5/500
Epoch 00005: val_loss did not improve from 0.03855
Epoch 6/500
Epoch 00006: val_loss improved from 0.03855 to 0.03031, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 7/500
Epoch 00007: val_loss did not improve from 0.03031
Epoch 8/500
Epoch 00008: val_loss improved from 0.03031 to 0.02332, saving model to checkpoint-epoch-500-batch-128-trial-001.h5
Epoch 9/500
Epoch 00009: val_loss did not improve from 0.02332
Epoch 10/500
Epoch 00010: val_loss did not impr

<keras.callbacks.History at 0x7f5d721b0ed0>

In [None]:
# Evaluate the in-memory model on the test dataset; the result is displayed as
# [loss, accuracy]. verbose=2 prints a single summary line.
model.evaluate(test_ds, verbose=2)

4/4 - 6s - loss: 0.0951 - accuracy: 0.9792 - 6s/epoch - 1s/step


[0.09512540698051453, 0.9792147874832153]

# 모델 저장

In [None]:
# Persist the trained model to Google Drive as a joblib pickle.
# NOTE(review): this pickles the Keras model object via joblib; loading it
# back presumably needs a compatible TensorFlow/Keras version — confirm, or
# consider also exporting with model.save().
file_name = 'before_GAN.pkl'
save_path = os.path.join("/content/drive/MyDrive/CodeStates/Section4/project/", file_name)

joblib.dump(model, save_path)

INFO:tensorflow:Assets written to: ram://a2bbd8e9-7b16-4341-9630-3d4d80e570a4/assets


['/content/drive/MyDrive/CodeStates/Section4/project/before_GAN.pkl']