In [1]:
#2021.07.01. THUR 
#Hankyeong

#00. 패키지 호출
import warnings
import numpy as np 
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img 
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

#00-1. warning message ignore
warnings.filterwarnings(action='ignore')


In [2]:
#01. Load and preprocess the dataset.
#(1) Build the train image generator; `rescale` multiplies every pixel value by 1/255.
train_data_gen = ImageDataGenerator(rescale=1.0 / 255)

#(2) Read the training images from disk, resized to 24x24, in batches of 3
#    with one-hot ('categorical') labels.
train_generator = train_data_gen.flow_from_directory(
    directory='../../data/hard_handwriting_shape/train',
    target_size=(24, 24),
    class_mode='categorical',
    batch_size=3,
)

Found 45 images belonging to 3 classes.


In [3]:
#(3) Inspect the first five file paths picked up by the train generator.
train_generator.filenames[:5]

['circle\\circle001.png',
 'circle\\circle002.png',
 'circle\\circle003.png',
 'circle\\circle004.png',
 'circle\\circle005.png']

In [4]:
#(4) Inspect the integer class label assigned to each training file.
train_generator.labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2])

In [5]:
#(5) Build the test image generator; `rescale` multiplies every pixel value by 1/255.
test_data_gen = ImageDataGenerator(rescale=1.0 / 255)

#(6) Read the test images from disk, resized to 24x24, in batches of 3
#    with one-hot ('categorical') labels.
test_generator = test_data_gen.flow_from_directory(
    directory='../../data/hard_handwriting_shape/test',
    target_size=(24, 24),
    class_mode='categorical',
    batch_size=3,
)

Found 15 images belonging to 3 classes.


In [6]:
#(7) Inspect the first five file paths picked up by the test generator.
test_generator.filenames[:5]

['circle\\circle021.png',
 'circle\\circle022.png',
 'circle\\circle023.png',
 'circle\\circle024.png',
 'circle\\circle025.png']

In [7]:
#(8) Inspect the integer class label assigned to each test file.
test_generator.labels

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

In [8]:
#04. Define, compile, train, predict with and evaluate the CNN model.
#(1) Define the model by stacking the layers one at a time.
model = Sequential()
# Two 3x3 convolutions on 24x24 RGB input, followed by 2x2 max pooling.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(24, 24, 3)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
# Classifier head: flatten, one hidden dense layer, 3-way softmax output.
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=3, activation='softmax'))

#(2) Print the layer-by-layer summary of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 22, 22, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 10, 10, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               819328    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0

In [10]:
#(3) Configure the model for training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    # `metrics` is documented as a list; newer Keras releases reject a bare string.
    metrics=['accuracy']
)

#(4) Create the checkpoint callback: with save_best_only=True the file at
#    MODEL_PATH is only overwritten when val_loss improves.
MODEL_PATH = '../../model/hard_polygon_best_CNN_model.h5'
checkpointer = ModelCheckpoint(
    filepath=MODEL_PATH,
    monitor='val_loss',
    save_best_only=True,
    verbose=0
)

#(5) Create the early-stopping callback: stop once val_loss has not improved
#    for 1000 consecutive epochs.
earlystopping = EarlyStopping(monitor='val_loss', patience=1000)

#(6) Train the model.
#    Model.fit accepts generators directly; fit_generator is deprecated.
#    NOTE(review): test_generator is used both for validation/checkpoint
#    selection here and for the final evaluation below, so the reported score
#    is optimistic — consider a separate validation split.
history = model.fit(
    train_generator,
    steps_per_epoch=15,
    epochs=150000,
    validation_data=test_generator,
    validation_steps=5,
    verbose=0,
    callbacks=[checkpointer, earlystopping]
)

#(7) Reload the best checkpoint saved during training.
best_model = load_model(MODEL_PATH)

#(8) Evaluate the best model; the displayed result is [loss, accuracy].
best_model.evaluate(test_generator, steps=5)



[8.369754791259766, 0.3333333432674408]

In [11]:
#05. Data augmentation for the train dataset.
#(1) Build a train image generator that rescales pixels and applies random
#    rotation, shift, shear, zoom and flips.
#    NOTE(review): Keras documents shear_range as an angle in degrees —
#    confirm that 0.5 is the intended magnitude.
train_data_gen_2 = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.5,
    zoom_range=[0.8, 2.0],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

#(2) Read the training images through the augmenting generator, resized to
#    24x24, in batches of 3 with one-hot ('categorical') labels.
train_generator_2 = train_data_gen_2.flow_from_directory(
    directory='../../data/hard_handwriting_shape/train',
    target_size=(24, 24),
    class_mode='categorical',
    batch_size=3,
)

Found 45 images belonging to 3 classes.


In [12]:
#06. Define, compile, train, predict with and evaluate the CNN model.
#(1) Define the model by stacking the layers one at a time.
model_2 = Sequential()
# Two 3x3 convolutions on 24x24 RGB input, followed by 2x2 max pooling.
model_2.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(24, 24, 3)))
model_2.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model_2.add(MaxPooling2D(pool_size=(2, 2)))
model_2.add(Dropout(rate=0.25))
# Classifier head: flatten, one hidden dense layer, 3-way softmax output.
model_2.add(Flatten())
model_2.add(Dense(units=128, activation='relu'))
model_2.add(Dropout(rate=0.5))
model_2.add(Dense(units=3, activation='softmax'))

#(2) Print the layer-by-layer summary of the model.
model_2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 22, 22, 32)        896       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 20, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 10, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               819328    
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)              

In [14]:
#(3) Configure the model for training.
model_2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    # `metrics` is documented as a list; newer Keras releases reject a bare string.
    metrics=['accuracy']
)

#(4) Create the checkpoint callback: with save_best_only=True the file at
#    MODEL_2_PATH is only overwritten when val_loss improves.
MODEL_2_PATH = '../../model/hard_polygon_best_CNN_model_2.h5'
checkpointer = ModelCheckpoint(
    filepath=MODEL_2_PATH,
    monitor='val_loss',
    save_best_only=True,
    verbose=0
)

#(5) Create the early-stopping callback: stop once val_loss has not improved
#    for 7500 consecutive epochs.
earlystopping = EarlyStopping(monitor='val_loss', patience=7500)

#(6) Train the model on the augmented training generator.
#    Model.fit accepts generators directly; fit_generator is deprecated.
#    NOTE(review): test_generator is used both for validation/checkpoint
#    selection here and for the final evaluation below, so the reported score
#    is optimistic — consider a separate validation split.
history_2 = model_2.fit(
    train_generator_2,
    steps_per_epoch=15,
    epochs=150000,
    validation_data=test_generator,
    validation_steps=5,
    verbose=0,
    callbacks=[checkpointer, earlystopping]
)

#(7) Reload the best checkpoint saved during training.
best_model_2 = load_model(MODEL_2_PATH)

#(8) Evaluate the best model; the displayed result is [loss, accuracy].
best_model_2.evaluate(test_generator, steps=5)



[3.8434176445007324, 0.800000011920929]