In [1]:
# 기본
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-whitegrid') # sns에 흰색 그리드 유지
import missingno # 결측치 시각화

# KFold (교차 검증을 사용하기 위해)
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# 교차검증 함수
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

# 학습 데이터와 검증 데이터로 나누는 함수
from sklearn.model_selection import train_test_split

# 데이터 전처리
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# 하이퍼 파라미터 튜닝
from sklearn.model_selection import GridSearchCV

# 평가 함수
from sklearn.metrics import accuracy_score

# 머신러닝 알고리즘 - 분류
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier


# 머신러닝 알고리즘 - 회귀
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import  XGBRegressor

# 머신러닝 알고리즘 - 군집
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift

# 머신러닝 알고리즘 - 차원축소
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# 딥러닝 알고리즘 
from keras.models import Sequential
import tensorflow as tf

from keras.utils import np_utils
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D

# 다중분류를 위한 원핫 인코더
from keras.utils import to_categorical

# 학습 자동 중단
from keras.callbacks import EarlyStopping

# 모델 저장
from keras.callbacks import ModelCheckpoint

# 저장된 딥러닝 모델 불러오기
from keras.models import load_model

# 저장
import pickle

# 시간 모듈
import time

# 그래프 설정
# plt.rcParams['font.family'] = 'Malgun Gothic'   # 윈도우용
plt.rcParams['font.family'] = 'AppleGothic'   # 맥용
plt.rcParams['font.size'] = 10                 # 폰트 크기
plt.rcParams['figure.figsize'] = 10,8          # 그래프 크기
plt.rcParams['axes.unicode_minus'] = False     # - 기호 깨짐 방지


# 경고 메시지가 안나오게 하기
import warnings
warnings.filterwarnings('ignore')

In [2]:
# MNIST를 위한..
from keras.datasets import mnist

In [3]:
# gpu 사용 초기화 및 할당
# gpus= tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(gpus[0], True)

# seed값 설정
np.random.seed(3)
tf.random.set_seed(3)

In [4]:
# 데이터 불러오기
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [5]:
# 결과데이터 원핫 인코딩
y_train = np_utils.to_categorical(y_train)
y_test  = np_utils.to_categorical(y_test)

In [6]:
# 2차원으로 되어 있는 이미지 데이터를 CNN에 적합한 행렬 형태로 변환
X_train = X_train.reshape(X_train.shape[0],28,28,1)
X_test = X_test.reshape(X_test.shape[0],28,28,1)

# 정규화 0~1의 값으로 조정한다.
X_train = X_train.astype('float64')
X_train = X_train / 255
X_test = X_test.astype('float64')
X_test = X_test / 255

In [7]:
# CNN 설정
# Conv2D : 케라스에서 컨볼루션 레이어를 추가하는 함수
# Maxpooling2D : 데이터 축소
# Dropout : 은닉층의 노드를 임의로 꺼주는 것 (과적합 방지)
# Flatten : 2차원 배열을 1차원으로 변환

model = Sequential()

# 입력층 + 은닉층 : Conv2D
model.add(Conv2D(32, kernel_size=(3,3), input_shape=(28,28,1), activation='relu'))
# 은닉층
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=2)) # 데이터 축소
model.add(Dropout(0.25)) 
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# 출력층
model.add(Dense(10, activation='softmax'))

In [8]:
# 모델 컴파일 
model.compile(loss ='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
# 모델 저장 콜백
model_path = 'models/MNIST/deep/{epoch} - {val_loss}.hdf5'
callback1 = ModelCheckpoint(filepath=model_path, monitor='accuracy',verbose=1,
                           save_best_only=True)

# 조기중단 콜백
callback2 = EarlyStopping(monitor='val_loss' ,patience=10)

In [10]:
# 모델 학습
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                   epochs=30, batch_size=200, verbose=0, callbacks=[callback1,callback2])


Epoch 00001: accuracy improved from -inf to 0.91765, saving model to models/MNIST/deep/1 - 0.053644560277462006.hdf5

Epoch 00002: accuracy improved from 0.91765 to 0.97392, saving model to models/MNIST/deep/2 - 0.03966768831014633.hdf5


KeyboardInterrupt: 

In [None]:
# 정확도 확인
best_model = load_model()

best_model.evluate(X_test, y_test)[]

In [None]:
# 학습 데이터 오차
y_loss = history.history['loss']
y_loss

In [None]:
# 검증 데이터 오차
y_vloss = history.history['val_loss']
y_vloss

In [None]:
# 그래프 표현
x_len = np.arange(len(y_loss))

plt.plot(x_len, y_vloss, marker='.', label='테스트 데이터')
plt.plot(x_len, y_loss, marker='.', label='학습 데이터')
plt.legend()
plt.show()