In [1]:
# mnist_cnn
# MNIST and Convolutional Neural Network
# L1,L2 : conv2d + relu + max_pool 
# L3 : FC(Fully Connected Layer)

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Fix TensorFlow's global random seed so the tf.random.normal weight
# initializations used later in this notebook are repeatable across runs.
tf.random.set_seed(5)
# Last expression of the cell: displays the installed TensorFlow version.
tf.__version__

'2.17.0'

In [2]:
# Load the MNIST dataset (train / test images and labels)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Print one shape per line: train images, train labels, test images, test labels
print(*(arr.shape for arr in (x_train, y_train, x_test, y_test)), sep='\n')

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
# One-hot encode the training labels
nb_classes = 10  # number of target classes (digits 0-9)

Y_one_hot = tf.one_hot(indices=y_train, depth=nb_classes)
print(Y_one_hot.shape)  # (60000, 10)

(60000, 10)


In [4]:
# Convert the image data to float32 tensors
x_train = tf.cast(x_train, tf.float32)
print(x_train.shape, x_train.dtype)

x_test = tf.cast(x_test, tf.float32)
print(x_test.shape, x_test.dtype)

(60000, 28, 28) <dtype: 'float32'>
(10000, 28, 28) <dtype: 'float32'>


In [5]:
# Reshape the training images to 4-D: (N, H, W, C)
X_img = tf.reshape(x_train, shape=(-1, 28, 28, 1))
print(X_img.shape)  # (60000, 28, 28, 1)

(60000, 28, 28, 1)


In [6]:
# Layer 1 : conv2d -> relu -> max_pool
# (?, 28, 28, 1) --> (?, 14, 14, 32)

# <1> conv2d
# input image shape : (?, 28, 28, 1)
# filter : (3, 3, 1, 32), i.e. 32 filters
# strides : (1, 1, 1, 1), padding = 'SAME'
# output size : (28 + 2 - 3) / 1 + 1 = 28
# (?, 28, 28, 1) --> (?, 28, 28, 32)
W1 = tf.Variable(
    initial_value=tf.random.normal(shape=(3, 3, 1, 32)),
    name='weight1',
)

def L1_conv2d(X):
    """Convolve X with the layer-1 filter bank W1 (stride 1, 'SAME' padding)."""
    conv_out = tf.nn.conv2d(input=X, filters=W1, strides=[1, 1, 1, 1], padding='SAME')
    return conv_out

# <2> relu
def L1_relu(X):
    """Apply ReLU to the layer-1 convolution of X (shape is unchanged)."""
    pre_activation = L1_conv2d(X)
    return tf.nn.relu(pre_activation)

# <3> max_pool
# input image : (?, 28, 28, 32)
# ksize : (1, 2, 2, 1), strides : (1, 2, 2, 1), padding = 'SAME'
# output size : (28 + 1 - 2) / 2 + 1 = 14
# (?, 28, 28, 32) --> (?, 14, 14, 32)
def L1_MaxPool(X):
    """Run layer 1 on X: conv2d -> relu -> 2x2 max pooling with stride 2."""
    activated = L1_relu(X)
    return tf.nn.max_pool(
        input=activated,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

In [7]:
# L1_MaxPool(X_img[:1])  # output: (?, 14, 14, 32)

In [11]:
# Layer 2 : conv2d -> relu -> max_pool
# (?, 14, 14, 32) --> (?, 7, 7, 64)

# <1> conv2d
# L2 input image shape : (?, 14, 14, 32)   (the output of layer 1)
# filter : (3, 3, 32, 64) = (height, width, in_channels, out_channels), 64 filters
#          in_channels must equal the 32 channels produced by layer 1
# strides : (1, 1, 1, 1), padding = 'SAME'
# output size : (14 + 2 - 3) / 1 + 1 = 14
# (?, 14, 14, 32) --> (?, 14, 14, 64)
W2 = tf.Variable(tf.random.normal([3, 3, 32, 64]), name='weight2')

def L2_conv2d(X):
    """Feed X through layer 1, then convolve the result with W2 (stride 1, 'SAME' padding)."""
    layer1_out = L1_MaxPool(X)
    return tf.nn.conv2d(input=layer1_out, filters=W2, strides=[1, 1, 1, 1], padding='SAME')

# <2> relu
def L2_relu(X):
    """Apply ReLU to the layer-2 convolution of X (shape is unchanged)."""
    pre_activation = L2_conv2d(X)
    return tf.nn.relu(pre_activation)

# <3> max_pool
# input image : (?, 14, 14, 64)
# ksize : (1, 2, 2, 1), strides : (1, 2, 2, 1), padding = 'SAME'
# output size : (14 + 1 - 2) / 2 + 1 = 7
# (?, 14, 14, 64) --> (?, 7, 7, 64)
def L2_MaxPool(X):
    """Run layers 1 and 2 on X, ending with 2x2 max pooling with stride 2."""
    activated = L2_relu(X)
    return tf.nn.max_pool(
        input=activated,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

# <4> flatten layer : collapse the multi-dimensional feature maps to 2-D for the FC layer
def L2_flat(X):
    """Return the layer-2 pooled output of X reshaped to (-1, 7*7*64)."""
    pooled = L2_MaxPool(X)
    return tf.reshape(pooled, shape=(-1, 7 * 7 * 64))

In [19]:
# L2_MaxPool(X_img[:1])  # output: (?, 7, 7, 64)
# L2_flat(X_img[:1])  # output: (?, 7*7*64) = (?, 3136)

<tf.Tensor: shape=(1, 3136), dtype=float32, numpy=array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [20]:
# Layer 3 : FC (Fully Connected Layer)
# (?, 7*7*64) x (7*7*64, 10) = (?, 10)
W3 = tf.Variable(tf.random.normal(shape=(7 * 7 * 64, nb_classes)), name='weight3')
b = tf.Variable(tf.random.normal(shape=(nb_classes,)), name='bias')

In [22]:
# Prediction function (hypothesis) : H(X) = softmax(X*W + b)
def logits(X):
    """Return the pre-softmax class scores: flattened layer-2 features times W3, plus b."""
    features = L2_flat(X)
    scores = tf.matmul(features, W3)
    return scores + b
    
def hypothesis(X):
    """Return the softmax of logits(X)."""
    class_scores = logits(X)
    return tf.nn.softmax(class_scores)

In [23]:
# Cost function : uses tf.nn.softmax_cross_entropy_with_logits()
# def cost_func():
#     cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits(X_img),
#                                              labels = Y_one_hot)
#     cost =  tf.reduce_mean(cost_i)
#     return cost

In [None]:
# Train in mini-batches of batch_size samples instead of on the whole set at once.

training_epoch = 15  # 50
batch_size = 600     # 32

# Optimizer: Adam, created with learning_rate = 0.01
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)

# Variables updated by training: both conv filter banks, the FC weight and the bias
train_vars = [W1, W2, W3, b]

# Start training : mini-batch learning
print('****** Start Learning!!')

for epoch in range(training_epoch):
    avg_cost = 0.0

    # Integer division: range() needs an int (60000 // 600 = 100).
    # Samples beyond the last full batch, if any, are not used.
    toal_batch = x_train.shape[0] // batch_size
    for k in range(toal_batch):
        # Each batch covers [k*batch_size, (k+1)*batch_size): exactly batch_size rows.
        start = k * batch_size
        end = start + batch_size
        # Slice the 4-D images (N, 28, 28, 1); the conv layers cannot take the 3-D x_train.
        batch_xs = X_img[start:end]      # batch_size images
        batch_ys = Y_one_hot[start:end]  # batch_size one-hot labels

        # Forward pass under the tape so gradients w.r.t. train_vars can be taken.
        with tf.GradientTape() as tape:
            cost_i = tf.nn.softmax_cross_entropy_with_logits(
                labels=batch_ys, logits=logits(batch_xs))
            cost = tf.reduce_mean(cost_i)

        # One Adam update step on this batch.
        grads = tape.gradient(cost, train_vars)
        optimizer.apply_gradients(zip(grads, train_vars))

        # Accumulate the mean batch cost for the per-epoch report.
        avg_cost += cost.numpy() / toal_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost:', '{:.9f}'.format(avg_cost))

print('****** Learning Finished!!')

In [28]:
# Display the shape of the training-image tensor.
x_train.shape

TensorShape([60000, 28, 28])

In [24]:
# Display the shape of the one-hot encoded training labels.
Y_one_hot.shape

TensorShape([60000, 10])