# mnist_cnn
MNIST and Convolutional Neural Network
L1,L2 : conv2d + relu + max_pool
L3 : FC(Fully Connected Layer)

In [14]:
import tensorflow as tf
import numpy as np
import os
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'


In [17]:
mnist = tf.keras.datasets.mnist.load_data()

In [18]:
(x_train, y_train), (x_test, y_test) = mnist
print(x_train,'\n', y_train)

[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]] 
 [5 0 4 ... 5 6 8]


In [19]:
nb_class = 10

y_one_hot = tf.one_hot(y_train, nb_class)
print(y_one_hot)

tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]], shape=(60000, 10), dtype=float32)


In [20]:
x_train_img = x_train.reshape(-1, 28, 28, 1)
x_test_img = x_test.reshape(-1, 28, 28, 1)

In [21]:
x_train_img = tf.cast(x_train_img, tf.float32)
x_test_img = tf.cast(x_test_img, tf.float32)

In [22]:
x_train_img.shape, y_train.shape

(TensorShape([60000, 28, 28, 1]), (60000,))

# Layer 1 : conv2d - relu - max_pool

# conv2d
L1 input image shape : (60000, 28, 28, 1)
filter : (3,3,1,32), 필터 32개
strides : (1,1,1,1), padding='SAME'
출력 이미지 : (28+2 - 3)/1 + 1 = 28
(?, 28, 28, 1) --> (?, 28, 28, 32)

# max_pool
input image : (?, 28, 28, 32)
ksize : (1,2,2,1), strides : (1,2,2,1), padding='SAME'
출력 이미지 : (28+1 - 2)/2 + 1 = 14
(?, 28, 28, 32) -->  (?, 14, 14, 32)

In [8]:
w1 = tf.Variable(np.random.random([3, 3, 1, 32]), dtype=tf.float32, name='weight1')

def layer1_conv2d(x):
    output = tf.nn.conv2d(x, filters= w1, strides= [1,1,1,1], padding='SAME')
    return output

def layer1_relu(x):
    output = tf.nn.relu(layer1_conv2d(x))
    return output

def layer1_max_pool(x):
    output = tf.nn.max_pool(layer1_relu(x), ksize=[1,2,2,1], strides=[1,2,2,1], padding= 'SAME')
    return output


# Layer 2 : conv2d - relu - max_pool
# conv2d
L1
input
image
shape: (60000, 14, 14, 32)
filter: (3, 3, 32, 64), 필터 64개
strides: (1, 1, 1, 1), padding='SAME'
출력
(?, 14, 14, 32) --> (?, 14, 14, 64)


# max_pool
input
image: (?, 14, 14, 64)
ksize: (1, 2, 2, 1), strides: (1, 2, 2, 1), padding='SAME'
출력
(?, 14, 14, 64) -->  (?, 7, 7, 64)

# flatten layer
(?, 7, 7, 64) --> (?, 7 * 7 * 64)

In [9]:
w2 = tf.Variable(np.random.random([3, 3, 32, 64]), dtype=tf.float32, name='weight2')

def layer2_conv2d(x):
    output = tf.nn.conv2d(layer1_max_pool(x), filters= w2, strides= [1,1,1,1], padding='SAME')
    return output

def layer2_relu(x):
    output = tf.nn.relu(layer2_conv2d(x))
    return output

def layer2_max_pool(x):
    output = tf.nn.max_pool(layer2_relu(x), ksize=[1,2,2,1], strides=[1,2,2,1], padding= 'SAME')
    return output

def layer2_flat(x):
    output = tf.reshape(layer2_max_pool(x), [-1,7*7*64])
    return output

# layer 3 : fully connected layer, 출력층
(?, 7 * 7 *64) --> (7 * 7 * 64, 10) = (?. 10)

In [10]:
w3 = tf.Variable(np.random.random([7*7*64, 10]), dtype=tf.float32, name='weight3')
b3 = tf.Variable(np.random.random([10]), dtype=tf.float32, name='bias3')

In [11]:
def logits(x):
    output = tf.matmul(layer2_flat(x), w3) + b3
    return output

def hyp(x):
    return tf.nn.softmax(logits(x))

In [12]:
optimizer = tf.optimizers.Adam(learning_rate= 0.01)
training_epoch = 60
batch_size = 256

print('***** Start Learning!!')
for epoch in range(training_epoch):
    avg_cost = 0
    total_batch = int(x_train.shape[0]/batch_size)
    for k in range(total_batch):
        batch_xs = x_train[0 + k*batch_size: 0 + (k+1)*batch_size]
        batch_ys = y_one_hot[0 + k*batch_size: 0 + (k+1)*batch_size]

        batch_xs = batch_xs.reshape(-1,28,28,1)

        # 비용함수
        def cost_func_batch():
            cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits(batch_xs), labels= batch_ys)
            cost = tf.reduce_mean(cost_i)
            return cost

        optimizer.minimize(loss=cost_func_batch, var_list=[w1, w2, w3, b3])
        avg_cost += cost_func_batch().numpy() / total_batch
    print('epoch :',epoch + 1,'cost :',avg_cost,'\n', )
print('***** Learning Finished!!')

***** Start Learning!!
epoch : 1 cost : 65410.28257086337 

epoch : 2 cost : 1634.7244681333877 

epoch : 3 cost : 550.7029661520934 

epoch : 4 cost : 256.6352612422066 

epoch : 5 cost : 135.04888909494778 

epoch : 6 cost : 71.25615511185086 

epoch : 7 cost : 38.98600306877725 

epoch : 8 cost : 20.605115870125275 

epoch : 9 cost : 11.732631138247301 

epoch : 10 cost : 7.39674729363531 

epoch : 11 cost : 5.017719980743189 

epoch : 12 cost : 3.655754891598326 

epoch : 13 cost : 3.5618619597875174 

epoch : 14 cost : 2.969856640212556 

epoch : 15 cost : 2.6797691502122793 

epoch : 16 cost : 2.500251399146186 

epoch : 17 cost : 2.3945851743730726 

epoch : 18 cost : 2.3357717012747727 

epoch : 19 cost : 2.3058437638812594 

epoch : 20 cost : 2.2965343507946048 

epoch : 21 cost : 2.2957005663814694 

epoch : 22 cost : 2.295732230202764 

epoch : 23 cost : 2.2957499312539382 

epoch : 24 cost : 2.2957652063451257 

epoch : 25 cost : 2.2957850592768105 

epoch : 26 cost : 2.295

In [14]:
def pred(x):
    return tf.argmax(logits(x),  axis=1)

def accuracy(pred, real_y):
    return pd.DataFrame(pred, real_y).assign(equal = pred == real_y)

In [23]:
pred(x_train_img)

ResourceExhaustedError: {{function_node __wrapped__Conv2D_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[60000,28,28,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2D]

In [None]:
pred(x_test_img)

In [None]:
accuracy(pred(x_train_img), tf.argmax(y_train, axis=1))[ 'equal' ].mean()

In [None]:
accuracy(pred(x_test), y_test)[ 'equal' ].mean()

# 실습 과제
mnist_cnn_deep
MNIST and Convolutional Neural Network
L1,L2,L3 : conv2d + relu + max_pool
L4,L5 : FC(Fully Connected Layer)

출력 size : 32(L1) --> 64(L2)-->128(L3) --> 512(L4) --> 10(L5)

In [1]:
import tensorflow as tf
import numpy as np
import os
# os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'


In [2]:
mnist = tf.keras.datasets.mnist.load_data()
(x_train, y_train), (x_test, y_test) = mnist
print(x_train, '\n', y_train)
nb_class = 10

y_one_hot = tf.one_hot(y_train, nb_class)
print(y_one_hot)
x_train_img = x_train.reshape(-1, 28, 28, 1)
x_test_img = x_test.reshape(-1, 28, 28, 1)
x_train_img = tf.cast(x_train_img, tf.float32)
x_test_img = tf.cast(x_test_img, tf.float32)
x_train_img.shape, y_train.shape

[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]] 
 [5 0 4 ... 5 6 8]
tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]], shape=(60000, 10), dtype=float32)


(TensorShape([60000, 28, 28, 1]), (60000,))

In [3]:
w1 = tf.Variable(np.random.random([3, 3, 1, 32]), dtype=tf.float32, name='weight1')

def layer1_conv2d(x):
    output = tf.nn.conv2d(x, filters= w1, strides= [1,1,1,1], padding='SAME')
    return output

def layer1_relu(x):
    output = tf.nn.relu(layer1_conv2d(x))
    return output

def layer1_max_pool(x):
    output = tf.nn.max_pool(layer1_relu(x), ksize=[1,2,2,1], strides=[1,2,2,1], padding= 'SAME')
    return output

In [4]:
w2 = tf.Variable(np.random.random([3, 3, 32, 64]), dtype=tf.float32, name='weight2')

def layer2_conv2d(x):
    output = tf.nn.conv2d(layer1_max_pool(x), filters= w2, strides= [1,1,1,1], padding='SAME')
    return output

def layer2_relu(x):
    output = tf.nn.relu(layer2_conv2d(x))
    return output

def layer2_max_pool(x):
    output = tf.nn.max_pool(layer2_relu(x), ksize=[1,2,2,1], strides=[1,2,2,1], padding= 'SAME')
    return output

In [5]:
layer2_max_pool(x_train_img[:5]).shape

TensorShape([5, 7, 7, 64])

In [6]:
w3 = tf.Variable(np.random.random([3, 3, 64, 128]), dtype=tf.float32, name='weight3')

def layer3_conv2d(x):
    output = tf.nn.conv2d(layer2_max_pool(x), filters= w3, strides= [1,1,1,1], padding='SAME')
    return output

def layer3_relu(x):
    output = tf.nn.relu(layer3_conv2d(x))
    return output

def layer3_max_pool(x):
    output = tf.nn.max_pool(layer3_relu(x), ksize=[1,1,1,1], strides=[1,1,1,1], padding= 'SAME')
    return output

def layer3_flat(x):
    output = tf.reshape(layer3_max_pool(x), [-1,7*7*128])
    return output

In [7]:
layer3_max_pool(x_train_img[:5]).shape

TensorShape([5, 7, 7, 128])

In [8]:
w4 = tf.Variable(np.random.random([7*7*128, 512]), dtype=tf.float32, name='weight4')
b4 = tf.Variable(np.random.random([512]), dtype=tf.float32, name='bias4')

def layer4_fully_connect(x):
    output = tf.matmul(layer3_flat(x), w4) + b4
    return output

def layer4_relu(x):
    output = tf.nn.relu(layer4_fully_connect(x))
    return output


In [9]:
layer4_fully_connect(x_train_img[:5]).shape

TensorShape([5, 512])

In [10]:
w5 = tf.Variable(np.random.random([512, 10]), dtype=tf.float32, name='weight5')
b5 = tf.Variable(np.random.random([10]), dtype=tf.float32, name='bias5')

def logits(x):
    output = tf.matmul(layer4_relu(x), w5) + b5
    return output

# def layer5_relu(x):
#     output = tf.nn.relu(layer5_fully_connect(x))
#     return output

In [11]:
logits(x_train_img[:5]).shape

TensorShape([5, 10])

In [23]:
optimizer = tf.optimizers.Adam(learning_rate= 0.01)
training_epoch = 20
batch_size = 256

print('***** Start Learning!!')
for epoch in range(training_epoch):
    avg_cost = 0
    total_batch = int(x_train.shape[0]/batch_size)
    for k in range(total_batch):
        batch_xs = x_train[0 + k*batch_size: 0 + (k+1)*batch_size]
        batch_ys = y_one_hot[0 + k*batch_size: 0 + (k+1)*batch_size]

        batch_xs = batch_xs.reshape(-1,28,28,1)

        # 비용함수
        def cost_func_batch():
            cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits(batch_xs), labels= batch_ys)
            cost = tf.reduce_mean(cost_i)
            return cost

        optimizer.minimize(loss=cost_func_batch, var_list=[w1, w2, w3, w4, w5, b4, b5])
        avg_cost += cost_func_batch().numpy() / total_batch
    print('epoch :',epoch + 1,'cost :',avg_cost,'\n', )
print('***** Learning Finished!!')

***** Start Learning!!
epoch : 1 cost : 0.14331184269494215 

epoch : 2 cost : 0.16853582968091604 

epoch : 3 cost : 0.14209339129300708 

epoch : 4 cost : 0.14827416227477752 

epoch : 5 cost : 0.12255807986689946 

epoch : 6 cost : 0.11139280690003632 

epoch : 7 cost : 0.1015262903653595 

epoch : 8 cost : 0.08718080681243227 

epoch : 9 cost : 0.08266444206556191 

epoch : 10 cost : 0.0788774233486535 

epoch : 11 cost : 0.07563783867189135 

epoch : 12 cost : 0.09196425098766625 

epoch : 13 cost : 0.09271291689549252 

epoch : 14 cost : 0.07714217371191892 



KeyboardInterrupt: 

In [24]:
def pred(x):
    return tf.argmax(logits(x), axis=1)


def accuracy(pred, real_y):
    return pd.DataFrame(pred, real_y).assign(equal=pred == real_y)



In [25]:
pred(x_test_img[:500])

<tf.Tensor: shape=(500,), dtype=int64, numpy=
array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 8, 4, 7, 6,
       6, 5, 4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2,
       3, 0, 1, 2, 9, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3, 7, 1,
       6, 4, 3, 0, 7, 0, 2, 9, 1, 4, 3, 2, 9, 7, 9, 6, 2, 7, 8, 4, 7, 3,
       6, 1, 3, 6, 9, 3, 1, 4, 1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 4, 4,
       8, 7, 3, 9, 7, 4, 4, 4, 9, 2, 5, 4, 7, 6, 7, 4, 0, 5, 3, 5, 6, 6,
       5, 7, 8, 1, 0, 1, 6, 4, 6, 7, 3, 1, 7, 1, 8, 2, 0, 4, 4, 9, 5, 5,
       1, 5, 6, 0, 3, 4, 8, 6, 5, 4, 6, 5, 4, 3, 1, 4, 4, 7, 2, 3, 2, 7,
       1, 8, 1, 8, 1, 8, 5, 0, 3, 9, 2, 5, 0, 1, 1, 1, 0, 9, 0, 3, 1, 6,
       4, 2, 3, 6, 1, 1, 1, 3, 9, 5, 2, 4, 4, 5, 9, 3, 8, 0, 3, 6, 5, 5,
       7, 3, 2, 7, 1, 2, 8, 4, 1, 7, 3, 3, 8, 8, 7, 9, 2, 2, 4, 1, 5, 0,
       8, 7, 1, 3, 0, 6, 4, 2, 4, 1, 9, 5, 7, 1, 2, 8, 2, 6, 8, 5, 7, 7,
       4, 1, 9, 1, 8, 0, 3, 0, 1, 9, 9, 4, 1, 8, 2, 1, 2, 9, 1, 5, 9, 2,
     

In [26]:
accuracy(pred(x_train_img[:500]), (y_train[:500]))[ 'equal' ].mean()

0.956