# 18-2. 신경망 구성 (1) 개요

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# MNIST 데이터를 로드. 다운로드하지 않았다면 다운로드까지 자동으로 진행됩니다. 
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()   

# 모델에 맞게 데이터 가공
x_train_norm, x_test_norm = x_train / 255.0, x_test / 255.0
x_train_reshaped = x_train_norm.reshape(-1, x_train_norm.shape[1]*x_train_norm.shape[2])
x_test_reshaped = x_test_norm.reshape(-1, x_test_norm.shape[1]*x_test_norm.shape[2])

# 딥러닝 모델 구성 - 2 Layer Perceptron
model=keras.models.Sequential()
model.add(keras.layers.Dense(50, activation='sigmoid', input_shape=(784,)))  # 입력층 d=784, 은닉층 레이어 H=50
model.add(keras.layers.Dense(10, activation='softmax'))   # 출력층 레이어 K=10
model.summary()

# 모델 구성과 학습
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])
model.fit(x_train_reshaped, y_train, epochs=10)

# 모델 테스트 결과
test_loss, test_accuracy = model.evaluate(x_test_reshaped,y_test, verbose=2)
print("test_loss: {} ".format(test_loss))
print("test_accuracy: {}".format(test_accuracy))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                39250     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                510       
Total params: 39,760
Trainable params: 39,760
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
313/313 - 0s - loss: 0.1041 - accuracy: 0.9672
test_loss: 0.10410819202661514 
test_accuracy: 0.967199981212616


In [2]:
# 입력층 데이터의 모양(shape)
print(x_train_reshaped.shape)

# 테스트를 위해 x_train_reshaped의 앞 5개의 데이터를 가져온다.
X = x_train_reshaped[:5]
print(X.shape)

(60000, 784)
(5, 784)


In [3]:
weight_init_std = 0.1
input_size = 784
hidden_size=50

# 인접 레이어간 관계를 나타내는 파라미터 W를 생성하고 random 초기화
W1 = weight_init_std * np.random.randn(input_size, hidden_size)  
# 바이어스 파라미터 b를 생성하고 Zero로 초기화
b1 = np.zeros(hidden_size)

a1 = np.dot(X, W1) + b1   # 은닉층 출력

print(W1.shape)
print(b1.shape)
print(a1.shape)

(784, 50)
(50,)
(5, 50)


In [4]:
# 첫 번째 데이터의 은닉층 출력을 확인해 봅시다.  50dim의 벡터가 나오나요?
a1[0]

array([-1.0746363 , -0.6030655 ,  0.069531  , -0.17460482,  0.35397097,
        1.54747412, -0.3673675 , -1.03805019, -0.41942205,  1.41660513,
       -0.51756174,  0.5525889 , -1.91118091,  0.43183832, -0.13401952,
        0.55830453, -1.37730792,  1.0045746 , -2.11288315, -0.99751683,
       -1.33082135,  1.33328086,  1.10629416, -0.38479068,  0.01135605,
       -0.19143257, -0.42837175, -0.39390736,  1.13491548,  2.02732874,
       -0.30054547,  0.8722763 ,  0.84078451,  0.13908758, -1.58889646,
        0.71287168,  0.71037166,  0.65748565,  1.34994493,  0.56602528,
       -1.87807223,  0.77636357, -0.75371087,  0.5279212 , -0.06688803,
        2.1646689 ,  1.20026805, -0.41069282,  0.55743981,  0.79085812])

# 18-3. 신경망 구성 (2) 활성화 함수와 손실 함수

In [5]:
a1

array([[-1.0746363 , -0.6030655 ,  0.069531  , -0.17460482,  0.35397097,
         1.54747412, -0.3673675 , -1.03805019, -0.41942205,  1.41660513,
        -0.51756174,  0.5525889 , -1.91118091,  0.43183832, -0.13401952,
         0.55830453, -1.37730792,  1.0045746 , -2.11288315, -0.99751683,
        -1.33082135,  1.33328086,  1.10629416, -0.38479068,  0.01135605,
        -0.19143257, -0.42837175, -0.39390736,  1.13491548,  2.02732874,
        -0.30054547,  0.8722763 ,  0.84078451,  0.13908758, -1.58889646,
         0.71287168,  0.71037166,  0.65748565,  1.34994493,  0.56602528,
        -1.87807223,  0.77636357, -0.75371087,  0.5279212 , -0.06688803,
         2.1646689 ,  1.20026805, -0.41069282,  0.55743981,  0.79085812],
       [-2.15192582, -0.12312737,  0.28485943,  0.40300915,  0.31853434,
         0.8821356 , -1.79179754, -0.53503969, -0.48087875, -0.09891713,
        -0.56095271,  1.55283798, -2.31278424,  0.7172017 ,  0.38088244,
         1.8128909 , -1.68354764,  0.86330211, -0.

In [6]:
# 위 수식의 sigmoid 함수를 구현해 봅니다.
def sigmoid(x):
    return 1 / (1 + np.exp(-x))  


z1 = sigmoid(a1)
print(z1[0])  # sigmoid의 출력은 모든 element가 0에서 1사이

[0.25452239 0.35364267 0.51737575 0.45645936 0.5875802  0.82454862
 0.40917728 0.26152639 0.39665506 0.80480566 0.37342256 0.63473603
 0.12884824 0.60631256 0.46654518 0.63606015 0.20144171 0.73195705
 0.10785094 0.26942992 0.20902354 0.79138281 0.75143758 0.40497196
 0.50283898 0.45228748 0.39451521 0.40277704 0.75674489 0.88363669
 0.42542414 0.70521913 0.69863042 0.53471595 0.16953921 0.67103539
 0.67048328 0.65869535 0.79412062 0.63784552 0.13261046 0.68489585
 0.32001326 0.62899813 0.48328422 0.8970316  0.76857246 0.39874601
 0.63585996 0.68801556]


In [7]:
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
# 단일 레이어 구현 함수
def affine_layer_forward(X, W, b):
    y = np.dot(X, W) + b
    cache = (X, W, b)
    return y, cache

print('go~')

go~


In [9]:
input_size = 784
hidden_size = 50
output_size = 10

W1 = weight_init_std * np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = weight_init_std * np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)

a1, cache1 = affine_layer_forward(X, W1, b1)
z1 = sigmoid(a1)
a2, cache2 = affine_layer_forward(z1, W2, b2)    # z1이 다시 두번째 레이어의 입력이 됩니다. 

print(a2[0])  # 최종 출력이 output_size만큼의 벡터가 되었습니다.

[ 0.57019891  0.37766416 -0.10946128  0.1796736   0.00267533 -0.40224146
 -0.13117736  0.24739873 -0.43220359 -0.03416542]


In [12]:
def softmax(x):
    if x.ndim == 2:
        x = x.T
        print(x)
        x = x - np.max(x, axis=0)
        print(x)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        print(y)
        return y.T 

    x = x - np.max(x) # 오버플로 대책
    return np.exp(x) / np.sum(np.exp(x))

In [13]:
y_hat = softmax(a2)
y_hat[0]  # 10개의 숫자 중 하나일 확률이 되었습니다.

[[ 0.57019891  0.44806036  0.54878498  0.65561112  0.61583903]
 [ 0.37766416  0.39590998  0.45813305  0.50039211  0.38428069]
 [-0.10946128  0.12337557 -0.07258199 -0.12586432 -0.01325028]
 [ 0.1796736   0.18846405  0.34156063  0.48174984  0.31974674]
 [ 0.00267533 -0.03109572 -0.15847526 -0.15963382 -0.19016816]
 [-0.40224146 -0.27448957 -0.50067373 -0.56607983 -0.5498717 ]
 [-0.13117736 -0.35503506 -0.17128517 -0.10527203 -0.04511415]
 [ 0.24739873  0.20391317  0.29066747  0.43242692  0.32901033]
 [-0.43220359 -0.36231135 -0.23674472 -0.33558683 -0.30148199]
 [-0.03416542  0.18316589 -0.06958024 -0.20733133 -0.13956886]]
[[ 0.          0.          0.          0.          0.        ]
 [-0.19253475 -0.05215039 -0.09065194 -0.15521901 -0.23155834]
 [-0.67966019 -0.3246848  -0.62136698 -0.78147544 -0.62908931]
 [-0.39052531 -0.25959632 -0.20722436 -0.17386128 -0.29609229]
 [-0.56752358 -0.47915608 -0.70726024 -0.81524494 -0.80600719]
 [-0.97244037 -0.72254993 -1.04945871 -1.22169094 -1.1

array([0.16428772, 0.1355153 , 0.08325924, 0.11117372, 0.09313925,
       0.06212689, 0.08147067, 0.11896379, 0.06029305, 0.08977037])