<img src='images/neuron.jpg' />

$$ f(x) = \frac{1}{1+e^{-x}} $$

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + e^(-x)), evaluated element-wise.

    Args:
        x: A number or NumPy array.

    Returns:
        The sigmoid of ``x``; an array input yields an array of the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Sample the sigmoid on [-10, 10) in steps of 0.01 and plot the curve.
x = np.arange(-10, 10, 0.01)
y = sigmoid(x)

# Explicit figure/axes with a title and axis labels so the plot stands on its own;
# plt.show() renders the figure without echoing the Line2D repr as cell output.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set(title="Sigmoid function", xlabel="x", ylabel="sigmoid(x)")
ax.grid(True)
plt.show()

$$ a_{j}^{(i)} = w_{j1}^{(i)}x_{1} + w_{j2}^{(i)}x_{2} + w_{j3}^{(i)}x_{3} + b_{j}^{(i)} $$

$$ A^{(1)} = XW^{(1)} + B^{(1)}, \qquad Z^{(1)} = \sigma(A^{(1)}) $$
$$ A^{(1)} = \begin{pmatrix} a_{1}^{(1)} & a_{2}^{(1)} & a_{3}^{(1)} \end{pmatrix} $$
$$ X = \begin{pmatrix} x_{1} & x_{2} \end{pmatrix} $$
$$ B^{(1)} = \begin{pmatrix} b_{1}^{(1)} & b_{2}^{(1)} & b_{3}^{(1)} \end{pmatrix} $$
$$ W^{(1)} = 
\begin{pmatrix}
w_{11}^{(1)} & w_{21}^{(1)} & w_{31}^{(1)} \\
w_{12}^{(1)} & w_{22}^{(1)} & w_{32}^{(1)}
\end{pmatrix}
$$

<a href='https://deep-learning-basic.readthedocs.io/ko/latest/Neural%20Networks%20and%20Deep%20Learning/3.%20Shallow%20Neural%20Network/'>신경망</a>

In [3]:
# Layer 1: a two-element input feeding three hidden units.
X = np.array([1.0, 0.5])
W1 = np.array([
    [0.1, 0.3, 0.5],
    [0.2, 0.4, 0.6],
])
B1 = np.array([0.1, 0.2, 0.3])

# Show the shapes, one per line, to confirm they line up for np.dot(X, W1) + B1.
print(X.shape, W1.shape, B1.shape, sep="\n")

# Weighted sum before the activation is named A here
# (the original note flags this naming as incorrect).
A1 = np.dot(X, W1) + B1
# The value after the sigmoid is called Z1.
Z1 = sigmoid(A1)

(2,)
(2, 3)
(3,)


In [4]:
# Layer 2: the three hidden activations feed two units.
W2 = np.array([
    [0.1, 0.4],
    [0.2, 0.5],
    [0.3, 0.6],
])
B2 = np.array([0.1, 0.2])

# Show the shapes, one per line, to confirm they line up for np.dot(Z1, W2) + B2.
print(Z1.shape, W2.shape, B2.shape, sep="\n")

# At this point the signal has passed through two layers.
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid(A2)
Z2

(3,)
(3, 2)
(2,)


array([0.62624937, 0.7710107 ])

In [5]:
def identity_function(x):
    """Identity function: hand the argument back unchanged."""
    return x

In [6]:
# Output layer: the two layer-2 activations map to two outputs.
W3 = np.array([
    [0.1, 0.2],
    [0.2, 0.4],
])
B3 = np.array([0.1, 0.2])

A3 = np.dot(Z2, W3) + B3
# The identity function changes nothing, so `Y = A3` would be equivalent.
Y = identity_function(A3)
Y

array([0.31682708, 0.63365415])

In [7]:
def init_network():
    """Build the fixed example weights and biases of the 2-3-2-2 network.

    Returns:
        dict: Keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' (in that order), each
        mapped to a freshly created NumPy array.
    """
    return {
        'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
        'b2': np.array([0.1, 0.2]),
        'W3': np.array([[0.1, 0.2], [0.2, 0.4]]),
        'b3': np.array([0.1, 0.2]),
    }

In [8]:
def forward(network, x):
    """Run one forward pass through the three-layer network.

    Args:
        network: Mapping with weights 'W1', 'W2', 'W3' and biases 'b1', 'b2', 'b3'.
        x: Input passed to ``np.dot`` together with ``network['W1']``.

    Returns:
        The output-layer weighted sum passed through ``identity_function``.
    """
    # Hidden layers: affine transform followed by the sigmoid activation.
    hidden1 = sigmoid(np.dot(x, network['W1']) + network['b1'])
    hidden2 = sigmoid(np.dot(hidden1, network['W2']) + network['b2'])

    # Output layer: identity for now; to be replaced with softmax later.
    output_sum = np.dot(hidden2, network['W3']) + network['b3']
    return identity_function(output_sum)

In [9]:
# Build the example network and push one two-element input through it.
# Note: this rebinds `x` and `y`, which the sigmoid plotting cell also used.
network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
y  # bare last expression so the notebook displays the output array

array([0.31682708, 0.63365415])

$$ y_{k} = \frac{\exp(a_{k})}{\sum_{i=1}^{n}\exp(a_{i})} $$

In [10]:
# Exercise: write the function and pass it a handful of values; the outputs
# must add up to 1, and the largest input must receive the largest probability.
def softmax(x):
    """Naive softmax: exp(x) divided by the sum of exp(x).

    No overflow protection: ``np.exp`` yields ``inf`` for large inputs.

    Args:
        x: 1-D sequence or array of scores.

    Returns:
        Array of the exponentiated scores divided by their total.
    """
    exponentials = np.exp(x)
    return exponentials / sum(exponentials)

In [11]:
# Sanity check: the softmax outputs should add up to 1.
sum(softmax([1,3,4,5,6,7]))

1.0

$e^{1000}$은 무한대(inf)가 되므로 해결해야 합니다.
$$ y_{k} = \frac{C\exp(a_{k})}{C\sum_{i=1}^{n}\exp(a_{i})} $$

$$ y_{k} = \frac{\exp(a_{k} + \log C)}{\sum_{i=1}^{n}\exp(a_{i} + \log C)} $$

$$ y_{k} = \frac{\exp(a_{k} + C')}{\sum_{i=1}^{n}\exp(a_{i} + C')}, \qquad C' = \log C $$

In [12]:
def softmax(a):
    """Numerically stable softmax over the last axis.

    The maximum is subtracted before exponentiating, so the largest exponent
    is 0 and ``np.exp`` cannot overflow; the shift cancels out in the ratio.

    Args:
        a: Array-like of scores. A 1-D input is treated as one sample; for
            higher-rank input (e.g. a batch of shape (n_samples, n_classes))
            every slice along the last axis is normalised independently.

    Returns:
        Array with the same shape as ``a`` whose entries along the last axis
        sum to 1.
    """
    a = np.asarray(a)
    # keepdims=True keeps the reduced axis so the shift broadcasts per sample.
    c = np.max(a, axis=-1, keepdims=True)
    exp_a = np.exp(a - c)
    # np.sum with an explicit axis: the builtin sum() would add along axis 0
    # and normalise a batch across samples instead of across classes.
    sum_exp_a = np.sum(exp_a, axis=-1, keepdims=True)
    return exp_a / sum_exp_a

In [13]:
# exp(7000) overflows float64, so this input checks that the max-shifted
# softmax still returns probabilities that add up to 1.
sum(softmax(np.array([1,3,4,5,6,7000])))

1.0

In [14]:
# Print each probability on its own line.
probabilities = softmax(np.array([1, 3, 4, 5, 6, 7]))
for i in probabilities:
    print(i)

0.0015750147332396142
0.011637872220549801
0.031635016579048846
0.0859928907098291
0.2337529121931931
0.6354062935641396


손글씨 숫자 인식

In [15]:
import sys, os
sys.path.append(os.pardir) # 부모 디렉토리의 파일을 가져올 수 있도록 설정
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image

In [16]:
def img_show(img):
    """Convert an array to a PIL image and call its ``show()`` method.

    Args:
        img: Pixel array; it is cast to ``uint8`` before conversion.
    """
    Image.fromarray(np.uint8(img)).show()

In [17]:
# Load MNIST through the local dataset.mnist helper with flatten=True and
# normalize=False; the shape checks in the following cells show 784 values per image.
(x_train, y_train), (x_test, y_test)= load_mnist(flatten=True, normalize=False)

In [18]:
# MNIST handwritten digits: 60,000 images are used for training
# and 10,000 images are used for testing.

In [19]:
x_train.shape  # training inputs: one row per image

(60000, 784)

In [20]:
y_train.shape  # training labels: one entry per training image

(60000,)

In [21]:
x_test.shape  # test inputs: one row per image

(10000, 784)

In [22]:
y_test.shape  # test labels: one entry per test image

(10000,)

In [25]:
# Take the training sample at index 1 and reshape it to a 28x28 grid.
img = x_train[1].reshape(28, 28)

In [26]:
# Display the reshaped 28x28 sample through PIL.
img_show(img)

In [27]:
!pip install tensorflow # 안 되면 anaconda prompt

Collecting tensorflow
  Downloading tensorflow-2.14.0-cp310-cp310-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.14.0
  Downloading tensorflow_intel-2.14.0-cp310-cp310-win_amd64.whl (284.1 MB)
     -------------------------------------- 284.1/284.1 MB 7.6 MB/s eta 0:00:00
Collecting flatbuffers>=23.5.26
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Collecting tensorflow-estimator<2.15,>=2.14.0
  Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl (440 kB)
     ------------------------------------- 440.7/440.7 kB 26.9 MB/s eta 0:00:00
Collecting google-pasta>=0.1.1
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
     ---------------------------------------- 57.5/57.5 kB 3.0 MB/s eta 0:00:00
Collecting tensorboard<2.15,>=2.14
  Downloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)
     ---------------------------------------- 5.5/5.5 MB 69.6 MB/s eta 0:00:00
Collecting absl-py>=1.0.0
  Downloading absl_py-2.0.0-py3-none-any.whl (130 kB)
     

In [28]:
import tensorflow as tf

In [29]:
# Short alias for the Keras MNIST dataset module; load_data() is called on it below.
mnist = tf.keras.datasets.mnist

In [38]:
# Reload MNIST through Keras (this rebinds the arrays loaded earlier) and
# divide the pixel values by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

In [39]:
# Seed Python, NumPy and TensorFlow so weight initialisation, shuffling and
# dropout are more repeatable between runs; 42 is an arbitrary choice.
tf.keras.utils.set_random_seed(42)

# 28x28 image -> flattened vector -> 128 ReLU units -> dropout (rate 0.2)
# -> 10 softmax outputs.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# The sparse_* loss takes integer class labels rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1a561381960>

In [40]:
# Print each layer's output shape and parameter count
# (the original note describes this as a three-layer network).
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 dense_4 (Dense)             (None, 128)               100480    
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 101770 (397.54 KB)
Trainable params: 101770 (397.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


<a href='https://softtone-someday.tistory.com/11'>딥러닝 프레임워크 케라스</a>

In [41]:
# Seed Python, NumPy and TensorFlow so weight initialisation, shuffling and
# dropout are more repeatable between runs; 42 is an arbitrary choice.
tf.keras.utils.set_random_seed(42)

# Reload MNIST and divide the pixel values by 255.0 so this cell runs on its own.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Deeper variant: 512 ReLU units -> dropout (0.2) -> 128 sigmoid units
# -> dropout (0.1) -> 10 softmax outputs.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='sigmoid'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# The sparse_* loss takes integer class labels rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1a52667f430>