# XOR

<img src="https://i.imgur.com/cQFdJq7.jpg" width="100%">

In [0]:
# xor.ipynb
import math
import numpy as np

def Sigmoid(x):
    return 1/(1+np.exp(-x))

x = np.array([[0,0],                            # (4,2)
              [0,1],
              [1,0],
              [1,1]])

w1 = np.array([[-2, 5, 4],                      # (2,3)
               [ 3, 6, 3]])

b1 = np.array([2, -2, -5])                      # (1,3)

w2 = np.array([[-4],                            # (3,1)
               [ 8],
               [-8]])

h = Sigmoid(np.dot(x,w1)+b1)                    # (4,2)*(2,3)+(1,3)=(4,3)
y = Sigmoid(np.dot(h,w2))                       # (4,3)*(3,1)=(4,1)
print (y)

[[0.06766597]
 [0.94927397]
 [0.96979454]
 [0.0542867 ]]


# Backpropagation

<img src="https://i.imgur.com/CEQ6mqx.jpg" width="100%">

In [0]:
# xor1.ipynb
import numpy as np
def Sigmoid(x):
    return 1/(1+np.exp(-x))

lamda = 1
x=np.array([[0,0],[0,1],[1,0],[1,1]])
t=np.array([[0],[1],[1],[0]])

w1=2*np.random.rand(2,3)-1          # -1 ~ 1
b1=2*np.random.rand(1,3)-1          # -1 ~ 1
w2=2*np.random.rand(3,1)-1          # -1 ~ 1

w1, b1, w2

(array([[-0.95502897, -0.10115515, -0.4109373 ],
        [-0.0604845 , -0.89544419,  0.47100726]]),
 array([[-0.51730176, -0.15551825,  0.3934197 ]]),
 array([[ 0.64137807],
        [-0.33651734],
        [-0.46654634]]))

In [0]:
for i in range(0,1000):
    h=Sigmoid(np.dot(x,w1)+b1)
    y=Sigmoid(np.dot(h,w2)) # dot(): matrix multiplication.
    deltaY= np.multiply(y-t,np.multiply(y,(1-y))) # multiply(): Multiply arguments element-wise.
    temp = np.multiply(w2.transpose(),np.multiply(h,(1-h)))
    deltaH = deltaY * temp
    w2=w2-np.dot(h.transpose(),lamda*deltaY)
    w1=w1-np.dot(x.transpose(),lamda*deltaH)
    b1=b1-lamda*deltaH
print (y)

[[0.01607888]
 [0.9819435 ]
 [0.98196683]
 [0.01780796]]


In [0]:
# XOR_NN_TF2.ipynb

import tensorflow as tf
import numpy as np

In [0]:
x = np.array([[0,0],[0,1],[1,0],[1,1]]).astype('float32')
y = np.array([[0],[1],[1],[0]]).astype('float32')
x, y

(array([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]], dtype=float32), array([[0.],
        [1.],
        [1.],
        [0.]], dtype=float32))

<img src="https://i.imgur.com/25JnIFQ.jpg" width="100%">

In [0]:
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(3, activation='sigmoid', input_dim=2))
model.add(layers.Dense(1, activation='sigmoid'))
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)
model.compile(optimizer=sgd,loss='binary_crossentropy',metrics=['accuracy'])

In [0]:
model.fit(x, y, epochs=10000, batch_size=4, verbose=0)  # default batch_size: 32
model.evaluate(x, y)



[0.013456800021231174, 1.0]

In [0]:
predicted = model.predict(x)
predicted

array([[0.00349279],
       [0.9827073 ],
       [0.9880476 ],
       [0.02064393]], dtype=float32)

# ReLU

* sigmoid 함수에 비해 계산량이 작다.
* ReLU의 음숫값이 0으로 죽는 현상을 개선하여 ReakyReLU 개발

In [0]:
# XOR_DNN_TF2.ipynb

import tensorflow as tf
import numpy as np

In [4]:
x = np.array([[0,0],[0,1],[1,0],[1,1]]).astype('float32')
y = np.array([[0],[1],[1],[0]]).astype('float32')
x, y

(array([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]], dtype=float32), array([[0.],
        [1.],
        [1.],
        [0.]], dtype=float32))

In [0]:
from tensorflow.keras import layers

#actFunc = 'sigmoid' # 학습이 안된다.
actFunc = 'relu' # 된다.
model = tf.keras.Sequential()
model.add(layers.Dense(10, activation=actFunc, input_dim=2))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(10, activation=actFunc))
model.add(layers.Dense(1, activation='sigmoid'))    # 출력값을 0~1로 만들기 위함
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)
model.compile(optimizer=sgd,loss='binary_crossentropy',metrics=['accuracy'])

In [6]:
model.fit(x, y, epochs=10000, batch_size=4, verbose=0)
model.evaluate(x, y)



[8.566623364458792e-06, 1.0]

In [7]:
predicted = model.predict(x)
predicted

array([[1.0839906e-05],
       [9.9999499e-01],
       [9.9999201e-01],
       [1.0443308e-05]], dtype=float32)

# Initialization

W=np.random.randn(fan_in, fan_out)/np.sqrt(fan_in) # Xavier initialization(2010)<br>
W=np.random.randn(fan_in, fan_out)/np.sqrt(fan_in/2) #He initialization(2015)
<br><br>
fan_in: 이전 뉴런의 수
fan_out: 이후 뉴런의 수

# Dropout

결과에 많은 영향을 미쳤던 노드 뿐만 아니라 다른 노드들도 학습을 해주는 방법<br><br>

tf.keras.layers.Dropout(0.3): 30%를 죽인다

# Batch Normalization

MNIST dataset을 예로 들어보자. MNIST dataset은 총 60000개의 이미지가 존재하고, 각 이미지는 (28,28)의 크기를 갖는다. 당연히 전체 dataset을 학습시키면 각 epoch마다 도출되는 cost값은 점점 줄어드는 추세이다. 그런데 이 방법은 각 epoch을 계산하는 시간이 오래 걸리기 때문에 전체 dataset을 batch로 나눠 batch 단위로 학습하고 cost가 도출된다. 우리는 이러한 방법을 SGD(Stochastic Gradient Descent)라고 한다. 왜냐하면 batch마다 도출되는 cost가 계속해서 줄어들지 않을 수 있기 때문이다. 이는 전체 dataset은 표준화되어있지만, 각 batch로 봤을 땐 표준화되어있지 않기 때문이다. 결국 cost curve는 전체적으로 감소하는 그래프이지만 들쭉날쭉하며 불안전한 양상을 보일것이다.

In [0]:
# Mnist_DNN_TF2.ipynb

import tensorflow as tf
import numpy as np

In [2]:
from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical
mnist = datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
train_x.shape

(60000, 28, 28)

In [0]:
train_x = train_x.reshape(-1,784) 
test_x = test_x.reshape(-1,784) 

In [0]:
train_x = train_x / 255
test_x = test_x / 255

In [6]:
train_y_onehot = to_categorical(train_y)
test_y_onehot = to_categorical(test_y)
train_y_onehot[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [7]:
#Hidden Layer 추가
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=784))  # weights: 784x256+256
model.add(layers.Dense(256, activation='relu'))                 # weights: 256x256+256
model.add(layers.Dense(10, activation='softmax'))               # weights: 256x10+10
model.compile(optimizer='sgd',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(train_x, train_y_onehot, batch_size = 100, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f14800f2780>

In [8]:
# adam 옵티마이저 사용
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=784))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(train_x, train_y_onehot, batch_size = 100, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f143a5d81d0>

In [9]:
# adam 옵티마이저 + He init.
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=784, kernel_initializer='he_normal'))
model.add(layers.Dense(256, activation='relu', kernel_initializer='he_normal'))
model.add(layers.Dense(10, activation='softmax', kernel_initializer='he_normal'))
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(train_x, train_y_onehot, batch_size = 100, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f1497a46ef0>

In [10]:
model.evaluate(test_x, test_y_onehot)



[0.07684167474508286, 0.9775000214576721]

In [0]:
# adam optimizer와 He 초기화 방법, 드롭아웃 추가
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(256, activation='relu', kernel_initializer='he_normal', input_dim=784))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(256, activation='relu', kernel_initializer='he_normal'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [12]:
model.fit(train_x, train_y_onehot, batch_size = 100, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f143a48ce48>

In [13]:
model.evaluate(test_x, test_y_onehot)



[0.07120649516582489, 0.9787999987602234]

In [14]:
# 오버피팅 상황 만들기
from tensorflow.keras import layers
model = tf.keras.Sequential()
model.add(layers.Dense(512, activation='relu', input_dim=784))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(train_x, train_y_onehot, batch_size = 100, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f143a283438>

In [15]:
model.evaluate(test_x, test_y_onehot)



[0.11612177640199661, 0.9714000225067139]

In [0]:
model.save("test.h5")

In [17]:
!ls 

sample_data  test.h5


In [18]:
import tensorflow as tf
from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical
#from tensorflow import keras
mnist = datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()
train_x = train_x.reshape(-1,784) 
test_x = test_x.reshape(-1,784)
train_x = train_x / 255
test_x = test_x / 255
train_y_onehot = to_categorical(train_y)
test_y_onehot = to_categorical(test_y)
train_y_onehot[0]

model = tf.keras.models.load_model('test.h5')
model.evaluate(test_x, test_y_onehot)



[0.11612177640199661, 0.9714000225067139]

In [19]:
import tensorflow as tf
my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')

RuntimeError: ignored

In [20]:
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)
c

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[22., 28.],
       [49., 64.]], dtype=float32)>

# Quiz

1. 시그모이드함수의 미분값은?
 * S(x)(1-S(x))

2. Backpropagation에서 Back의 의미는?
 * 신경망에서 우측결과로 좌측 값을 계산한다.

3. dropout 설명에서 나온 축구선수가 아닌 것은?
 * 박지성(O), 손흥민(O), 안정환
4. SGD에서 S는 무엇의 약자인가?
 * Stochastic