In [None]:
# 몬테 카를로 드롭 아웃

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import sklearn
import matplotlib.pyplot as plt

In [2]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
pixel_means = X_train.mean(axis=0, keepdims=True) 
pixel_stds = X_train.std(axis=0, keepdims=True) 
X_train_scaled = (X_train - pixel_means) / pixel_stds 
X_valid_scaled = (X_valid - pixel_means) / pixel_stds 
X_test_scaled = (X_test - pixel_means) / pixel_stds

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
y_train.shape

(55000,)

In [None]:
# 드롭아웃 모델

In [None]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))


Epoch 1/2
Epoch 2/2


In [None]:
np.round(model.predict(X_test_scaled[:1]), 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.05, 0.  , 0.93]],
      dtype=float32)

In [None]:
# 몬테 카를로 드롭아웃

In [None]:
y_probas = np.stack([model(X_test_scaled, training=True)
                     for sample in range(100)])
y_proba = y_probas.mean(axis=0)
y_std = y_probas.std(axis=0)


In [None]:
# 드롭아웃으로 만든 예측을 평균

In [None]:
np.round(model.predict(X_test_scaled[:1]), 2)


array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.06, 0.  , 0.93]],
      dtype=float32)

In [None]:
np.round(y_probas[:,:1], 2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.44, 0.01, 0.55]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.2 , 0.  , 0.8 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.98]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.02, 0.  , 0.97]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.2 , 0.  , 0.79]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.03, 0.  , 0.07, 0.  , 0.9 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.09, 0.  , 0.43, 0.  , 0.48]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.04, 0.  , 0.96]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.33, 0.  , 0.16, 0.16, 0.34]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.08, 0.  , 0.92]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.05, 0.  , 0.48, 0.  , 0.47]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.08, 0.  , 0.  , 0.  , 0.91]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.1 , 0.  , 0.01, 0.  , 0.9 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0

In [None]:
# 첫번째 차원으로 평균을 내면 몬테카를로 드롭아웃의 예측이 얻어짐

In [None]:
np.round(y_proba[:1], 2)


array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.05, 0.  , 0.14, 0.  , 0.81]],
      dtype=float32)

In [None]:
# 표준편차확인

In [None]:
np.round(y_std[:1], 2)


array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.1 , 0.  , 0.17, 0.02, 0.2 ]],
      dtype=float32)

In [None]:
# 전체 모델의 정확도

In [None]:
y_pred = np.argmax(y_proba, axis=1)
accuracy = np.sum(y_pred == y_test) / len(y_pred)
print(accuracy)

0.8578


In [None]:
# 훈련하는 동안 다르게 작동하는 층을 가질 경우

In [None]:
class MCDropout(keras.layers.Dropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

class MCAlphaDropout(keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

In [None]:
# 이미 Droput모델을 훈련한 경우

In [None]:
mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.Dropout) else layer
    for layer in model.layers
])

In [None]:
mc_model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 mc_alpha_dropout (MCAlphaDr  (None, 784)              0         
 opout)                                                          
                                                                 
 dense_9 (Dense)             (None, 300)               235500    
                                                                 
 mc_alpha_dropout_1 (MCAlpha  (None, 300)              0         
 Dropout)                                                        
                                                                 
 dense_10 (Dense)            (None, 100)               30100     
                                                                 
 mc_alpha_dropout_2 (MCAlpha  (None, 100)             

In [None]:
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
mc_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

In [None]:
mc_model.set_weights(model.get_weights())

In [None]:
# 첫번째 모델에 대한 100개의 예측 평균

In [None]:
np.round(np.mean([mc_model.predict(X_test_scaled[:10]) for sample in range(100)], axis=0), 2)

array([[0.  , 0.01, 0.  , 0.  , 0.  , 0.18, 0.  , 0.21, 0.02, 0.56],
       [0.02, 0.  , 0.71, 0.  , 0.12, 0.  , 0.14, 0.  , 0.  , 0.  ],
       [0.  , 0.99, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.01, 0.94, 0.01, 0.03, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.28, 0.01, 0.1 , 0.02, 0.06, 0.03, 0.47, 0.  , 0.03, 0.01],
       [0.02, 0.94, 0.01, 0.01, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.02, 0.01, 0.1 , 0.02, 0.71, 0.01, 0.1 , 0.  , 0.02, 0.01],
       [0.03, 0.  , 0.14, 0.02, 0.3 , 0.01, 0.48, 0.  , 0.02, 0.  ],
       [0.05, 0.03, 0.07, 0.07, 0.03, 0.39, 0.05, 0.25, 0.04, 0.02],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.1 , 0.  , 0.85, 0.01, 0.03]],
      dtype=float32)

In [None]:
# 맥스 - 노름 규제

In [None]:
from functools import partial

MaxNormDense = partial(keras.layers.Dense,
                       activation="selu", kernel_initializer="lecun_normal",
                       kernel_constraint=keras.constraints.max_norm(1.))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    MaxNormDense(300),
    MaxNormDense(100),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))

Epoch 1/2
Epoch 2/2
