# Lecture 7: Regularization

### Library

In [None]:
import tensorflow as tf
from tensorflow.keras import Model, layers, regularizers, initializers
import numpy as np
from matplotlib import pyplot as plt
import random

# Seed the Python, NumPy and TensorFlow random number generators with the same
# fixed value so that repeated runs start from the same random state.
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

### Data processing

In [None]:
# Load MNIST through Keras: a train(+validation) split and a test split,
# each as an (images, labels) pair.
(x_trainval, y_trainval), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert images and labels to float32 tensors.
x_trainval = tf.cast(x_trainval, tf.float32)
x_test = tf.cast(x_test, tf.float32)
y_trainval = tf.cast(y_trainval, tf.float32)
y_test = tf.cast(y_test, tf.float32)

num_classes = 10
num_features = 784

# Flatten every image into a vector of `num_features` values, then divide by
# 255 to rescale the pixel values.
x_trainval = tf.reshape(x_trainval, [-1, num_features]) / 255.
x_test = tf.reshape(x_test, [-1, num_features]) / 255.

In [None]:
# Split the 60,000 MNIST training examples into a training part (first
# 50,000) and a validation part (remaining 10,000). These are the sizes the
# shape checks in the next cell expect.
num_train = 50000
x_train, x_val = x_trainval[:num_train], x_trainval[num_train:]
y_train, y_val = y_trainval[:num_train], y_trainval[num_train:]

In [None]:
# Sanity check: print the shape of every split. The trailing comments give the
# shapes that are expected after the train/validation split.
print(x_train.shape) #(50000, 784)
print(x_val.shape) #(10000, 784)
print(x_test.shape) #(10000, 784)

print(y_train.shape) #(50000,)
print(y_val.shape) #(10000,)
print(y_test.shape) #(10000,)

In [None]:
batch_size = 200

# Training input pipeline: shuffle with a 50,000-element buffer, group into
# mini-batches of `batch_size`, and prefetch one batch ahead.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(50000)
    .batch(batch_size)
    .prefetch(1)
)

### Define functions for Training/Testing

In [None]:
def cross_entropy_loss(x, y):
    """Return the batch-mean sparse softmax cross-entropy.

    Args:
        x: model outputs, handed to TensorFlow as `logits`.
        y: class indices; cast to int64 before being used as `labels`.

    Returns:
        A scalar tensor holding the mean of the per-example losses.
    """
    labels = tf.cast(y, tf.int64)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=x
    )
    return tf.reduce_mean(per_example)

def accuracy(y_pred, y_true):
    """Return the fraction of rows whose arg-max class equals the label.

    Args:
        y_pred: per-class scores; the predicted class is the arg-max along
            axis 1.
        y_true: class indices; cast to int64 for the comparison.

    Returns:
        The mean of the 0/1 hit indicators, converted with `.numpy()`.
    """
    predicted = tf.argmax(y_pred, 1)
    expected = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits).numpy()
    
def Train_model(model, lr, epo, print_loss=True, plot_graph=True):
    """Train `model` on the module-level `train_data` with SGD + momentum.

    Args:
        model: Keras model whose call accepts an `is_training` keyword.
        lr: learning rate given to the SGD optimizer.
        epo: number of passes over `train_data`.
        print_loss: if true, print the last mini-batch loss and the accuracy
            on `x_test`/`y_test` after every epoch.
        plot_graph: if true, record the loss every 50 steps and plot the
            curve with matplotlib once training is finished.

    Returns:
        The same `model` object, trained in place.
    """
    step_l = []
    loss_l = []

    # Use the caller's learning rate (it used to be hard-coded to 0.01).
    optimizer = tf.optimizers.SGD(lr, momentum=0.9)

    # Running count of optimisation steps across epochs. Used as the x-axis of
    # the loss curve so it does not depend on an assumed number of batches
    # per epoch.
    global_step = 0

    for epoch in range(1, epo + 1):
        for step, (batch_x, batch_y) in enumerate(train_data, 1):
            global_step += 1
            with tf.GradientTape() as g:
                pred = model(batch_x, is_training=True)
                loss = cross_entropy_loss(pred, batch_y)
                # Layer regularisers (e.g. kernel_regularizer) only expose
                # their penalties through `model.losses`; they must be added
                # to the objective here, inside the tape, to have any effect.
                objective = loss
                if model.losses:
                    objective = loss + tf.add_n(model.losses)

            trainable_variables = model.trainable_variables
            gradients = g.gradient(objective, trainable_variables)
            optimizer.apply_gradients(zip(gradients, trainable_variables))

            if plot_graph and step % 50 == 0:
                step_l.append(global_step)
                # Record the data term only, so curves of regularised and
                # unregularised models stay comparable.
                loss_l.append(loss.numpy())

        if print_loss:
            acc = accuracy(model(x_test), y_test)
            print("epoch: ", epoch, ", loss: ", loss.numpy(), "acc: ", acc)

    if plot_graph:
        plt.plot(step_l, loss_l)
    return model

In [None]:
class NeuralNet(Model):
    """Baseline fully connected classifier.

    Architecture: Dense(128) -> BatchNorm -> ReLU -> Dense(256) -> BatchNorm
    -> ReLU -> Dense(num_classes). All Dense kernels use Glorot-normal
    initialisation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = layers.Dense(128, kernel_initializer=initializers.GlorotNormal())
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.fc2 = layers.Dense(256, kernel_initializer=initializers.GlorotNormal())
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.out = layers.Dense(num_classes, kernel_initializer=initializers.GlorotNormal())

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of flattened input images.
            is_training: forwarded to the batch-normalisation layers as
                `training`; also selects the output format (see Returns).

        Returns:
            Raw logits when `is_training` is true, softmax probabilities
            otherwise.
        """
        x = self.fc1(x)
        x = self.bn1(x, training=is_training)
        x = self.ac1(x)
        x = self.fc2(x)
        x = self.bn2(x, training=is_training)
        x = self.ac2(x)
        x = self.out(x)
        # The training loss (sparse_softmax_cross_entropy_with_logits) applies
        # softmax itself, so normalising here during training would apply it
        # twice. Only normalise for inference.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

## Early stopping

In [None]:
# Validation input pipeline: shuffle with a 10,000-element buffer, group into
# mini-batches of `batch_size`, and prefetch one batch ahead.
val_data = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .shuffle(10000)
    .batch(batch_size)
    .prefetch(1)
)

def early_stopping(model, lr, max_epoch=100, patience=5, print_loss=True):
    """Train `model` and stop once validation accuracy stops improving.

    The model is trained on the module-level `train_data` with SGD + momentum.
    After every epoch the accuracy on `val_data` is measured. Training stops
    when it has not improved for `patience` consecutive epochs, or after
    `max_epoch` epochs. The weights of the best epoch are restored before
    returning.

    Args:
        model: Keras model whose call accepts an `is_training` keyword.
        lr: learning rate given to the SGD optimizer.
        max_epoch: upper bound on the number of training epochs.
        patience: number of epochs without improvement that is tolerated.
        print_loss: if true, print the last mini-batch loss and the
            validation accuracy after every epoch.

    Returns:
        A `(model, best_epoch)` tuple, where `best_epoch` is the 1-based epoch
        with the highest validation accuracy (0 if no epoch was run).
    """
    optimizer = tf.optimizers.SGD(lr, momentum=0.9)

    best_acc = -1.0
    best_epoch = 0
    best_weights = None

    for epoch in range(1, max_epoch + 1):
        for batch_x, batch_y in train_data:
            with tf.GradientTape() as g:
                pred = model(batch_x, is_training=True)
                loss = cross_entropy_loss(pred, batch_y)
                # Include layer regularisation penalties, if the model has any.
                if model.losses:
                    loss = loss + tf.add_n(model.losses)
            variables = model.trainable_variables
            gradients = g.gradient(loss, variables)
            optimizer.apply_gradients(zip(gradients, variables))

        # Validation accuracy, weighted by batch size so a smaller final
        # batch is accounted for correctly.
        correct, seen = 0.0, 0
        for batch_x, batch_y in val_data:
            n = int(batch_x.shape[0])
            correct += accuracy(model(batch_x), batch_y) * n
            seen += n
        val_acc = correct / seen

        if print_loss:
            print("epoch: ", epoch, ", loss: ", loss.numpy(), "val acc: ", val_acc)

        if val_acc > best_acc:
            best_acc, best_epoch = val_acc, epoch
            # get_weights() returns NumPy copies, so later updates do not
            # overwrite this snapshot.
            best_weights = model.get_weights()
        elif epoch - best_epoch >= patience:
            break

    if best_weights is not None:
        model.set_weights(best_weights)
    return model, best_epoch

In [None]:
# Train the baseline network with early stopping; 0.01 is the learning rate,
# the same value used for the other models in this notebook.
neural_net = NeuralNet()
neural_net, n_best_epoch = early_stopping(neural_net, 0.01)

In [None]:
# Accuracy of the baseline network on the test split.
n_acc = accuracy(neural_net(x_test), y_test)
print("test accuracy =", n_acc)

## Regularization
### Weight decay

regularizer 종류 : https://www.tensorflow.org/api_docs/python/tf/keras/regularizers

In [None]:
class WD_NeuralNet(Model):
    """Fully connected classifier with L2 weight decay on every Dense kernel.

    Architecture: Dense(128) -> BatchNorm -> ReLU -> Dense(256) -> BatchNorm
    -> ReLU -> Dense(num_classes). All Dense kernels use Glorot-normal
    initialisation and an L2 penalty.

    Keras collects the penalties in `self.losses`; a custom training loop has
    to add them to its objective for the decay to take effect.
    """

    def __init__(self, weight_decay=1e-4):
        """Build the layers.

        Args:
            weight_decay: coefficient of the L2 penalty applied to each Dense
                kernel.
        """
        super().__init__()
        self.fc1 = layers.Dense(
            128,
            kernel_initializer=initializers.GlorotNormal(),
            kernel_regularizer=regularizers.l2(weight_decay),
        )
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.fc2 = layers.Dense(
            256,
            kernel_initializer=initializers.GlorotNormal(),
            kernel_regularizer=regularizers.l2(weight_decay),
        )
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.out = layers.Dense(
            num_classes,
            kernel_initializer=initializers.GlorotNormal(),
            kernel_regularizer=regularizers.l2(weight_decay),
        )

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of flattened input images.
            is_training: forwarded to the batch-normalisation layers as
                `training`; also selects the output format (see Returns).

        Returns:
            Raw logits when `is_training` is true, softmax probabilities
            otherwise.
        """
        x = self.fc1(x)
        x = self.bn1(x, training=is_training)
        x = self.ac1(x)
        x = self.fc2(x)
        x = self.bn2(x, training=is_training)
        x = self.ac2(x)
        x = self.out(x)
        # The training loss (sparse_softmax_cross_entropy_with_logits) applies
        # softmax itself, so only normalise for inference.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

In [None]:
# Build the weight-decay network and train it with early stopping.
# early_stopping returns the model together with its best epoch; 0.01 is
# presumably the learning rate -- confirm against early_stopping's signature.
wd_nn = WD_NeuralNet()
wd_nn, wd_best_epoch = early_stopping(wd_nn, 0.01)

In [None]:
# Accuracy of the weight-decay network on the test split.
wd_acc = accuracy(wd_nn(x_test), y_test)
print("test accuracy =", wd_acc)

### Dropout

In [None]:
# Dropout implementation
class DO_NeuralNet(Model):
    """Fully connected classifier with dropout after each hidden activation.

    Architecture: Dense(128) -> BatchNorm -> ReLU -> Dropout -> Dense(256) ->
    BatchNorm -> ReLU -> Dropout -> Dense(num_classes). All Dense kernels use
    Glorot-normal initialisation.
    """

    def __init__(self, drop_rate=0.5):
        """Build the layers.

        Args:
            drop_rate: fraction of hidden units dropped during training.
        """
        super().__init__()
        self.fc1 = layers.Dense(128, kernel_initializer=initializers.GlorotNormal())
        self.bn1 = layers.BatchNormalization()
        self.ac1 = layers.Activation(tf.nn.relu)
        self.do1 = layers.Dropout(drop_rate)
        self.fc2 = layers.Dense(256, kernel_initializer=initializers.GlorotNormal())
        self.bn2 = layers.BatchNormalization()
        self.ac2 = layers.Activation(tf.nn.relu)
        self.do2 = layers.Dropout(drop_rate)
        self.out = layers.Dense(num_classes, kernel_initializer=initializers.GlorotNormal())

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of flattened input images.
            is_training: forwarded as `training` to the batch-normalisation
                and dropout layers, so units are only dropped while training;
                also selects the output format (see Returns).

        Returns:
            Raw logits when `is_training` is true, softmax probabilities
            otherwise.
        """
        x = self.fc1(x)
        x = self.bn1(x, training=is_training)
        x = self.ac1(x)
        x = self.do1(x, training=is_training)
        x = self.fc2(x)
        x = self.bn2(x, training=is_training)
        x = self.ac2(x)
        x = self.do2(x, training=is_training)
        x = self.out(x)
        # The training loss (sparse_softmax_cross_entropy_with_logits) applies
        # softmax itself, so only normalise for inference.
        if not is_training:
            x = tf.nn.softmax(x)
        return x

In [None]:
# Build the dropout network and train it with early stopping.
# early_stopping returns the model together with its best epoch; 0.01 is
# presumably the learning rate -- confirm against early_stopping's signature.
do_nn = DO_NeuralNet()
do_nn, do_best_epoch = early_stopping(do_nn, 0.01)

In [None]:
# Accuracy of the dropout network on the test split.
do_acc = accuracy(do_nn(x_test), y_test)
print("test accuracy =", do_acc)

### Model ensembles

In [None]:
# Ensemble the three trained networks by averaging their outputs on the test
# split; the predicted class is the arg-max of the averaged scores.
pred = (neural_net(x_test) + wd_nn(x_test) + do_nn(x_test)) / 3.
ensem_acc = accuracy(pred, y_test)

print(ensem_acc)

### Compare accuracy

In [None]:
# Test accuracies, in order: baseline, weight decay, dropout, ensemble.
print(n_acc)
print(wd_acc)
# The dropout accuracy is stored in `do_acc`; `dowobn_acc` is never defined.
print(do_acc)
print(ensem_acc)