In [1]:
import tensorflow as tf 
from d2l import tensorflow as d2l
from util.MLPutils import Animator3D, IteratorEx
import pandas as pd

In [2]:
# Layer widths shared by every model built in this notebook.
# NOTE(review): the data loaded later is Fashion-MNIST; if that dataset has
# 10 classes, 100 output units is more than needed -- confirm this is intended.
num_outputs = 100
num_hiddens1 = 256
num_hiddens2 = 256

In [3]:
class Net(tf.keras.Model):
    """MLP with two ReLU hidden layers, each followed by dropout.

    Args:
        num_outputs: Number of units in the final (linear) output layer.
        num_hiddens1: Number of units in the first hidden layer.
        num_hiddens2: Number of units in the second hidden layer.
        weight_decay: L2 coefficient attached to the kernels of both hidden
            layers through ``kernel_regularizer``; ``None`` means no
            regularizer. The output layer is never regularized.
        dropout1: Drop rate of the dropout layer after the first hidden layer.
        dropout2: Drop rate of the dropout layer after the second hidden layer.

    Note:
        Keras collects the L2 penalty in ``self.losses``; it only influences
        training if the training loop adds those losses to its objective.
    """

    def __init__(self, num_outputs, num_hiddens1, num_hiddens2, weight_decay, dropout1, dropout2):
        super().__init__()
        self.input_layer = tf.keras.layers.Flatten()
        self.hidden1 = self._hidden_layer(num_hiddens1, weight_decay)
        self.dropout1 = tf.keras.layers.Dropout(dropout1)
        self.hidden2 = self._hidden_layer(num_hiddens2, weight_decay)
        self.dropout2 = tf.keras.layers.Dropout(dropout2)
        self.output_layer = tf.keras.layers.Dense(num_outputs)

    @staticmethod
    def _hidden_layer(units, weight_decay):
        """Build one ReLU Dense layer, L2-regularized unless ``weight_decay`` is None."""
        # ``kernel_regularizer=None`` is the Dense default, so the un-regularized
        # case is identical to constructing the layer without the argument.
        regularizer = None if weight_decay is None else tf.keras.regularizers.l2(weight_decay)
        return tf.keras.layers.Dense(units, activation=tf.nn.relu, kernel_regularizer=regularizer)

    def call(self, inputs, training=None):
        """Run the forward pass and return the output-layer activations.

        Args:
            inputs: Batch of inputs; flattened before the first hidden layer.
            training: Forwarded to both dropout layers. ``None`` (the default)
                lets Keras decide the mode, which matches the behavior of a
                ``call`` that does not take the argument at all.
        """
        x = self.input_layer(inputs)
        x = self.hidden1(x)
        x = self.dropout1(x, training=training)

        x = self.hidden2(x)
        x = self.dropout2(x, training=training)
        return self.output_layer(x)
    


In [4]:
def train_ch3(train_iter, test_iter, loss, num_epochs, updater, dropout1, dropout2, wd):
    """Train a freshly constructed ``Net`` and report its final metrics.

    The network dimensions come from the notebook-level ``num_outputs``,
    ``num_hiddens1`` and ``num_hiddens2``.

    Args:
        train_iter: Training data, passed to ``d2l.train_epoch_ch3``.
        test_iter: Held-out data, passed to ``d2l.evaluate_accuracy``.
        loss: Loss object passed to ``d2l.train_epoch_ch3``.
        num_epochs: Length of the range that drives the epoch loop.
        updater: Optimizer/updater passed to ``d2l.train_epoch_ch3``.
        dropout1: Drop rate after the first hidden layer.
        dropout2: Drop rate after the second hidden layer.
        wd: L2 coefficient for the hidden layers, or ``None`` for none.

    Returns:
        Tuple ``(train_acc, test_acc, train_loss)`` where the training metrics
        are those returned by the last training epoch and ``test_acc`` is
        measured once, after training.

    NOTE(review): whether dropout is active and whether the L2 penalty in
    ``net.losses`` enters the objective depends on ``d2l.train_epoch_ch3``
    (it must call the model with ``training=True`` and add ``net.losses``);
    verify, otherwise ``dropout1``/``dropout2``/``wd`` have no effect.
    """
    net = Net(num_outputs, num_hiddens1, num_hiddens2, wd, dropout1, dropout2)

    epoch_gen = IteratorEx(range(num_epochs))
    while epoch_gen.hasNext:
        # Intermediate epochs only need the parameter update; the per-epoch
        # test evaluation was discarded, so it is no longer computed.
        d2l.train_epoch_ch3(net, train_iter, loss, updater)
        epoch_gen.next()

    # One more training epoch after the loop supplies the reported metrics.
    # NOTE(review): the total epoch count depends on IteratorEx.hasNext
    # semantics (not visible here) -- confirm it equals num_epochs.
    train_loss, train_acc = d2l.train_epoch_ch3(net, train_iter, loss, updater)
    test_acc = d2l.evaluate_accuracy(net, test_iter)
    return (train_acc, test_acc, train_loss)

MLP with weight decay and dropout

In [5]:
# Grid sweep: fixed first-layer dropout, varying second-layer dropout and
# L2 weight decay. One model is trained from scratch per combination.
num_epochs, lr, batch_size = 50, .5, 256
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
dropout_losses = []   # one (train_loss, train_acc, test_acc) row per run
droput_indices = []   # matching (dropout1, dropout2, decay) keys; name kept (sic) because the next cell reads it
dropout1 = .4
for dropout2 in [0.1, .2, .4, .6, .8]:
    for decay in [0.003, 0.03, .3, 3]:
        # Every run builds a new model, so give it its own optimizer: an
        # optimizer instance tracks the variables it was first applied to and
        # must not be shared across independently created models.
        trainer = tf.keras.optimizers.SGD(learning_rate=lr)
        droput_indices.append((dropout1, dropout2, decay))
        train_acc, test_acc, train_loss = train_ch3(
            train_iter, test_iter, loss, num_epochs, trainer, dropout1, dropout2, decay
        )
        # Not read in this cell; kept as a notebook-level name in case later cells use it.
        train_metrics = train_loss, train_acc
        dropout_losses.append((train_loss, train_acc, test_acc))

In [6]:
# Turn the sweep results into a table keyed by (dropout1, dropout2, decay).
# Note: `dropout_losses` is rebound here from the list of result tuples to
# the DataFrame built from it.
index = pd.MultiIndex.from_tuples(
    droput_indices,
    names=["dropout1", "dropout2", "decay"],
)
dropout_losses = pd.DataFrame(
    dropout_losses,
    index=index,
    columns=["train_loss", "train_acc", "test_acc"],
)
dropout_losses

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train_loss,train_acc,test_acc
dropout1,dropout2,decay,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.4,0.1,0.003,0.352156,0.8703,0.8468
0.4,0.1,0.03,,0.1,0.1
0.4,0.1,0.3,0.890637,0.64775,0.6998
0.4,0.1,3.0,0.180567,0.93145,0.8799
0.4,0.2,0.003,0.201135,0.922833,0.872
0.4,0.2,0.03,0.149566,0.94355,0.8849
0.4,0.2,0.3,0.166673,0.936717,0.8844
0.4,0.2,3.0,0.316023,0.881183,0.8682
0.4,0.4,0.003,1.651822,0.226267,0.3343
0.4,0.4,0.03,,0.1,0.1


In [7]:
# Sweep over second-layer dropout only, with no weight decay (wd=None).
# One model is trained from scratch per dropout value.
num_epochs, lr, batch_size = 100, .5, 256
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
dropout_losses_with_no_decay = []   # one (train_loss, train_acc, test_acc) row per run
droput_indices_with_no_decay = []   # matching (dropout1, dropout2) keys; name kept (sic) because the next cell reads it
dropout1 = .4
for dropout2 in [0.1, .2, .4, .6, .8]:
    # Every run builds a new model, so give it its own optimizer: an optimizer
    # instance tracks the variables it was first applied to and must not be
    # shared across independently created models.
    trainer = tf.keras.optimizers.SGD(learning_rate=lr)
    droput_indices_with_no_decay.append((dropout1, dropout2))
    train_acc, test_acc, train_loss = train_ch3(
        train_iter, test_iter, loss, num_epochs, trainer, dropout1, dropout2, None
    )
    # Not read in this cell; kept as a notebook-level name in case later cells use it.
    train_metrics = train_loss, train_acc
    dropout_losses_with_no_decay.append((train_loss, train_acc, test_acc))

In [8]:
# Turn the no-decay sweep results into a table keyed by (dropout1, dropout2).
# Note: `dropout_losses_with_no_decay` is rebound here from the list of
# result tuples to the DataFrame built from it.
index = pd.MultiIndex.from_tuples(
    droput_indices_with_no_decay,
    names=["dropout1", "dropout2"],
)
dropout_losses_with_no_decay = pd.DataFrame(
    dropout_losses_with_no_decay,
    index=index,
    columns=["train_loss", "train_acc", "test_acc"],
)
dropout_losses_with_no_decay

Unnamed: 0_level_0,Unnamed: 1_level_0,train_loss,train_acc,test_acc
dropout1,dropout2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.4,0.1,0.310461,0.884317,0.8581
0.4,0.2,0.102515,0.96065,0.8859
0.4,0.4,2.30409,0.09875,0.1
0.4,0.6,,0.1,0.1
0.4,0.8,0.173894,0.932817,0.8772
