In [1]:
import tensorflow as tf 
from d2l import tensorflow as d2l
from util.MLPutils import Animator3D, IteratorEx
import pandas as pd

In [2]:
# Layer widths read by train_ch3 when it assembles the MLP.
# NOTE(review): the data loaded below is Fashion-MNIST (10 classes); 100 output
# units looks wider than necessary -- confirm this is intentional.
num_outputs = 100   # units in the final (logit) Dense layer
num_hiddens1 = 256  # units in the first hidden Dense layer
num_hiddens2 = 256  # units in the second hidden Dense layer

In [3]:
def train_ch3(train_iter, test_iter, loss, num_epochs, updater, dropout1, dropout2, wd=None):
    """Build a fresh two-hidden-layer MLP, train it, and report its final metrics.

    The network is Flatten -> Dense(num_hiddens1, ReLU) -> Dropout(dropout1)
    -> Dense(num_hiddens2, ReLU) -> Dropout(dropout2) -> Dense(num_outputs),
    with the layer widths taken from the notebook-level constants.

    Args:
        train_iter: Training batches, forwarded to ``d2l.train_epoch_ch3``.
        test_iter: Evaluation batches, forwarded to ``d2l.evaluate_accuracy``.
        loss: Loss object, forwarded to ``d2l.train_epoch_ch3``.
        num_epochs: Number of training epochs to run; must be at least 1.
        updater: Optimizer/updater, forwarded to ``d2l.train_epoch_ch3``.
        dropout1: Rate of the Dropout layer after the first hidden layer.
        dropout2: Rate of the Dropout layer after the second hidden layer.
        wd: L2 coefficient attached as ``kernel_regularizer`` to both hidden
            Dense layers, or ``None`` to build them without a regularizer.

    Returns:
        Tuple ``(train_acc, test_acc, train_loss)``: the training metrics
        reported by the last epoch and the test accuracy measured after it.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1 (there would be no
            training metrics to report).
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be >= 1, got {num_epochs!r}")

    net = tf.keras.models.Sequential()
    net.add(tf.keras.layers.Flatten())
    for units, rate in ((num_hiddens1, dropout1), (num_hiddens2, dropout2)):
        # kernel_regularizer=None is the Dense default, so the no-decay case
        # builds exactly the same layer as omitting the argument.
        regularizer = tf.keras.regularizers.l2(wd) if wd is not None else None
        net.add(tf.keras.layers.Dense(units, activation=tf.nn.relu,
                                      kernel_regularizer=regularizer))
        net.add(tf.keras.layers.Dropout(rate))
    net.add(tf.keras.layers.Dense(num_outputs))

    # NOTE(review): Keras collects `kernel_regularizer` penalties in
    # `net.losses`, and Dropout only drops units when the model is called with
    # `training=True`. Whether `d2l.train_epoch_ch3` does either is not visible
    # from this notebook -- confirm; otherwise `wd` and the dropout rates do
    # not influence training at all.

    # Run exactly `num_epochs` epochs with a plain loop. The previous version
    # drove the loop through an opaque iterator helper and then trained one
    # more epoch after it, which made the real epoch count unclear.
    train_metrics = None
    for _ in range(num_epochs):
        train_metrics = d2l.train_epoch_ch3(net, train_iter, loss, updater)

    # Only the final test accuracy is returned, so evaluate once at the end
    # instead of once per epoch.
    test_acc = d2l.evaluate_accuracy(net, test_iter)
    train_loss, train_acc = train_metrics
    return (train_acc, test_acc, train_loss)

MLP with weight decay and dropout

In [4]:
# Grid search: fixed first-layer dropout, sweep the second-layer dropout rate
# and the L2 coefficient. One model is trained per combination.
# NOTE(review): the output recorded for this sweep contains NaN train_loss rows
# with 0.1 accuracy, which suggests some runs diverge at lr=0.5 -- consider a
# smaller learning rate.
num_epochs, lr, batch_size = 100, .5, 256
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
dropout_losses = []   # (train_loss, train_acc, test_acc) per run
droput_indices = []   # (dropout1, dropout2, decay) per run, same order
dropout1 = .1
for dropout2 in [0.1, .2, .4, .6, .8]:
    for decay in [0.003, 0.03, .3, 3]:
        # train_ch3 builds a brand-new model on every call, so each run gets
        # its own optimizer rather than sharing one instance whose internal
        # state was created while updating an earlier model's variables.
        trainer = tf.keras.optimizers.SGD(learning_rate=lr)
        train_acc, test_acc, train_loss = train_ch3(
            train_iter, test_iter, loss, num_epochs, trainer,
            dropout1, dropout2, wd=decay)
        # Record the key and the metrics together, after training succeeded,
        # so the two lists can never get out of step.
        droput_indices.append((dropout1, dropout2, decay))
        dropout_losses.append((train_loss, train_acc, test_acc))

In [5]:
# Tabulate the sweep: one row per (dropout1, dropout2, decay) combination,
# one column per recorded metric.
index = pd.MultiIndex.from_tuples(
    droput_indices,
    names=["dropout1", "dropout2", "decay"],
)
dropout_losses = pd.DataFrame(
    data=dropout_losses,
    columns=["train_loss", "train_acc", "test_acc"],
    index=index,
)
dropout_losses

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train_loss,train_acc,test_acc
dropout1,dropout2,decay,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.1,0.1,0.003,,0.1,0.1
0.1,0.1,0.03,2.30432,0.097533,0.1
0.1,0.1,0.3,2.307127,0.100367,0.1
0.1,0.1,3.0,,0.1,0.1
0.1,0.2,0.003,0.226194,0.914783,0.8731
0.1,0.2,0.03,,0.1,0.1
0.1,0.2,0.3,0.090508,0.96555,0.8822
0.1,0.2,3.0,0.608181,0.7507,0.7449
0.1,0.4,0.003,0.586942,0.777767,0.7868
0.1,0.4,0.03,,0.1,0.1


In [8]:
# Dropout-only sweep: same setup as the grid search, but the hidden layers are
# built without an L2 regularizer (wd=None).
num_epochs, lr, batch_size = 100, .5, 256
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
dropout_losses_with_no_decay = []   # (train_loss, train_acc, test_acc) per run
droput_indices_with_no_decay = []   # (dropout1, dropout2) per run, same order
dropout1 = .1
for dropout2 in [0.1, .2, .4, .6, .8]:
    # train_ch3 builds a brand-new model on every call, so each run gets its
    # own optimizer rather than sharing one instance whose internal state was
    # created while updating an earlier model's variables.
    trainer = tf.keras.optimizers.SGD(learning_rate=lr)
    train_acc, test_acc, train_loss = train_ch3(
        train_iter, test_iter, loss, num_epochs, trainer,
        dropout1, dropout2, wd=None)
    # Record the key and the metrics together, after training succeeded, so
    # the two lists can never get out of step.
    droput_indices_with_no_decay.append((dropout1, dropout2))
    dropout_losses_with_no_decay.append((train_loss, train_acc, test_acc))

In [9]:
# Tabulate the dropout-only sweep: one row per (dropout1, dropout2) pair,
# one column per recorded metric.
index = pd.MultiIndex.from_tuples(
    droput_indices_with_no_decay,
    names=["dropout1", "dropout2"],
)
dropout_losses_with_no_decay = pd.DataFrame(
    data=dropout_losses_with_no_decay,
    columns=["train_loss", "train_acc", "test_acc"],
    index=index,
)
dropout_losses_with_no_decay

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train_loss,train_acc,test_acc
dropout1,dropout2,decay,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.1,0.1,3,1.272418,0.495083,0.3537
0.1,0.2,3,0.192007,0.92735,0.8701
0.1,0.4,3,0.103611,0.960383,0.8844
0.1,0.6,3,,0.1,0.1
0.1,0.8,3,1.705402,0.210333,0.1992
