In [None]:
# enhancing performance with additional layers, initialization, and dropout

In [6]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Softmax-regression baseline: data pipeline, hyperparameters, parameters, optimizer.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nb_classes = 10
training_epochs = 15
batch_size = 100

# Collapse every sample to a single row; -1 lets TensorFlow infer the feature count.
x_train = tf.reshape(x_train, (x_train.shape[0], -1))
x_test = tf.reshape(x_test, (x_test.shape[0], -1))

# Expand the class ids into one-hot rows of width nb_classes.
y_train = tf.one_hot(y_train, nb_classes)
y_test = tf.one_hot(y_test, nb_classes)

# train_data yields single (features, label) pairs; dataset yields mini-batches of them.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_data.batch(batch_size)

# Debug aid: uncomment to look at one mini-batch.
# for features, label in dataset.take(1):
#     print(features, label)

# One linear layer mapping 784 features to nb_classes logits.
W = tf.Variable(tf.random.normal((784, nb_classes)), name="weight")
b = tf.Variable(tf.random.normal((nb_classes,)), name="bias")

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

def compute_cost(X, Y):
    """Forward pass of the linear softmax classifier plus its loss.

    X is cast to float32 and divided by 255 (presumably raw 0-255 pixel
    values) before the layer ``X @ W + b`` is applied. Y is handed to the
    loss as ``labels``.

    Returns:
        (hypothesis, cost): the logits for every row of X and the softmax
        cross-entropy averaged over all rows.
    """
    scaled = tf.cast(X, dtype=tf.float32) / 255.0
    logits = tf.add(tf.matmul(scaled, W), b)
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return logits, tf.reduce_mean(per_example)

def train_step(X, Y):
    """Apply one optimizer update computed from the batch (X, Y).

    Returns:
        (hypothesis, cost) from the forward pass that produced the
        gradients, i.e. evaluated before the parameters were updated.
    """
    params = [W, b]
    with tf.GradientTape() as tape:
        logits, loss = compute_cost(X, Y)
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))
    return logits, loss

# Mini-batches per epoch; dividing each batch cost by it accumulates the epoch mean.
total_batch = len(train_data) / batch_size

for epoch in range(training_epochs):
    avg_cost = 0
    for x_batch, y_batch in dataset:
        hypothesis, cost_val = train_step(x_batch, y_batch)
        avg_cost += cost_val / total_batch
    print("Epoch:", "%04d" % (epoch + 1), "cost", "{:.9f}".format(avg_cost))

# Score the whole test split in one forward pass: a row counts as correct
# when the arg-max of its logits equals the arg-max of its one-hot label.
hypothesis, cost_val = compute_cost(x_test, y_test)
predicted = tf.argmax(hypothesis, 1)
expected = tf.argmax(y_test, 1)
is_correct = tf.equal(predicted, expected)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("Accuracy: ", accuracy)



Epoch: 0001 cost 1.375367522
Epoch: 0002 cost 0.517982066
Epoch: 0003 cost 0.431009620
Epoch: 0004 cost 0.388565034
Epoch: 0005 cost 0.362870783
Epoch: 0006 cost 0.345713407
Epoch: 0007 cost 0.333525181
Epoch: 0008 cost 0.324450165
Epoch: 0009 cost 0.317453653
Epoch: 0010 cost 0.311925709
Epoch: 0011 cost 0.307465553
Epoch: 0012 cost 0.303797930
Epoch: 0013 cost 0.300731212
Epoch: 0014 cost 0.298131734
Epoch: 0015 cost 0.295902938
Accuracy:  tf.Tensor(0.9029, shape=(), dtype=float32)


In [32]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Three-layer MLP with normally-initialised weights: data, hyperparameters, parameters.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nb_classes = 10
training_epochs = 15
batch_size = 100

# Collapse every sample to a single row; -1 lets TensorFlow infer the feature count.
x_train = tf.reshape(x_train, (x_train.shape[0], -1))
x_test = tf.reshape(x_test, (x_test.shape[0], -1))

# Expand the class ids into one-hot rows of width nb_classes.
y_train = tf.one_hot(y_train, nb_classes)
y_test = tf.one_hot(y_test, nb_classes)

# train_data yields single (features, label) pairs; dataset yields mini-batches of them.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_data.batch(batch_size)

# Debug aid: uncomment to look at one mini-batch.
# for features, label in dataset.take(1):
#     print(features, label)

# Layer sizes 784 -> 256 -> 256 -> nb_classes; every parameter comes from tf.random.normal.
W1 = tf.Variable(tf.random.normal((784, 256)))
b1 = tf.Variable(tf.random.normal((256,)))

W2 = tf.Variable(tf.random.normal((256, 256)))
b2 = tf.Variable(tf.random.normal((256,)))

W3 = tf.Variable(tf.random.normal((256, nb_classes)))
b3 = tf.Variable(tf.random.normal((nb_classes,)))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def compute_cost(X, Y):
    """Forward pass of the two-hidden-layer ReLU network plus its loss.

    X is cast to float32 and divided by 255 (presumably raw 0-255 pixel
    values), sent through the ReLU layers (W1, b1) and (W2, b2), and then
    through the linear output layer (W3, b3). Y is handed to the loss as
    ``labels``.

    Returns:
        (hypothesis, cost): the output-layer logits and the softmax
        cross-entropy averaged over all rows.
    """
    activation = tf.cast(X, dtype=tf.float32) / 255.0
    for weight, bias in ((W1, b1), (W2, b2)):
        activation = tf.nn.relu(tf.matmul(activation, weight) + bias)
    logits = tf.matmul(activation, W3) + b3
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return logits, tf.reduce_mean(per_example)

def train_step(X, Y):
    """Apply one optimizer update computed from the batch (X, Y).

    Gradients of the cost are taken with respect to every weight and bias
    of the three layers and applied through the module-level ``optimizer``.

    Returns:
        (hypothesis, cost) from the forward pass that produced the
        gradients, i.e. evaluated before the parameters were updated.
    """
    # Each trainable variable must appear exactly once. The same list is used
    # for gradient() and apply_gradients() so gradients and variables stay paired.
    trainable = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        hypothesis, cost = compute_cost(X, Y)
    gradients = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return hypothesis, cost

# Mini-batches per epoch; dividing each batch cost by it accumulates the epoch mean.
total_batch = len(train_data) / batch_size

for epoch in range(training_epochs):
    avg_cost = 0
    for x_batch, y_batch in dataset:
        hypothesis, cost_val = train_step(x_batch, y_batch)
        avg_cost += cost_val / total_batch
    print("Epoch:", "%04d" % (epoch + 1), "cost", "{:.9f}".format(avg_cost))

# Score the whole test split in one forward pass: a row counts as correct
# when the arg-max of its logits equals the arg-max of its one-hot label.
hypothesis, cost_val = compute_cost(x_test, y_test)
predicted = tf.argmax(hypothesis, 1)
expected = tf.argmax(y_test, 1)
is_correct = tf.equal(predicted, expected)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("Accuracy: ", accuracy)



Epoch: 0001 cost 167.822158813
Epoch: 0002 cost 43.389518738
Epoch: 0003 cost 27.534620285
Epoch: 0004 cost 19.089965820
Epoch: 0005 cost 13.877528191
Epoch: 0006 cost 10.189608574
Epoch: 0007 cost 7.579679966
Epoch: 0008 cost 5.740392208
Epoch: 0009 cost 4.331822395
Epoch: 0010 cost 3.203842163
Epoch: 0011 cost 2.430720329
Epoch: 0012 cost 1.932021976
Epoch: 0013 cost 1.568692446
Epoch: 0014 cost 1.298140645
Epoch: 0015 cost 1.088929176
Accuracy:  tf.Tensor(0.947, shape=(), dtype=float32)


In [31]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Three-layer MLP with Glorot-uniform weights: data, hyperparameters, parameters.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nb_classes = 10
training_epochs = 15
batch_size = 100

# Collapse every sample to a single row; -1 lets TensorFlow infer the feature count.
x_train = tf.reshape(x_train, (x_train.shape[0], -1))
x_test = tf.reshape(x_test, (x_test.shape[0], -1))

# Expand the class ids into one-hot rows of width nb_classes.
y_train = tf.one_hot(y_train, nb_classes)
y_test = tf.one_hot(y_test, nb_classes)

# train_data yields single (features, label) pairs; dataset yields mini-batches of them.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_data.batch(batch_size)

# Debug aid: uncomment to look at one mini-batch.
# for features, label in dataset.take(1):
#     print(features, label)

# Weights use the Glorot-uniform initializer; biases still come from tf.random.normal.
initializer = tf.keras.initializers.GlorotUniform()

# Layer sizes 784 -> 256 -> 256 -> nb_classes.
W1 = tf.Variable(initializer(shape=(784, 256)))
b1 = tf.Variable(tf.random.normal((256,)))

W2 = tf.Variable(initializer(shape=(256, 256)))
b2 = tf.Variable(tf.random.normal((256,)))

W3 = tf.Variable(initializer(shape=(256, nb_classes)))
b3 = tf.Variable(tf.random.normal((nb_classes,)))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def compute_cost(X, Y):
    """Forward pass of the two-hidden-layer ReLU network plus its loss.

    X is cast to float32 and divided by 255 (presumably raw 0-255 pixel
    values), sent through the ReLU layers (W1, b1) and (W2, b2), and then
    through the linear output layer (W3, b3). Y is handed to the loss as
    ``labels``.

    Returns:
        (hypothesis, cost): the output-layer logits and the softmax
        cross-entropy averaged over all rows.
    """
    activation = tf.cast(X, dtype=tf.float32) / 255.0
    for weight, bias in ((W1, b1), (W2, b2)):
        activation = tf.nn.relu(tf.matmul(activation, weight) + bias)
    logits = tf.matmul(activation, W3) + b3
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return logits, tf.reduce_mean(per_example)

def train_step(X, Y):
    """Apply one optimizer update computed from the batch (X, Y).

    Gradients of the cost are taken with respect to every weight and bias
    of the three layers and applied through the module-level ``optimizer``.

    Returns:
        (hypothesis, cost) from the forward pass that produced the
        gradients, i.e. evaluated before the parameters were updated.
    """
    # Each trainable variable must appear exactly once. The same list is used
    # for gradient() and apply_gradients() so gradients and variables stay paired.
    trainable = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        hypothesis, cost = compute_cost(X, Y)
    gradients = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return hypothesis, cost

# Mini-batches per epoch; dividing each batch cost by it accumulates the epoch mean.
total_batch = len(train_data) / batch_size

for epoch in range(training_epochs):
    avg_cost = 0
    for x_batch, y_batch in dataset:
        hypothesis, cost_val = train_step(x_batch, y_batch)
        avg_cost += cost_val / total_batch
    print("Epoch:", "%04d" % (epoch + 1), "cost", "{:.9f}".format(avg_cost))

# Score the whole test split in one forward pass: a row counts as correct
# when the arg-max of its logits equals the arg-max of its one-hot label.
hypothesis, cost_val = compute_cost(x_test, y_test)
predicted = tf.argmax(hypothesis, 1)
expected = tf.argmax(y_test, 1)
is_correct = tf.equal(predicted, expected)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("Accuracy: ", accuracy)



Epoch: 0001 cost 0.306146532
Epoch: 0002 cost 0.119249329
Epoch: 0003 cost 0.077364348
Epoch: 0004 cost 0.052916147
Epoch: 0005 cost 0.037650805
Epoch: 0006 cost 0.027615691
Epoch: 0007 cost 0.024703706
Epoch: 0008 cost 0.023192912
Epoch: 0009 cost 0.016631611
Epoch: 0010 cost 0.012814472
Epoch: 0011 cost 0.014867225
Epoch: 0012 cost 0.010770042
Epoch: 0013 cost 0.010031405
Epoch: 0014 cost 0.011199803
Epoch: 0015 cost 0.008327252
Accuracy:  tf.Tensor(0.9786, shape=(), dtype=float32)


In [33]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Five-layer MLP with Glorot-uniform weights: data, hyperparameters, parameters.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nb_classes = 10
training_epochs = 15
batch_size = 100

# Collapse every sample to a single row; -1 lets TensorFlow infer the feature count.
x_train = tf.reshape(x_train, (x_train.shape[0], -1))
x_test = tf.reshape(x_test, (x_test.shape[0], -1))

# Expand the class ids into one-hot rows of width nb_classes.
y_train = tf.one_hot(y_train, nb_classes)
y_test = tf.one_hot(y_test, nb_classes)

# train_data yields single (features, label) pairs; dataset yields mini-batches of them.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_data.batch(batch_size)

# Debug aid: uncomment to look at one mini-batch.
# for features, label in dataset.take(1):
#     print(features, label)

# Weights use the Glorot-uniform initializer; biases still come from tf.random.normal.
initializer = tf.keras.initializers.GlorotUniform()

# Layer sizes 784 -> 512 -> 512 -> 512 -> 512 -> nb_classes.
W1 = tf.Variable(initializer(shape=(784, 512)))
b1 = tf.Variable(tf.random.normal((512,)))

W2 = tf.Variable(initializer(shape=(512, 512)))
b2 = tf.Variable(tf.random.normal((512,)))

W3 = tf.Variable(initializer(shape=(512, 512)))
b3 = tf.Variable(tf.random.normal((512,)))

W4 = tf.Variable(initializer(shape=(512, 512)))
b4 = tf.Variable(tf.random.normal((512,)))

W5 = tf.Variable(initializer(shape=(512, nb_classes)))
b5 = tf.Variable(tf.random.normal((nb_classes,)))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def compute_cost(X, Y):
    """Forward pass of the four-hidden-layer ReLU network plus its loss.

    X is cast to float32 and divided by 255 (presumably raw 0-255 pixel
    values), sent through the ReLU layers (W1, b1) ... (W4, b4) in order,
    and then through the linear output layer (W5, b5). Y is handed to the
    loss as ``labels``.

    Returns:
        (hypothesis, cost): the output-layer logits and the softmax
        cross-entropy averaged over all rows.
    """
    hidden_layers = ((W1, b1), (W2, b2), (W3, b3), (W4, b4))
    activation = tf.cast(X, dtype=tf.float32) / 255.0
    for weight, bias in hidden_layers:
        activation = tf.nn.relu(tf.matmul(activation, weight) + bias)
    logits = tf.matmul(activation, W5) + b5
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return logits, tf.reduce_mean(per_example)

def train_step(X, Y):
    """Apply one optimizer update computed from the batch (X, Y).

    Gradients of the cost are taken with respect to every weight and bias
    of the five layers and applied through the module-level ``optimizer``.

    Returns:
        (hypothesis, cost) from the forward pass that produced the
        gradients, i.e. evaluated before the parameters were updated.
    """
    # Each trainable variable must appear exactly once. The same list is used
    # for gradient() and apply_gradients() so gradients and variables stay paired.
    trainable = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
    with tf.GradientTape() as tape:
        hypothesis, cost = compute_cost(X, Y)
    gradients = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return hypothesis, cost

# Mini-batches per epoch; dividing each batch cost by it accumulates the epoch mean.
total_batch = len(train_data) / batch_size

for epoch in range(training_epochs):
    avg_cost = 0
    for x_batch, y_batch in dataset:
        hypothesis, cost_val = train_step(x_batch, y_batch)
        avg_cost += cost_val / total_batch
    print("Epoch:", "%04d" % (epoch + 1), "cost", "{:.9f}".format(avg_cost))

# Score the whole test split in one forward pass: a row counts as correct
# when the arg-max of its logits equals the arg-max of its one-hot label.
hypothesis, cost_val = compute_cost(x_test, y_test)
predicted = tf.argmax(hypothesis, 1)
expected = tf.argmax(y_test, 1)
is_correct = tf.equal(predicted, expected)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("Accuracy: ", accuracy)



Epoch: 0001 cost 0.288661450
Epoch: 0002 cost 0.111195326
Epoch: 0003 cost 0.071078636
Epoch: 0004 cost 0.055253364
Epoch: 0005 cost 0.043906983
Epoch: 0006 cost 0.036158655
Epoch: 0007 cost 0.028299389
Epoch: 0008 cost 0.027906612
Epoch: 0009 cost 0.024974992
Epoch: 0010 cost 0.021122567
Epoch: 0011 cost 0.021207608
Epoch: 0012 cost 0.018220656
Epoch: 0013 cost 0.017939715
Epoch: 0014 cost 0.015061238
Epoch: 0015 cost 0.015217330
Accuracy:  tf.Tensor(0.979, shape=(), dtype=float32)


In [5]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Three-layer MLP trained with dropout: data, hyperparameters, parameters.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

nb_classes = 10
training_epochs = 15
batch_size = 100

# Collapse every sample to a single row; -1 lets TensorFlow infer the feature count.
x_train = tf.reshape(x_train, (x_train.shape[0], -1))
x_test = tf.reshape(x_test, (x_test.shape[0], -1))

# Expand the class ids into one-hot rows of width nb_classes.
y_train = tf.one_hot(y_train, nb_classes)
y_test = tf.one_hot(y_test, nb_classes)

# train_data yields single (features, label) pairs; dataset yields mini-batches of them.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = train_data.batch(batch_size)

# Debug aid: uncomment to look at one mini-batch.
# for features, label in dataset.take(1):
#     print(features, label)

# Weights use the Glorot-uniform initializer; biases still come from tf.random.normal.
initializer = tf.keras.initializers.GlorotUniform()

# Layer sizes 784 -> 256 -> 256 -> nb_classes.
W1 = tf.Variable(initializer(shape=(784, 256)))
b1 = tf.Variable(tf.random.normal((256,)))

W2 = tf.Variable(initializer(shape=(256, 256)))
b2 = tf.Variable(tf.random.normal((256,)))

W3 = tf.Variable(initializer(shape=(256, nb_classes)))
b3 = tf.Variable(tf.random.normal((nb_classes,)))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def compute_cost(X, Y, keep_prob):
    """Forward pass of the two-hidden-layer ReLU network with dropout, plus its loss.

    X is cast to float32 and divided by 255 (presumably raw 0-255 pixel
    values). Each hidden layer applies ReLU followed by dropout; the output
    layer (W3, b3) is linear. Y is handed to the loss as ``labels``.

    NOTE(review): despite its name, ``keep_prob`` is forwarded unchanged as
    the ``rate`` argument of ``tf.nn.dropout``, which is the fraction of
    units to drop. So ``keep_prob=0.3`` drops 30% of the activations and
    ``keep_prob=0`` disables dropout. The name is kept because callers pass
    it by keyword.

    Returns:
        (hypothesis, cost): the output-layer logits and the softmax
        cross-entropy averaged over all rows.
    """
    drop_rate = keep_prob
    activation = tf.cast(X, dtype=tf.float32) / 255.0
    for weight, bias in ((W1, b1), (W2, b2)):
        activation = tf.nn.relu(tf.matmul(activation, weight) + bias)
        activation = tf.nn.dropout(activation, rate=drop_rate)
    logits = tf.matmul(activation, W3) + b3
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return logits, tf.reduce_mean(per_example)

def train_step(X, Y, keep_prob):
    """Apply one optimizer update computed from the batch (X, Y).

    ``keep_prob`` is passed straight through to ``compute_cost``. Gradients
    of the cost are taken with respect to every weight and bias of the three
    layers and applied through the module-level ``optimizer``.

    Returns:
        (hypothesis, cost) from the forward pass that produced the
        gradients, i.e. evaluated before the parameters were updated.
    """
    # Each trainable variable must appear exactly once. The same list is used
    # for gradient() and apply_gradients() so gradients and variables stay paired.
    trainable = [W1, b1, W2, b2, W3, b3]
    with tf.GradientTape() as tape:
        hypothesis, cost = compute_cost(X, Y, keep_prob)
    gradients = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return hypothesis, cost

# Mini-batches per epoch; dividing each batch cost by it accumulates the epoch mean.
total_batch = len(train_data) / batch_size

for epoch in range(training_epochs):
    avg_cost = 0
    for x_batch, y_batch in dataset:
        # The value is forwarded to tf.nn.dropout as its `rate` argument.
        hypothesis, cost_val = train_step(x_batch, y_batch, keep_prob=0.3)
        avg_cost += cost_val / total_batch
    print("Epoch:", "%04d" % (epoch + 1), "cost", "{:.9f}".format(avg_cost))

# Score the whole test split in one forward pass with a dropout rate of 0:
# a row counts as correct when the arg-max of its logits equals the arg-max
# of its one-hot label.
hypothesis, cost_val = compute_cost(x_test, y_test, 0)
predicted = tf.argmax(hypothesis, 1)
expected = tf.argmax(y_test, 1)
is_correct = tf.equal(predicted, expected)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
print("Accuracy: ", accuracy)



Epoch: 0001 cost 0.425826013
Epoch: 0002 cost 0.186285987
Epoch: 0003 cost 0.134083822
Epoch: 0004 cost 0.113129012
Epoch: 0005 cost 0.094623961
Epoch: 0006 cost 0.082894973
Epoch: 0007 cost 0.074179210
Epoch: 0008 cost 0.067577019
Epoch: 0009 cost 0.061436187
Epoch: 0010 cost 0.055878833
Epoch: 0011 cost 0.051597685
Epoch: 0012 cost 0.049780294
Epoch: 0013 cost 0.043976586
Epoch: 0014 cost 0.042386804
Epoch: 0015 cost 0.042665269
Accuracy:  tf.Tensor(0.9813, shape=(), dtype=float32)
