In [23]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm
# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Download/load MNIST; only the training split is used in the cells shown here.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# One-hot encode the training labels.
y_input = tf.keras.utils.to_categorical(y_train)
# Flatten every image to a 784-vector, scale by 1/255 and store as float32.
x_input = (x_train.reshape(x_train.shape[0], 784) / 255.0).astype(np.float32)

In [26]:
# Trainable parameters of the linear model, both initialised to zero:
# W is the 784x10 weight matrix, b the length-10 bias vector.
W = tf.Variable(tf.zeros(shape=(784, 10)), name='W')
b = tf.Variable(tf.zeros(shape=(10,)), name='b')

# predicted probability for each class
def y_pred(x):
    """Return softmax class probabilities for each row of ``x``.

    The logits are ``x @ W + b`` using the module-level variables ``W`` and
    ``b``; the softmax is taken over the last axis.
    """
    logits = tf.matmul(x, W) + b
    return tf.nn.softmax(logits, axis=-1)

# cross entropy loss function
@tf.function
def loss(x,y):
    """Mean softmax cross-entropy of the linear model on ``(x, y)``.

    Args:
        x: batch of flattened inputs; multiplied against the global ``W``.
        y: target distribution for each row (one-hot labels in this notebook).

    Returns:
        Scalar tensor holding the cross-entropy averaged over the batch.

    The loss is computed directly from the logits ``x @ W + b`` (the same
    pre-softmax value ``y_pred`` uses) instead of ``log(softmax(...))``.
    Taking the log of an already-normalised probability breaks as soon as a
    probability underflows to 0: ``0 * log(0)`` evaluates to NaN and poisons
    every gradient. The fused TensorFlow op is numerically stable.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)



In [27]:
# Set up logging.
# NOTE(review): absolute, user-specific path -- consider making this configurable.
logdir = '/home/jovyan/logs'
writer = tf.summary.create_file_writer(logdir)


# Record the graph of one tf.function call: switch tracing on, run the
# function once, then export what was traced through the summary writer.

tf.summary.trace_on(graph=True, profiler=False)
# Call only one tf.function when tracing.
z = loss(x_input, y_input)
with writer.as_default():
    tf.summary.trace_export(name='graph', step=0)


In [33]:
# now add in the training bits of the graph

# number of optimisation steps to run
train_steps = 500

# learning rate
lr = 0.1

# gradient descent optimizer
optimizer = tf.optimizers.SGD(learning_rate=lr)



In [34]:

# Each step evaluates the loss on the whole of x_input / y_input, applies one
# optimizer update to W and b, and writes the loss as a scalar summary.
with writer.as_default():
    for i in tqdm(range(train_steps)):
        with tf.GradientTape() as tape:
            current_loss = loss(x_input, y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(grads_and_vars=zip(gradients, [W, b]))
        # the logged value is the loss measured before this step's update
        tf.summary.scalar(name='loss', data=current_loss, step=i)



  0%|          | 0/500 [00:00<?, ?it/s]

In [38]:
# now add in the training bits of the graph

# number of optimisation steps to run
train_steps = 500

# learning rate
lr = 0.001

# Adam optimizer (replaces the plain gradient descent optimizer used before)
optimizer = tf.optimizers.Adam(learning_rate=lr)

# Fresh zero-initialised parameters so this run starts from scratch.
W = tf.Variable(tf.zeros(shape=(784, 10)), name='W')
b = tf.Variable(tf.zeros(shape=(10,)), name='b')

# predicted probability for each class
def y_pred(x):
    """Softmax class probabilities of the linear model for each row of ``x``.

    Uses the module-level ``W`` and ``b``: ``softmax(x @ W + b)`` over the
    last axis.
    """
    logits = tf.matmul(x, W) + b
    return tf.nn.softmax(logits, axis=-1)

# cross entropy loss function
# NOTE(review): presumably re-declared in this cell so that a new tf.function
# is traced against the W and b created just above -- confirm before removing.
@tf.function
def loss(x,y):
    """Mean softmax cross-entropy of the linear model on ``(x, y)``.

    Args:
        x: batch of flattened inputs; multiplied against the global ``W``.
        y: target distribution for each row (one-hot labels in this notebook).

    Returns:
        Scalar tensor holding the cross-entropy averaged over the batch.

    Computed from the logits ``x @ W + b`` with TensorFlow's fused,
    numerically stable op. The previous ``log(softmax(...))`` form produces
    ``0 * log(0) = NaN`` once any probability underflows to 0, which then
    propagates into every gradient.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)

In [39]:

# Keep a handle on the progress bar so its description can show the loss.
pbar = tqdm(range(train_steps))
with writer.as_default():
    for i in pbar:
        with tf.GradientTape() as tape:
            current_loss = loss(x_input, y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(grads_and_vars=zip(gradients, [W, b]))
        tf.summary.scalar(name='lossadam', data=current_loss, step=i)
        # display the most recent loss value next to the progress bar
        pbar.set_description_str(str(current_loss.numpy()))


  0%|          | 0/500 [00:00<?, ?it/s]

In [43]:
# now add in the training bits of the graph

# number of optimisation steps to run
train_steps = 500

# learning rate
lr = 0.1

# gradient descent optimizer
optimizer = tf.optimizers.SGD(learning_rate=lr)

# Fresh zero-initialised parameters so this run starts from scratch.
W = tf.Variable(tf.zeros(shape=(784, 10)), name='W')
b = tf.Variable(tf.zeros(shape=(10,)), name='b')

# predicted probability for each class
def y_pred(x):
    """Map each row of ``x`` to softmax probabilities over the classes.

    The logits are ``x @ W + b`` with the module-level ``W`` and ``b``; the
    softmax normalises over the last axis.
    """
    logits = tf.matmul(x, W) + b
    return tf.nn.softmax(logits, axis=-1)

# cross entropy loss function
# NOTE(review): presumably re-declared in this cell so that a new tf.function
# is traced against the W and b created just above -- confirm before removing.
@tf.function
def loss(x,y):
    """Mean softmax cross-entropy of the linear model on ``(x, y)``.

    Args:
        x: batch of flattened inputs; multiplied against the global ``W``.
        y: target distribution for each row (one-hot labels in this notebook).

    Returns:
        Scalar tensor holding the cross-entropy averaged over the batch.

    Works on the logits ``x @ W + b`` through TensorFlow's fused op, which
    is numerically stable. Taking ``log`` of the softmax output instead
    yields ``0 * log(0) = NaN`` as soon as a probability underflows to 0.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)

In [44]:
# Train as before and, after every update, log the weights feeding output
# column 0 as a 28x28 greyscale image.
with writer.as_default():
    for i in tqdm(range(train_steps)):
        with tf.GradientTape() as tape:
            current_loss = loss(x_input,y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W ,b]))

        # Column 0 of W reshaped to the [k, h, w, c] layout = [1, 28, 28, 1].
        W_slice = tf.reshape(W[:, 0], [1, 28, 28, 1])
        # tf.summary.image clips floating point data to [0, 1]. The weights
        # are signed, so logging them raw would flatten every negative weight
        # to black. Min-max rescale first; divide_no_nan yields zeros if all
        # weights are equal (zero range).
        # The scaling is per step, so brightness is not comparable across steps.
        w_min = tf.reduce_min(W_slice)
        w_range = tf.reduce_max(W_slice) - w_min
        W_image = tf.math.divide_no_nan(W_slice - w_min, w_range)
        tf.summary.image('image', W_image, step=i)


  0%|          | 0/500 [00:00<?, ?it/s]