In [1]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf

# Switch TensorFlow into eager mode for the rest of the notebook.
tf.enable_eager_execution()
# Trailing expression so the cell displays whether eager mode is active.
tf.executing_eagerly()

True

In [2]:
# In eager mode matmul returns a concrete value that can be formatted
# straight into a string.
x = [[2.]]
m = tf.matmul(x,x)
print("hello, {}".format(m))

hello, [[4.]]


In [3]:
# Build a 2x2 integer constant; printing it shows values, shape and dtype.
a = tf.constant([[1,2],
                 [3,4]])
print(a)


tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)


In [4]:
# Broadcasting support
# The scalar 1 is added to every element of `a`.
b = tf.add(a,1)
print(b)

tf.Tensor(
[[2 3]
 [4 5]], shape=(2, 2), dtype=int32)


In [5]:
# Operator overloading is supported
# `*` on two tensors is the element-wise product, not a matrix product.
print(a * b)

tf.Tensor(
[[ 2  6]
 [12 20]], shape=(2, 2), dtype=int32)


In [6]:
import numpy as np
# NumPy functions accept tensors directly; the printed result is a plain array.
c = np.multiply(a, b)
print(c)

[[ 2  6]
 [12 20]]


In [7]:
# .numpy() hands back the tensor's value as a numpy.ndarray.
print(a.numpy())
print(type(a.numpy()))

[[1 2]
 [3 4]]
<class 'numpy.ndarray'>


In [8]:
# Short alias for the contrib eager namespace (used below for tfe.metrics).
tfe = tf.contrib.eager

In [9]:
# Fizzbuzz
def fizzbuzz(max_num):
    """Print the FizzBuzz sequence for 1..max_num using eager tensors.

    Demonstrates that tensor values can drive ordinary Python control flow.

    Args:
        max_num: last number of the sequence (inclusive); anything accepted
            by tf.convert_to_tensor that yields an integer scalar.

    Prints one line per number: 'FizzBuzz' for multiples of both 3 and 5,
    'Fizz' for multiples of 3, 'Buzz' for multiples of 5, else the number.
    """
    max_num = tf.convert_to_tensor(max_num)
    for num in range(1, max_num.numpy()+1):
        num = tf.constant(num)
        # int(...) pulls the remainder out of the tensor so it can be
        # compared as a plain Python value.
        divisible_by_3 = int(num % 3) == 0
        divisible_by_5 = int(num % 5) == 0
        if divisible_by_3 and divisible_by_5:
            print('FizzBuzz')
        elif divisible_by_3:
            print('Fizz')
        elif divisible_by_5:
            print('Buzz')
        else:
            print(num.numpy())

In [10]:
# Run the demo for 1..15; 15 is the first value that prints 'FizzBuzz'.
fizzbuzz(15)

1
2
Fizz
4
Buzz
Fizz
7
8
Fizz
Buzz
11
Fizz
13
14
FizzBuzz


In [11]:
# build a model
class MySimpleLayer(tf.keras.layers.Layer):
    """Bias-free linear layer: multiplies its input by a learned kernel.

    Args:
        output_units: size of the last dimension of the layer's output.
    """

    def __init__(self, output_units):
        super(MySimpleLayer, self).__init__()
        self.output_units = output_units

    def build(self, input_shape):
        """Create the kernel with shape [input_shape[-1], output_units]."""
        # The kernel's first dimension comes from the input shape, so it is
        # created here rather than in __init__.
        # add_weight is used instead of add_variable, which is only a
        # deprecated alias for it; keyword arguments keep the call valid
        # across Keras versions.
        self.kernel = self.add_weight(
            name="kernel", shape=[input_shape[-1], self.output_units])

    def call(self, input):
        """Return the matrix product of `input` and the kernel."""
        return tf.matmul(input, self.kernel)

In [12]:
# Stack of two 10-unit Dense layers; input_shape on the first layer declares
# 784 features per example.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, input_shape=(784,)),
    tf.keras.layers.Dense(10)
])

In [13]:
class MNISTModel(tf.keras.Model):
    """Subclassed Keras model built from two 10-unit Dense layers."""

    def __init__(self):
        super(MNISTModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(units=10)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, input):
        """Apply dense1 once, then dense2 twice, and return the result."""
        hidden = self.dense1(input)
        # Both applications go through the same dense2 layer object, so the
        # second one reuses the variables of the first.
        return self.dense2(self.dense2(hidden))

model = MNISTModel()

In [14]:
# Differentiate loss = w * w with respect to w; with w = 1.0 the gradient is 2.0.
# NOTE: the names `loss` and `grad` are rebound to functions in a later cell.
w = tf.Variable([[1.0]])
with tf.GradientTape() as tape:
    # The computation to differentiate runs inside the tape's context.
    loss = w * w
grad = tape.gradient(loss, w)
print(grad)

tf.Tensor([[2.]], shape=(1, 1), dtype=float32)


In [15]:
# Train a model on the MNIST handwritten digit dataset
# Only the first pair returned by load_data() is kept; the second is discarded.
(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()

# Images get a trailing axis, are divided by 255 and cast to float32;
# labels are cast to int64.
dataset = tf.data.Dataset.from_tensor_slices(
  (tf.cast(mnist_images[...,tf.newaxis]/255, tf.float32),
   tf.cast(mnist_labels,tf.int64)))
# Shuffle with a buffer of 1000 elements, then group into batches of 32.
dataset = dataset.shuffle(1000).batch(32)

In [16]:
# Build Model
# Two 3x3 convolutions with 16 filters each, global average pooling, then a
# 10-unit Dense layer whose outputs are used as logits.
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16,[3,3], activation='relu'),
    tf.keras.layers.Conv2D(16,[3,3], activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10)
])

# Call the untrained model on one example from the first batch and print the
# resulting logits.
for images, labels in dataset.take(1):
    print("Logits: ", mnist_model(images[0:1]).numpy())

Logits:  [[-2.0655438e-02  2.3540262e-02 -3.1226516e-02  3.8858451e-02
   5.7875674e-05  3.9041400e-02  1.4599374e-02 -4.2211253e-02
  -2.9964410e-02 -3.8515501e-02]]


In [17]:
# Adam optimizer constructed with its default arguments.
optimizer = tf.train.AdamOptimizer()
# Per-batch loss values; the training loop appends to this list.
loss_history = []

In [18]:
# Train on the first 400 batches, printing a progress dot every 10 batches.
for (batch, (images, labels)) in enumerate(dataset.take(400)):
    if batch % 10 == 0:
        print('.', end='')
    # The forward pass and loss run under the tape so they can be differentiated.
    with tf.GradientTape() as tape:
        logits = mnist_model(images, training=True)
        loss_value = tf.losses.sparse_softmax_cross_entropy(labels, logits)
        
    # Record the scalar loss, then update the model's trainable variables.
    loss_history.append(loss_value.numpy())
    grads = tape.gradient(loss_value, mnist_model.trainable_variables)
    optimizer.apply_gradients(zip(grads, mnist_model.trainable_variables),
                              global_step=tf.train.get_or_create_global_step())

........................................

In [19]:
import matplotlib.pyplot as plt

# Plot the per-batch training loss collected in loss_history.
plt.plot(loss_history)
plt.xlabel('Batch #')
plt.ylabel('Loss [entropy]')

Text(0, 0.5, 'Loss [entropy]')

In [20]:
class Model(tf.keras.Model):
    """Linear model computing inputs * W + B with scalar variables W and B."""
    def __init__(self):
        super(Model, self).__init__()
        # Starting values 5 and 10; named 'weight' and 'bias'.
        self.W = tf.Variable(5., name='weight')
        self.B = tf.Variable(10., name='bias')
    def call(self, inputs):
        """Return inputs * W + B."""
        return inputs * self.W + self.B

# A toy example of points around 3 * x + 2
NUM_EXAMPLES = 2000
# Inputs and noise are both drawn with tf.random_normal; no seed is set in
# this cell, so the values (and the losses printed below) vary between runs.
training_inputs = tf.random_normal([NUM_EXAMPLES])
noise = tf.random_normal([NUM_EXAMPLES])
training_outputs = training_inputs * 3 + 2 + noise

# The loss function to be optimized
def loss(model, inputs, targets):
    """Return the mean squared difference between model(inputs) and targets."""
    squared_error = tf.square(model(inputs) - targets)
    return tf.reduce_mean(squared_error)

def grad(model, inputs, targets):
    """Return the gradients of loss(model, inputs, targets) w.r.t. [model.W, model.B]."""
    # The loss is evaluated inside the tape's context so it can be differentiated.
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    return tape.gradient(loss_value, [model.W, model.B])

# Define:
# 1. A model
# 2. Derivatives of a loss function with respect to model parameters
# 3. A strategy for updating the variables based on the derivatives
model = Model()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))

# Training loop
# 300 gradient-descent steps over the full data set; the loss is reported
# every 20 steps.
for i in range(300):
    grads = grad(model, training_inputs, training_outputs)
    optimizer.apply_gradients(zip(grads, [model.W, model.B]),
                              global_step=tf.train.get_or_create_global_step())
    if i % 20 == 0:
        print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs)))
print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))
# The data was generated around 3 * x + 2, so W and B should end up near 3 and 2.
print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy()))

Initial loss: 68.002
Loss at step 000: 65.385
Loss at step 020: 30.024
Loss at step 040: 14.081
Loss at step 060: 6.889
Loss at step 080: 3.642
Loss at step 100: 2.176
Loss at step 120: 1.513
Loss at step 140: 1.213
Loss at step 160: 1.078
Loss at step 180: 1.016
Loss at step 200: 0.989
Loss at step 220: 0.976
Loss at step 240: 0.970
Loss at step 260: 0.968
Loss at step 280: 0.967
Final loss: 0.966
W = 2.968993663787842, B = 2.025365114212036


In [21]:
# Skipped entirely when no GPU is available.
if tf.test.is_gpu_available():
    with tf.device("gpu:0"):
        v = tf.Variable(tf.random_normal([1000, 1000]))
        # Dropping the only Python reference to the variable.
        v = None # v no longer takes up GPU memory

In [22]:
# Register the variable with a checkpoint object under the name "x".
# NOTE: this rebinds `x`, which an earlier cell used for a Python list.
x = tf.Variable(10.)
checkpoint = tf.train.Checkpoint(x=x)

In [23]:
# Change the value before saving so the checkpoint holds 2.0, not the
# initial 10.0.
x.assign(2.)
checkpoint_path = './ckpt/'
# Save through checkpoint_path rather than a repeated literal, so the save
# location cannot drift from the one the restore cell looks up.
checkpoint.save(checkpoint_path)

'./ckpt/-1'

In [24]:
# Overwrite the value, then restore the latest checkpoint found under
# checkpoint_path: x goes back to the saved 2.0.
x.assign(11.)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))

print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0>


In [25]:
import os
import tempfile

# Checkpoint a model together with its optimizer and the global step.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3,3], activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10)
])
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
# Fresh temporary directory; checkpoints are saved with the "ckpt" prefix inside it.
checkpoint_dir = tempfile.mkdtemp()
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
root = tf.train.Checkpoint(optimizer=optimizer,
                          model=model,
                          optimizer_step=tf.train.get_or_create_global_step())
root.save(checkpoint_prefix)
# Trailing expression: the cell displays the load-status object returned by restore().
root.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x1d94a05ca90>

In [27]:
# Running-mean metric named "loss"; each call folds new values into the mean.
# NOTE: this rebinds `m`, which an earlier cell used for a matmul result.
m = tfe.metrics.Mean("loss")
m(0)
m(5)
m.result()  # mean of 0 and 5 -> 2.5 (not displayed: not the cell's last expression)
m([8,9])
m.result()  # mean of 0, 5, 8 and 9 -> 5.5

<tf.Tensor: id=68186, shape=(), dtype=float64, numpy=5.5>

In [28]:
global_step = tf.train.get_or_create_global_step()

# Create a summary writer for logdir and make it the default writer.
logdir = "./tb/"
writer = tf.contrib.summary.create_file_writer(logdir)
writer.set_as_default()

for _ in range(10):
    global_step.assign_add(1)
    # Summary ops are issued inside a record_summaries_* context; this one is
    # configured with an interval of 100 global steps.
    # NOTE(review): with only 10 increments here, whether any summary is
    # actually recorded depends on the global step's value when this cell
    # runs - confirm.
    with tf.contrib.summary.record_summaries_every_n_global_steps(100):
        tf.contrib.summary.scalar('global_step', global_step)

In [30]:
def line_search_step(fn, init_x, rate=1.0):
    """Take one gradient step from init_x, halving the step size as needed.

    Args:
        fn: callable mapping a tensor to the scalar value being minimised.
        init_x: starting point; watched manually on the gradient tape.
        rate: initial step size, halved after every trial step.

    Returns:
        A tuple (x, value) with the last point tried and fn at that point.
        If the loop condition is false from the start (for example when the
        gradient is zero), no step is taken and (init_x, fn(init_x)) is
        returned.
    """
    with tf.GradientTape() as tape:
        # variables are automatically recorded, but manually watch a tensor
        tape.watch(init_x)
        value = fn(init_x)
    grad = tape.gradient(value, init_x)
    # Sum of squared gradient components; read by the loop condition below.
    grad_norm = tf.reduce_sum(grad * grad)
    init_value = value
    # Start from init_x so `x` is bound even if the loop body never runs.
    x = init_x
    while value > init_value - rate * grad_norm:
        x = init_x - rate * grad
        value = fn(x)
        rate /= 2.0
    return x, value