In [61]:
import tensorflow as tf
import numpy as np
import timeit
# Print the version of the imported TensorFlow package.
print(tf.__version__)

2.3.0


### Tensors
Tensors are multi-dimensional arrays with a uniform type (called a `dtype`).  
All tensors are immutable, like Python numbers and strings: you can never update the contents of a tensor, only create a new one.

In [55]:
# Tensors of increasing rank
a = tf.constant(1)            # rank 0: a single scalar value
b = tf.constant([2, 3, 4])    # rank 1: a vector with three elements
# rank 2: a 3x2 matrix, explicitly stored as 16-bit floats
c = tf.constant([[1, 2], [3, 4], [5, 6]], dtype=tf.float16)

In [18]:
# Basic operations
# Element-wise addition of the scalar `a` and the vector `b`.
print(tf.add(a, b))
# Python-style slicing: keep only the first two elements of `b`.
print(b[0:2])

tf.Tensor([3 4 5], shape=(3,), dtype=int32)
tf.Tensor([2 3], shape=(2,), dtype=int32)


In [25]:
# Ragged tensors: the rows are allowed to have different lengths
ragged_list = [[0, 1, 2, 3], [4, 5], [6, 7, 8], [9]]
a = tf.ragged.constant(ragged_list)
a

<tf.RaggedTensor [[0, 1, 2, 3], [4, 5], [6, 7, 8], [9]]>

In [33]:
# Sparse tensor: only the listed entries are stored explicitly.
# indices[i] is the (row, column) position of values[i] in a 3x4 tensor.
a = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 2]],
    values=[1, 2],
    dense_shape=[3, 4],
)
# Convert to a dense tensor so the full 3x4 layout is visible when printed.
print(tf.sparse.to_dense(a))

tf.Tensor(
[[1 0 0 0]
 [0 0 2 0]
 [0 0 0 0]], shape=(3, 4), dtype=int32)


### Variables
A `tf.Variable` represents a tensor whose value can be changed by running ops on it.  
Calling `assign` does not (usually) allocate a new tensor; instead, the existing tensor's memory is reused.

In [53]:
# Variables
# Build a tf.Variable whose initial value is taken from a constant tensor.
a = tf.constant(1)
v = tf.Variable(initial_value=a)
v

<tf.Variable 'Variable:0' shape=() dtype=int32, numpy=1>

In [46]:
# Give the existing variable `v` a new value, then display it.
v.assign(3)
v

<tf.Variable 'Variable:0' shape=() dtype=int32, numpy=3>

### Automatic Differentiation
By default, the tape only watches trainable variables.  
Use `tape.watch()` to explicitly specify other tensors or variables to watch.

Gradients will not be computed (`tape.gradient` returns `None`) if you:
1. Replaced a variable with a tensor (a plain `tf.Tensor` instead of a `tf.Variable`)
2. Did calculations outside of TensorFlow (for example, with NumPy)
3. Took gradients through an integer or string (the dtype is an int or a string)


In [54]:
# Automatic Differentiation
x = tf.Variable(3.0)

# Operations executed inside the context manager are recorded on the tape.
with tf.GradientTape() as tape:
    # `x` is a trainable tf.Variable, so no explicit tape.watch(x) is needed;
    # call tape.watch(x) here only if x is not a trainable variable.
    y = tf.square(x) + 4.0 * x + 10.0

# dy/dx = 2x + 4, evaluated at x = 3.0
dy_dx = tape.gradient(y, x)
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

### Graphs and functions
A graph is a data structure that contains a set of `tf.Operation` objects, which represent units of computation, and `tf.Tensor` objects, which represent the units of data that flow between operations.

In [72]:
# Subclass tf.keras.Model to build a small model for classifying 28x28 pictures
class SequentialModel(tf.keras.Model):
    """Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10), applied in order."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Layers are created once here and reused on every forward pass.
        self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28))
        self.dense_1 = tf.keras.layers.Dense(128, activation="relu")
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.dense_2 = tf.keras.layers.Dense(10)

    def call(self, x):
        """Pass ``x`` through the four layers in sequence and return the result."""
        flat = self.flatten(x)
        hidden = self.dense_1(flat)
        hidden = self.dropout(hidden)
        return self.dense_2(hidden)

# A random batch of 60 inputs, each 28x28, used for both timings.
input_data = tf.random.uniform([60, 28, 28])

eager_model = SequentialModel()
# Wrapping the model in tf.function gives a graph-executing callable.
graph_model = tf.function(eager_model)

# Time 10000 forward passes of each variant on the same input.
eager_seconds = timeit.timeit(lambda: eager_model(input_data), number=10000)
print("Eager time:", eager_seconds)
graph_seconds = timeit.timeit(lambda: graph_model(input_data), number=10000)
print("Graph time:", graph_seconds)


Eager time: 10.377188899999965
Graph time: 6.047591000000466


### Basic Linear Regression Model

In [91]:
# The actual line the model should recover: y = TRUE_W * x + TRUE_B
TRUE_W = 3.0
TRUE_B = 2.0

NUM_EXAMPLES = 1000

# Fix TensorFlow's global random seed so the synthetic data drawn below is
# the same every time this cell runs (Restart & Run All stays reproducible).
SEED = 42
tf.random.set_seed(SEED)

# A vector of random x values
x = tf.random.normal(shape=[NUM_EXAMPLES])

# Generate some noise
noise = tf.random.normal(shape=[NUM_EXAMPLES])

# Calculate y: points on the true line, perturbed by the noise
y = x * TRUE_W + TRUE_B + noise

In [100]:
# Linear regression: loss function and model
def loss(target_y, predicted_y):
    """Mean squared error, reduced over the entire batch to a single value."""
    residual = target_y - predicted_y
    return tf.reduce_mean(tf.square(residual))

class MyModel(tf.keras.Model):
    """Linear model ``w * x + b`` with a scalar weight and a scalar bias."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Initialize the weights to `5.0` and the bias to `0.0`
        # In practice, these should be randomly initialized
        self.w = tf.Variable(5.0)
        self.b = tf.Variable(0.0)

    def call(self, x, **kwargs):
        """Forward pass: return ``self.w * x + self.b``.

        The forward pass is defined in `call` instead of overriding
        `__call__`: `model(x)` then goes through the `__call__` inherited
        from `tf.keras.Model`, which invokes this method, rather than
        replacing that wrapper entirely.

        Args:
            x: Input tensor of feature values.
            **kwargs: Extra call arguments (for example `training`); accepted
                for compatibility and ignored.

        Returns:
            Tensor of predictions computed as ``self.w * x + self.b``.
        """
        return self.w * x + self.b

model = MyModel()


In [101]:
EPOCHS = 10
learning_rate = 0.1

for epoch in range(EPOCHS):
    # One gradient-descent step per epoch, using all of the data as one batch.
    with tf.GradientTape() as tape:
        # The tape automatically tracks the model's trainable variables.
        current_loss = loss(y, model(x))

    # Gradients of the loss with respect to the weight and the bias.
    grad_w, grad_b = tape.gradient(current_loss, [model.w, model.b])

    # Update rule: parameter -= learning_rate * gradient
    for variable, gradient in ((model.w, grad_w), (model.b, grad_b)):
        variable.assign_sub(learning_rate * gradient)

    # W and b are printed after the update; the loss was computed before it.
    print("Epoch %2d: W=%1.2f b=%1.2f, loss=%2.5f" %
          (epoch, model.w.numpy(), model.b.numpy(), current_loss))


Epoch  0: W=4.58 b=0.40, loss=9.21292
Epoch  1: W=4.24 b=0.72, loss=6.16160
Epoch  2: W=3.98 b=0.98, loss=4.24380
Epoch  3: W=3.77 b=1.18, loss=3.03835
Epoch  4: W=3.61 b=1.34, loss=2.28061
Epoch  5: W=3.48 b=1.47, loss=1.80427
Epoch  6: W=3.38 b=1.57, loss=1.50479
Epoch  7: W=3.30 b=1.66, loss=1.31651
Epoch  8: W=3.23 b=1.72, loss=1.19811
Epoch  9: W=3.18 b=1.77, loss=1.12367


In [103]:
# Train a fresh copy of the same model with Keras' built-in training loop.
keras_model = MyModel()

# Plain SGD with a 0.1 learning rate and mean squared error as the loss.
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
keras_model.compile(optimizer=sgd_optimizer, loss=tf.keras.losses.mean_squared_error)

# 10 epochs with a batch size of 1000.
keras_model.fit(x, y, epochs=10, batch_size=1000)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1aac0030288>