# Tensors

In [1]:
import tensorflow as tf
import numpy as np

## Basic

#### A "scalar", rank-0 tensor, no axes:

In [2]:
rank_0_tensor = tf.constant(4)
print(rank_0_tensor)

tf.Tensor(4, shape=(), dtype=int32)


#### A "vector", rank-1 tensor, 1 axis:

In [3]:
rank_1_tensor = tf.constant([2.0, 3.0, 4.0])
print(rank_1_tensor)

tf.Tensor([2. 3. 4.], shape=(3,), dtype=float32)


#### A "matrix", rank-2 tensor, 2 axes:

In [4]:
rank_2_tensor = tf.constant([[1,2],
                             [3,4],
                             [5,6]], dtype = tf.float16)
print(rank_2_tensor)

tf.Tensor(
[[1. 2.]
 [3. 4.]
 [5. 6.]], shape=(3, 2), dtype=float16)


#### A rank-3 tensor, 3 axes:

In [5]:
rank_3_tensor = tf.constant([
    [[0,1,2,3,4],
     [5,6,7,8,9]],
    [[10, 11, 12, 13, 14],
     [15, 16, 17, 18, 19]],
    [[20, 21, 22, 23, 24],
     [25, 26, 27, 28, 29]]])
print(rank_3_tensor)

tf.Tensor(
[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]]

 [[10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]]], shape=(3, 2, 5), dtype=int32)


#### Convert a tensor to np.array

In [6]:
np.array(rank_2_tensor)

array([[1., 2.],
       [3., 4.],
       [5., 6.]], dtype=float16)

In [7]:
rank_2_tensor.numpy()

array([[1., 2.],
       [3., 4.],
       [5., 6.]], dtype=float16)

#### Arithmetic

In [8]:
a = tf.constant([[1,2],[3,4]])
b = tf.constant([[1,1],[1,1]])

In [9]:
print(a)
print(b)

tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[1 1]
 [1 1]], shape=(2, 2), dtype=int32)


In [10]:
tf.add(a,b)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[2, 3],
       [4, 5]], dtype=int32)>

In [11]:
tf.multiply(a,b) # element-wise

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>

In [12]:
tf.matmul(a,b)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[3, 3],
       [7, 7]], dtype=int32)>

In [13]:
print(a+b)
print(a*b)
print(a@b)

tf.Tensor(
[[2 3]
 [4 5]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[3 3]
 [7 7]], shape=(2, 2), dtype=int32)


#### Other operations:

In [14]:
c = tf.constant([[4.0, 5.0], [10.0, 1.0]])

In [15]:
tf.reduce_max(c) # Find largest value

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

In [16]:
tf.argmax(c) # Index of the largest value along axis 0 (the default): one index per column

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 0])>

In [17]:
s = tf.nn.softmax(c) # compute softmax
s

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2.6894143e-01, 7.3105860e-01],
       [9.9987662e-01, 1.2339458e-04]], dtype=float32)>

In [18]:
s[0,0] + s[0,1] # Note: each row sums to 1

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

## Shapes

In [19]:
rank_4_tensor = tf.zeros([3,2,4,5])

In [20]:
rank_4_tensor.dtype

tf.float32

In [21]:
rank_4_tensor.ndim

4

In [22]:
rank_4_tensor.shape

TensorShape([3, 2, 4, 5])

In [23]:
rank_4_tensor.shape[0]

3

In [24]:
rank_4_tensor.shape[-1]

5

In [25]:
tf.size(rank_4_tensor).numpy()

120

#### Keeping track of each index's meaning
Often axes are ordered from global to local: The batch axis first, followed by spatial dimensions, and features for each location last. This way feature vectors are contiguous regions of memory.
For our rank-4 tensor:
(Batch, Width, Height, Features)

## Indexing

In [26]:
rank_1_tensor = tf.constant([0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
rank_1_tensor.numpy()

array([ 0,  1,  1,  2,  3,  5,  8, 13, 21, 34], dtype=int32)

In [27]:
# Tensor indexing follows Python rules: indices start at 0,
# and negative indices count backwards from the end.
print("First:", rank_1_tensor[0].numpy())
print("Second:", rank_1_tensor[1].numpy())  # index 1 is the second element (index 2 would be the third)
print("Last:", rank_1_tensor[-1].numpy())

First: 0
Second: 1
Last: 34


#### Slicing:

In [28]:
print("Everything: ", rank_1_tensor[:].numpy())
print("Before 4: ", rank_1_tensor[:4].numpy())
print("From 4 to the end:", rank_1_tensor[4:].numpy())
print("From 2, before 7:", rank_1_tensor[2:7].numpy())
print("Every other item:", rank_1_tensor[::2].numpy())
print("Reversed:", rank_1_tensor[::-1].numpy())

Everything:  [ 0  1  1  2  3  5  8 13 21 34]
Before 4:  [0 1 1 2]
From 4 to the end: [ 3  5  8 13 21 34]
From 2, before 7: [1 2 3 5 8]
Every other item: [ 0  1  3  8 21]
Reversed: [34 21 13  8  5  3  2  1  1  0]


#### Multi-axis Indexing:

In [29]:
print(rank_2_tensor.numpy())

[[1. 2.]
 [3. 4.]
 [5. 6.]]


In [30]:
rank_2_tensor[1,1].numpy() # a scalar

4.0

In [31]:
print("Second row: ", rank_2_tensor[1,:].numpy())
print("Second column: ", rank_2_tensor[:,1].numpy())
print("Last row: ", rank_2_tensor[-1,:].numpy())
print("First item in last column: ", rank_2_tensor[0,-1].numpy())
print("Skip the first row: \n", rank_2_tensor[1:,:].numpy())

Second row:  [3. 4.]
Second column:  [2. 4. 6.]
Last row:  [5. 6.]
First item in last column:  2.0
Skip the first row: 
 [[3. 4.]
 [5. 6.]]


#### Example with 3-axis tensor:

In [32]:
print(rank_3_tensor.numpy())
print(np.shape(rank_3_tensor.numpy()))

[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]]

 [[10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]]]
(3, 2, 5)


In [33]:
rank_3_tensor[:,:,4]

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[ 4,  9],
       [14, 19],
       [24, 29]], dtype=int32)>

## Manipulate Shapes

In [34]:
var_x = tf.Variable(tf.constant([[1],[2],[3]]))
var_x.shape # Shape returns a `TensorShape` object that shows the size on each dimension

TensorShape([3, 1])

In [35]:
var_x.shape.as_list() # Convert to Python list

[3, 1]

In [36]:
reshaped = tf.reshape(var_x, [1,3])

In [37]:
reshaped

<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[1, 2, 3]], dtype=int32)>

In [38]:
print(var_x)

<tf.Variable 'Variable:0' shape=(3, 1) dtype=int32, numpy=
array([[1],
       [2],
       [3]], dtype=int32)>


In [39]:
print(var_x.shape)
print(reshaped.shape)

(3, 1)
(1, 3)


In [40]:
print(rank_3_tensor)

tf.Tensor(
[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]]

 [[10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]]], shape=(3, 2, 5), dtype=int32)


TensorFlow uses C-style "row-major" memory ordering, where incrementing the right-most index corresponds to a single step in memory. If you flatten a tensor you can see what order it is laid out in memory.

In [41]:
# A `-1` passed in the `shape` argument says "Whatever fits".
tf.reshape(rank_3_tensor, [-1])

<tf.Tensor: shape=(30,), dtype=int32, numpy=
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], dtype=int32)>

Typically the only reasonable uses of tf.reshape are to combine or split adjacent axes (or add/remove 1s).

For this 3x2x5 tensor, reshaping to (3x2)x5 or 3x(2x5) are both reasonable things to do, as the slices do not mix:

In [42]:
tf.reshape(rank_3_tensor, [3*2, 5])

<tf.Tensor: shape=(6, 5), dtype=int32, numpy=
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]], dtype=int32)>

In [43]:
tf.reshape(rank_3_tensor, [3, -1])

<tf.Tensor: shape=(3, 10), dtype=int32, numpy=
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]], dtype=int32)>

#### Swapping axes in tf.reshape does not work, you need tf.transpose for that.
Reshaping will "work" for any new shape with the same total number of elements, but it will not do anything useful if you do not respect the order of the axes.
#### Bad Examples:

In [44]:
tf.reshape(rank_3_tensor, [2,3,5]) # You can't reorder axes with reshape.

<tf.Tensor: shape=(2, 3, 5), dtype=int32, numpy=
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]], dtype=int32)>

In [45]:
tf.reshape(rank_3_tensor, [5,6]) # This is a mess

<tf.Tensor: shape=(5, 6), dtype=int32, numpy=
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]], dtype=int32)>

In [46]:
try:
    tf.reshape(rank_3_tensor, [7, -1])
except Exception as e:
    print(f"{type(e).__name__}: {e}")

InvalidArgumentError: Input to reshape is a tensor with 30 values, but the requested shape requires a multiple of 7 [Op:Reshape]


## Data Types

#### Casting

In [47]:
the_f64_tensor = tf.constant([2.3, 3.3, 4.4], dtype = tf.float64)
the_f16_tensor = tf.cast(the_f64_tensor, dtype = tf.float16)
the_u8_tensor = tf.cast(the_f16_tensor, dtype = tf.uint8)
print(the_u8_tensor)

tf.Tensor([2 3 4], shape=(3,), dtype=uint8)


## Broadcasting

In [48]:
x = tf.constant([1,2,3])
y = tf.constant(2)
z = tf.constant([2, 2, 2])

In [49]:
# Below are the same:
print(tf.multiply(x, 2))
print( x * y)
print(x * z)

tf.Tensor([2 4 6], shape=(3,), dtype=int32)
tf.Tensor([2 4 6], shape=(3,), dtype=int32)
tf.Tensor([2 4 6], shape=(3,), dtype=int32)


In [50]:
x = tf.reshape(x, [3,1])
y = tf.range(1, 5)

print(x, "\n")
print(y, "\n")


tf.Tensor(
[[1]
 [2]
 [3]], shape=(3, 1), dtype=int32) 

tf.Tensor([1 2 3 4], shape=(4,), dtype=int32) 



In [51]:
tf.multiply(x, y)

<tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[ 1,  2,  3,  4],
       [ 2,  4,  6,  8],
       [ 3,  6,  9, 12]], dtype=int32)>

In [52]:
# The above is equivalent to the following, with x and y broadcasted:
x_stretch = tf.constant([[1,1,1,1],
                          [2,2,2,2],
                          [3,3,3,3]])
                          
y_stretch = tf.constant([[1,2,3,4],
                        [1,2,3,4],
                        [1,2,3,4]])

print(x_stretch * y_stretch)

tf.Tensor(
[[ 1  2  3  4]
 [ 2  4  6  8]
 [ 3  6  9 12]], shape=(3, 4), dtype=int32)


####  See what broadcasting looks like

In [53]:
tf.broadcast_to(tf.constant([1,2,3]),[3,3])

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]], dtype=int32)>

## Ragged Tensor

A tensor with variable numbers of elements along some axis is called "ragged". Use tf.ragged.RaggedTensor for ragged data.

In [54]:
ragged_list =[
    [0,1,2,3],
    [4,5],
    [6,7,8],
    [9]
]

In [55]:
try:
    tensor = tf.constant(ragged_list)
except Exception as e:
    print(f"{type(e).__name__}: {e}")
    

ValueError: Can't convert non-rectangular Python sequence to Tensor.


In [56]:
ragged_tensor = tf.ragged.constant(ragged_list)

In [57]:
print(ragged_tensor)
print(ragged_tensor.shape)

<tf.RaggedTensor [[0, 1, 2, 3], [4, 5], [6, 7, 8], [9]]>
(4, None)


## String Tensors

tf.string is a dtype, which is to say we can represent data as strings (variable-length byte arrays) in tensors.

The strings are atomic and cannot be indexed the way Python strings are. The length of the string is not one of the dimensions of the tensor. See [tf.strings](https://www.tensorflow.org/api_docs/python/tf/strings) for functions to manipulate them.

In [58]:
scalar_string_tensor = tf.constant("Gray wolf")
print(scalar_string_tensor)

tf.Tensor(b'Gray wolf', shape=(), dtype=string)


In [59]:
# If we have three strings of different lengths, this is OK.
tensor_of_strings = tf.constant(["Gray wolf",
                                "Quick brown fox",
                                "Lazy dog"])

# Note that the shape is (3,): the string lengths are not part of the tensor's shape.
print(tensor_of_strings)

tf.Tensor([b'Gray wolf' b'Quick brown fox' b'Lazy dog'], shape=(3,), dtype=string)


In the above printout the b prefix indicates that tf.string dtype is not a unicode string, but a byte-string. See the [Unicode Tutorial](https://www.tensorflow.org/tutorials/load_data/unicode) for more about working with unicode text in TensorFlow.
#### If you pass unicode characters they are utf-8 encoded.

In [60]:
tf.constant("🥳👍")

<tf.Tensor: shape=(), dtype=string, numpy=b'\xf0\x9f\xa5\xb3\xf0\x9f\x91\x8d'>

#### Basic string functions

In [61]:
# We can use split to split a string into a tensor of substrings
print( tf.strings.split(scalar_string_tensor, sep = " ") )

tf.Tensor([b'Gray' b'wolf'], shape=(2,), dtype=string)


#### Split tensor of strings becomes RaggedTensor

In [62]:
print( tf.strings.split(tensor_of_strings) )

<tf.RaggedTensor [[b'Gray', b'wolf'], [b'Quick', b'brown', b'fox'], [b'Lazy', b'dog']]>


#### tf.strings.to_number: 

In [63]:
text = tf.constant("1 10 100")
print( tf.strings.to_number(tf.strings.split(text, " ")) )

tf.Tensor([  1.  10. 100.], shape=(3,), dtype=float32)


Although you can't use tf.cast to turn a string tensor into numbers, you can convert it into bytes, and then into numbers.

In [64]:
byte_strings = tf.strings.bytes_split(tf.constant("Duck"))
byte_ints = tf.io.decode_raw(tf.constant("Duck"), tf.uint8)

In [65]:
print(byte_strings)
print(byte_ints)

tf.Tensor([b'D' b'u' b'c' b'k'], shape=(4,), dtype=string)
tf.Tensor([ 68 117  99 107], shape=(4,), dtype=uint8)


In [66]:
unicode_bytes = tf.constant("アヒル 🦆")
unicode_char_bytes = tf.strings.unicode_split(unicode_bytes, "UTF-8")
unicode_values = tf.strings.unicode_decode(unicode_bytes, "UTF-8")

In [67]:
print(unicode_bytes)
print(unicode_char_bytes)
print(unicode_values)

tf.Tensor(b'\xe3\x82\xa2\xe3\x83\x92\xe3\x83\xab \xf0\x9f\xa6\x86', shape=(), dtype=string)
tf.Tensor([b'\xe3\x82\xa2' b'\xe3\x83\x92' b'\xe3\x83\xab' b' ' b'\xf0\x9f\xa6\x86'], shape=(5,), dtype=string)
tf.Tensor([ 12450  12498  12523     32 129414], shape=(5,), dtype=int32)


The [tf.string](https://www.tensorflow.org/api_docs/python/tf#string) dtype is used for all raw bytes data in TensorFlow. The [tf.io](https://www.tensorflow.org/api_docs/python/tf/io) module contains functions for converting data to and from bytes, including decoding images and parsing csv.

## Sparse Tensors

Sometimes, your data is sparse, like a very wide embedding space. TensorFlow supports [tf.sparse.SparseTensor](https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor) and related operations to store sparse data efficiently.

In [68]:
sparse_tensor = tf.sparse.SparseTensor(indices = [[0,0], [1,2]], 
                                       values = [1,2], 
                                       dense_shape=[3,4])

In [69]:
print(sparse_tensor)

SparseTensor(indices=tf.Tensor(
[[0 0]
 [1 2]], shape=(2, 2), dtype=int64), values=tf.Tensor([1 2], shape=(2,), dtype=int32), dense_shape=tf.Tensor([3 4], shape=(2,), dtype=int64))


In [70]:
# We can convert sparse tensors to dense
print(tf.sparse.to_dense(sparse_tensor))

tf.Tensor(
[[1 0 0 0]
 [0 0 2 0]
 [0 0 0 0]], shape=(3, 4), dtype=int32)


# The Sequential Model

In [71]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [72]:
model = keras.Sequential([
    layers.Dense(2, activation = "relu", name = "layer1"),
    layers.Dense(3, activation = "relu", name = "layer2"),
    layers.Dense(4, name = "layer3")
])

# Call model on test input
x = tf.ones((3, 3))
y = model(x)

In [73]:
# Equivalent to above
layer1 = layers.Dense(2, activation = "relu", name = "layer1")
layer2 = layers.Dense(3, activation = "relu", name = "layer2")
layer3 = layers.Dense(4, name = "layer3")

# Call model on test input
x = tf.ones((3,3))
y = layer3(layer2(layer1(x)))

## Create Sequential Model

In [74]:
model = keras.Sequential(
    [
        layers.Dense(2, activation = "relu"),
        layers.Dense(3, activation = "relu"),
        layers.Dense(4)
    ]
)

In [75]:
# To access the layers
model.layers

[<tensorflow.python.keras.layers.core.Dense at 0x139a77c10>,
 <tensorflow.python.keras.layers.core.Dense at 0x139a79990>,
 <tensorflow.python.keras.layers.core.Dense at 0x139a79b50>]

In [76]:
# Create sequential model incrementally via add()
model = keras.Sequential()
model.add(layers.Dense(2, activation = "relu"))
model.add(layers.Dense(3, activation = "relu"))
model.add(layers.Dense(4))

In [77]:
# remove a layer with pop
model.pop()
print(model.layers)
print(len(model.layers))

[<tensorflow.python.keras.layers.core.Dense object at 0x1399f7e10>, <tensorflow.python.keras.layers.core.Dense object at 0x131563610>]
2


In [78]:
# Sequential constructor accepts a name argument.
# Useful to annotate TensorBoard graphs with semantically meaningful names.
model = keras.Sequential(name = "my_sequential")
model.add(layers.Dense(2, activation = "relu", name = "layer1"))
model.add(layers.Dense(3, activation = "relu", name = "layer2"))
model.add(layers.Dense(4, name = "layer3"))

## Specify input shape in advance

In [79]:
# initially, no weights
layer = layers.Dense(3)
layer.weights

[]

In [80]:
# A layer creates its weights the first time it is called on an input,
# because the shapes of the weights depend on the shape of that input.
x = tf.ones((1,4))
y = layer(x)
layer.weights # Now it has weights, of shape (4, 3) and (3,)

[<tf.Variable 'dense_6/kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[-0.45335189,  0.8692441 , -0.39932537],
        [-0.02130711,  0.18410814, -0.08009285],
        [-0.37473416, -0.06044829,  0.6966802 ],
        [ 0.06376815, -0.476156  ,  0.54320514]], dtype=float32)>,
 <tf.Variable 'dense_6/bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

In [81]:
# The same goes for Sequential models: without an input shape, the model isn't built and has no weights.
# The weights are created when the model first sees some data.
model = keras.Sequential(
    [
        layers.Dense(2, activation = "relu"),
        layers.Dense(3, activation = "relu"),
        layers.Dense(4)
    ]
)


In [82]:
# no weights at this stage
try:
    model.weights
except Exception as e:
    print(f"{type(e).__name__}: {e}")
    

ValueError: Weights for model sequential_3 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.


In [83]:
# Call model on test input
x = tf.ones((1,4))
y = model(x)
# Number of weights after calling the model
len(model.weights)

6

In [84]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              multiple                  10        
_________________________________________________________________
dense_8 (Dense)              multiple                  9         
_________________________________________________________________
dense_9 (Dense)              multiple                  16        
Total params: 35
Trainable params: 35
Non-trainable params: 0
_________________________________________________________________


In [85]:
model = keras.Sequential()
model.add(keras.Input(shape = (4,))) # start model by passing an Input object so that it knows its input shape from the start
model.add(layers.Dense(2, activation = 'relu'))
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 2)                 10        
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________


In [86]:
# Note that the Input object is not displayed as part of model.layers, since it isn't a layer
model.layers

[<tensorflow.python.keras.layers.core.Dense at 0x1399f7d10>]

In [87]:
# A simple alternative is to just pass an input_shape argument to your first layer
model = keras.Sequential()
model.add(layers.Dense(2, activation = 'relu', input_shape = (4, )))
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 2)                 10        
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________


Models built with a predefined input shape like this always have weights (even before seeing any data) and always have a defined output shape.

In general, it's a recommended best practice to always specify the input shape of a Sequential model in advance if you know what it is.

## A common debugging workflow: add() + summary()
When building a new Sequential architecture, it's useful to incrementally stack layers with add() and frequently print model summaries. For instance, this enables you to monitor how a stack of Conv2D and MaxPooling2D layers is downsampling image feature maps:

In [88]:
model = keras.Sequential()
model.add(keras.Input(shape = (250, 250, 3))) # 250 x 250 RGB images
# 32 filters of size 5x5 with stride 2: 250x250 inputs become 123x123 feature maps (see the summary below)
model.add(layers.Conv2D(32, 5, strides = 2, activation = 'relu'))
model.add(layers.Conv2D(32, 3, activation = 'relu'))
model.add(layers.MaxPooling2D(3))

In [89]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 123, 123, 32)      2432      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 121, 121, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 40, 40, 32)        0         
Total params: 11,680
Trainable params: 11,680
Non-trainable params: 0
_________________________________________________________________


In [90]:
# Note: the max pooling layer has a 3x3 pool size and a stride of 3:
print(model.layers[2].pool_size)
print(model.layers[2].strides)

(3, 3)
(3, 3)


In [91]:
# Continue adding layers
model.add(layers.Conv2D(32, 3, activation = 'relu'))
model.add(layers.Conv2D(32, 3, activation = 'relu'))
model.add(layers.MaxPooling2D(3))
model.add(layers.Conv2D(32, 3, activation = 'relu'))
model.add(layers.Conv2D(32, 3, activation = 'relu'))
model.add(layers.MaxPooling2D(2))

In [92]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 123, 123, 32)      2432      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 121, 121, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 40, 40, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 38, 38, 32)        9248      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 36, 36, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 10, 10, 32)       

In [93]:
# Now that we have 4x4 feature maps, time to apply global max pooling.
# Global max pooling keeps only the maximum of each feature map (taken over height and width),
# so every 4x4 map collapses to a single value per channel.
model.add(layers.GlobalMaxPooling2D())

In [94]:
# Finally, we add a classification layer.
model.add(layers.Dense(10))

In [95]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 123, 123, 32)      2432      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 121, 121, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 40, 40, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 38, 38, 32)        9248      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 36, 36, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 10, 10, 32)       

#### After the model is built:
- [Train your model, evaluate it, and run inference.](https://www.tensorflow.org/guide/keras/train_and_evaluate/).
- [Save your model to disk and restore it.](https://www.tensorflow.org/guide/keras/save_and_serialize/)



## Feature extraction with a Sequential model

In [96]:
initial_model = keras.Sequential(
    [
        keras.Input(shape = (250, 250, 3)),
        layers.Conv2D(32, 5, strides = 2, activation = 'relu'),
        layers.Conv2D(32, 3, activation = 'relu'),
        layers.Conv2D(32, 3, activation = 'relu')
    ]
)

In [97]:
feature_extractor = keras.Model(
    inputs = initial_model.inputs,
    outputs = [layer.output for layer in initial_model.layers]
)

In [98]:
# Call feature_extractor to test input
x = tf.ones((1, 250, 250, 3))
features = feature_extractor(x)

#### Example that extracts features from only one layer

In [99]:
initial_model = keras.Sequential([
    keras.Input(shape = (250, 250, 3)),
    layers.Conv2D(32, 5, strides = 2, activation = 'relu'),
    layers.Conv2D(32, 3, activation = 'relu', name = 'my_intermediate_layer'),
    layers.Conv2D(32, 3, activation = 'relu')
])

In [100]:
feature_extractor = keras.Model(
    inputs = initial_model.inputs,
    outputs = initial_model.get_layer(name = 'my_intermediate_layer').output
)

In [101]:
x = tf.ones((1, 250, 250, 3))
features = feature_extractor(x)

## Transfer learning with a Sequential model

First, let's say that you have a Sequential model, and you want to freeze all layers except the last one. In this case, you would simply iterate over model.layers and set layer.trainable = False on each layer, except the last one. 

In [102]:
# A small dense classifier used to demonstrate freezing every layer except the last one.
model = keras.Sequential(
    [
        # `shape` must be a tuple: `(784,)` is a one-element tuple, whereas `(784)` is
        # just the int 784 (accepted only by some Keras versions, rejected by newer ones).
        keras.Input(shape = (784,)),
        layers.Dense(32, activation = 'relu'),
        layers.Dense(32, activation = 'relu'),
        layers.Dense(32, activation = 'relu'),
        layers.Dense(10)
    ]
)

In [107]:
# Presumably you would want to first load pre-trained weights.
# model.load_weights(...)

In [106]:
# Freeze all layers except the last one.
for layer in model.layers[:-1]:
    layer.trainable = False


In [108]:
# Recompile and train (this will only update the weights of the last layer).
# model.compile(...)
# model.fit(...)

Use a Sequential model to stack a pre-trained model and some freshly initialized classification layers.

In [113]:
# Load a convolutional base with pre-trained weights
base_model = keras.applications.Xception(
    weights = 'imagenet',
    include_top = False, 
    pooling = 'avg'
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [114]:
# Freeze the base model
base_model.trainable = False

In [115]:
# Use a Sequential model to add a trainable classifier on top
model = keras.Sequential(
    [
        base_model,
        layers.Dense(1000)
    ]
)

In [116]:
# compile and train
# model.compile(...)
# model.fit(...)

# Word Embeddings