# TensorFlow 2 basic setup

In this notebook, I would like to implement a basic framework of model development, 
using *tf.Module* and *keras*.

In [37]:
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import layers

## Dataset

In [49]:
# Load dataset
data_train, data_test = tfds.load('mnist', split=['train','test'],
                                  shuffle_files=True, as_supervised=True,
                                 batch_size=-1)

# Convert data and label into numpy array
data_train, label_train = data_train[0].numpy(), data_train[1].numpy()
data_test, label_test = data_test[0].numpy(), data_test[1].numpy()

# Preprocessing
data_train = data_train/255
data_test = data_test/255

## Build a model using Keras functional API

In [50]:
def bulid_model_keras_functional(name=None):

    input_ = layers.Input(shape=[28,28,1], name='input')
    flat = layers.Flatten(name='flat')(input_)
    dense1 = layers.Dense(128, activation='relu',name='dense1')(flat)
    dense2 = layers.Dense(64, activation='relu',name='dense2')(dense1)
    output = layers.Dense(10, activation='softmax',name='output')(dense2)

    return tf.keras.models.Model(inputs=[input_], outputs=[output],name=name)


In [85]:
# model building
model = bulid_model_keras_functional()
model.summary()

Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 28, 28, 1)]       0         
                                                                 
 flat (Flatten)              (None, 784)               0         
                                                                 
 dense1 (Dense)              (None, 128)               100480    
                                                                 
 dense2 (Dense)              (None, 64)                8256      
                                                                 
 output (Dense)              (None, 10)                650       
                                                                 
Total params: 109386 (427.29 KB)
Trainable params: 109386 (427.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [158]:
model = bulid_model_keras_functional()

# training
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.legacy.RMSprop(),
    metrics=['sparse_categorical_crossentropy','sparse_categorical_accuracy'],
)

history = model.fit(data_train, label_train, validation_data=(data_test,label_test),
                    batch_size=60000, epochs=10, shuffle=False, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [159]:
model = bulid_model_keras_functional()

# training
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.legacy.RMSprop(),
    metrics=['sparse_categorical_crossentropy','sparse_categorical_accuracy'],
)

history = model.fit(data_train, label_train, validation_data=(data_test,label_test),
                    batch_size=60000, epochs=10, shuffle=True, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Train the model using Gradient tape

In [87]:
model2 = bulid_model_keras_functional()
epochs=10
loss = tf.keras.losses.SparseCategoricalCrossentropy()
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
optimizer = tf.keras.optimizers.legacy.RMSprop()

for e in range(epochs):

    with tf.GradientTape() as tape:
        y_pred = model2(data_train)
        loss_value = loss(label_train, y_pred)
        # print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
        #                                   loss_value.numpy()))
        
    grads = tape.gradient(loss_value, model2.trainable_variables)

    optimizer.apply_gradients(zip(grads, model2.trainable_variables))


    print("Step: {}".format(optimizer.iterations.numpy()))
    print("Training Loss: {}, Accuracy: {}".format(loss(label_train, model2(data_train)).numpy(), accuracy(label_train, model2(data_train)).numpy()))
    print("Val Loss: {}, Accuracy: {}".format(loss(label_test, model2(data_test)).numpy(), accuracy(label_test, model2(data_test)).numpy()))
        
    # print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
    #                                       loss(label_train, model2(data_train)).numpy()))

Step: 1
Training Loss: 2.0964486598968506, Accuracy: 0.36356666684150696
Val Loss: 2.0900580883026123, Accuracy: 0.364385724067688
Step: 2
Training Loss: 1.8530442714691162, Accuracy: 0.4502153992652893
Val Loss: 1.8434109687805176, Accuracy: 0.45765000581741333
Step: 3
Training Loss: 1.624595046043396, Accuracy: 0.5078999996185303
Val Loss: 1.6104457378387451, Accuracy: 0.513533353805542
Step: 4
Training Loss: 1.4611223936080933, Accuracy: 0.5393074154853821
Val Loss: 1.4485867023468018, Accuracy: 0.5425035953521729
Step: 5
Training Loss: 1.35591721534729, Accuracy: 0.5665705800056458
Val Loss: 1.339763879776001, Accuracy: 0.5698428750038147
Step: 6
Training Loss: 1.2168372869491577, Accuracy: 0.5875316858291626
Val Loss: 1.201808214187622, Accuracy: 0.5900999903678894
Step: 7
Training Loss: 1.087450385093689, Accuracy: 0.6144333481788635
Val Loss: 1.0692230463027954, Accuracy: 0.6180469393730164
Step: 8
Training Loss: 0.9919915199279785, Accuracy: 0.6355018019676208
Val Loss: 0.97229

## Build a model using tf.keras.layer

In [160]:
class ModelKerasLayer(tf.keras.layers.Layer):

    def __init__(self):
        super().__init__()

    def build(self, input):
        # self.input = layers.Input(shape=[28,28,1], name='input')
        # self.flat = layers.Flatten(name='flat')
        self.dense1 = tf.Variable(tf.random.normal([784, 128],stddev=0.1), name='dense1')
        self.bias1 = tf.Variable(tf.zeros([128]), name='b1')
        self.dense2 = tf.Variable(tf.random.normal([128,64],stddev=0.1), name='dense2')
        self.bias2 = tf.Variable(tf.zeros([64]), name='b2')
        self.dense3 = tf.Variable(tf.random.normal([64,10],stddev=0.1), name='dense3')
        self.bias3 = tf.Variable(tf.zeros([10]), name='b3')
        
        # self.dense1 = layers.Dense(128, activation='relu',name='dense1')
        # self.dense2 = layers.Dense(64, activation='relu',name='dense2')
        # self.output = layers.Dense(10, activation='softmax',name='output')
        
    def call(self, input):
        flat_input = layers.Flatten()(input)
        output_d1 = tf.keras.activations.relu(tf.matmul(flat_input,self.dense1) + self.bias1)
        output_d2 = tf.keras.activations.relu(tf.matmul(output_d1,self.dense2) + self.bias2)
        output_d3 = tf.matmul(output_d2,self.dense3) + self.bias3
        return tf.nn.softmax(output_d3)
        # return tf.keras.activations.softmax(output_d3)
        return output_d3


In [161]:
modelkl = ModelKerasLayer()
modelkl(data_train[:3,:,:,:])

<tf.Tensor: shape=(3, 10), dtype=float32, numpy=
array([[0.07279766, 0.16578269, 0.17470877, 0.13109253, 0.11420494,
        0.05171773, 0.04314727, 0.07552552, 0.06847826, 0.10254459],
       [0.08055943, 0.18254331, 0.14771058, 0.09612502, 0.11575445,
        0.05314843, 0.04808422, 0.07273257, 0.08116436, 0.12217763],
       [0.07576714, 0.16737936, 0.22198503, 0.09159083, 0.11512569,
        0.03125234, 0.03503455, 0.09289027, 0.06404123, 0.10493358]],
      dtype=float32)>

### Train the model using Gradient tape

In [163]:
modelkl = ModelKerasLayer()
epochs=20
loss = tf.keras.losses.SparseCategoricalCrossentropy()
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.001)

for e in range(epochs):

    with tf.GradientTape() as tape:
        y_pred = modelkl(data_train)
        loss_value = loss(label_train, y_pred)
        # print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
        #                                   loss_value.numpy()))
        
    grads = tape.gradient(loss_value, modelkl.trainable_variables)

    optimizer.apply_gradients(zip(grads, modelkl.trainable_variables))


    print("Step: {}".format(optimizer.iterations.numpy()))
    print("Training Loss: {}, Accuracy: {}".format(loss(label_train, modelkl(data_train)).numpy(), accuracy(label_train, modelkl(data_train)).numpy()))
    print("Val Loss: {}, Accuracy: {}".format(loss(label_test, modelkl(data_test)).numpy(), accuracy(label_test, modelkl(data_test)).numpy()))
        
    # print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
    #                                       loss(label_train, model2(data_train)).numpy()))

Step: 1
Training Loss: 2.1773428916931152, Accuracy: 0.23493333160877228
Val Loss: 2.1734559535980225, Accuracy: 0.23471428453922272
Step: 2
Training Loss: 2.0106985569000244, Accuracy: 0.3255692422389984
Val Loss: 2.0018320083618164, Accuracy: 0.3330928683280945
Step: 3
Training Loss: 1.844391942024231, Accuracy: 0.397599995136261
Val Loss: 1.8344064950942993, Accuracy: 0.4047999978065491
Step: 4
Training Loss: 1.6777935028076172, Accuracy: 0.4576740860939026
Val Loss: 1.6633347272872925, Accuracy: 0.46434643864631653
Step: 5
Training Loss: 1.5218833684921265, Accuracy: 0.4985441267490387
Val Loss: 1.507598876953125, Accuracy: 0.5032914280891418
Step: 6
Training Loss: 1.3919932842254639, Accuracy: 0.5320170521736145
Val Loss: 1.3714207410812378, Accuracy: 0.5363237857818604
Step: 7
Training Loss: 1.2854193449020386, Accuracy: 0.5551791787147522
Val Loss: 1.269489049911499, Accuracy: 0.5578693747520447
Step: 8
Training Loss: 1.1827666759490967, Accuracy: 0.5773981809616089
Val Loss: 1.

## Build a model using tf.Module

In [164]:
class SimpleModule(tf.Module):
  def __init__(self, name=None):
    super().__init__(name=name)
    self.dense1 = tf.Variable(tf.random.normal([784, 128],stddev=0.1), name='dense1')
    self.bias1 = tf.Variable(tf.zeros([128]), name='b1')
    self.dense2 = tf.Variable(tf.random.normal([128,64],stddev=0.1), name='dense2')
    self.bias2 = tf.Variable(tf.zeros([64]), name='b2')
    self.dense3 = tf.Variable(tf.random.normal([64,10],stddev=0.1), name='dense3')
    self.bias3 = tf.Variable(tf.zeros([10]), name='b3')
    
  def __call__(self, input):
      
    flat_input = layers.Flatten()(input)
    output_d1 = tf.keras.activations.relu(tf.matmul(flat_input,self.dense1) + self.bias1)
    output_d2 = tf.keras.activations.relu(tf.matmul(output_d1,self.dense2) + self.bias2)
    output_d3 = tf.matmul(output_d2,self.dense3) + self.bias3
    return tf.nn.softmax(output_d3)


In [165]:
modelm = SimpleModule()
epochs=10
loss = tf.keras.losses.SparseCategoricalCrossentropy()
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.001)

for e in range(epochs):

    with tf.GradientTape() as tape:
        y_pred = modelm(data_train)
        loss_value = loss(label_train, y_pred)
        # print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
        #                                   loss_value.numpy()))
        
    grads = tape.gradient(loss_value, modelm.trainable_variables)

    optimizer.apply_gradients(zip(grads, modelm.trainable_variables))


    print("Step: {}".format(optimizer.iterations.numpy()))
    print("Training Loss: {}, Accuracy: {}".format(loss(label_train, modelm(data_train)).numpy(), accuracy(label_train, modelm(data_train)).numpy()))
    print("Val Loss: {}, Accuracy: {}".format(loss(label_test, modelm(data_test)).numpy(), accuracy(label_test, modelm(data_test)).numpy()))
        
    # print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
    #                                       loss(label_train, model2(data_train)).numpy()))

Step: 1
Training Loss: 2.123445987701416, Accuracy: 0.29649999737739563
Val Loss: 2.11830997467041, Accuracy: 0.29750001430511475
Step: 2
Training Loss: 1.9512200355529785, Accuracy: 0.3685307800769806
Val Loss: 1.9438292980194092, Accuracy: 0.3748857080936432
Step: 3
Training Loss: 1.7751579284667969, Accuracy: 0.4346350133419037
Val Loss: 1.7652899026870728, Accuracy: 0.4418904781341553
Step: 4
Training Loss: 1.5992164611816406, Accuracy: 0.48531851172447205
Val Loss: 1.586988091468811, Accuracy: 0.4912142753601074
Step: 5
Training Loss: 1.4339470863342285, Accuracy: 0.5287264585494995
Val Loss: 1.4181851148605347, Accuracy: 0.5342543125152588
Step: 6
Training Loss: 1.2878201007843018, Accuracy: 0.5631975531578064
Val Loss: 1.269445538520813, Accuracy: 0.567559540271759
Step: 7
Training Loss: 1.1723593473434448, Accuracy: 0.590904176235199
Val Loss: 1.1512410640716553, Accuracy: 0.5945183634757996
Step: 8
Training Loss: 1.0780192613601685, Accuracy: 0.6129836440086365
Val Loss: 1.056

In [279]:
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
loss([1,2],[[0.1,0.2,0.7], [0.1,0.5,0.4]])

<tf.Tensor: shape=(), dtype=float32, numpy=1.2628644>

In [280]:
-(np.log(0.2)+np.log(0.4))/2

1.2628643221541276

In [169]:
import numpy as np

In [275]:
loss([1],[[1.0,2.0,1.0]])

<tf.Tensor: shape=(), dtype=float32, numpy=0.55144465>

In [230]:
-np.log(0.29411764)

1.2237754556221159

In [183]:
loss([1,2],[[0.2,0.4,1.4], [0.1,0.5,0.4]])

<tf.Tensor: shape=(), dtype=float32, numpy=1.1512924>

In [186]:
-(np.log(0.4/2)+np.log(0.4))/2

1.2628643221541276

In [196]:
-np.log(0.5/(0.2+1.4+0.5))

1.4350845252893225

In [232]:
0.5/(0.2+1.4)

0.3125

In [276]:
-np.log(2)

-0.6931471805599453

In [284]:
a = tf.reshape(tf.constant(range(12)),[2,2,3])

In [288]:
a[:, 1, None]

<tf.Tensor: shape=(2, 1, 3), dtype=int32, numpy=
array([[[ 3,  4,  5]],

       [[ 9, 10, 11]]], dtype=int32)>

In [289]:
a[:, 1, :]

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 3,  4,  5],
       [ 9, 10, 11]], dtype=int32)>

In [290]:
a[:, 1]

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 3,  4,  5],
       [ 9, 10, 11]], dtype=int32)>

In [328]:
b = tf.reshape(tf.constant(range(12)),[3,2,2])
b[:, 1, None]

<tf.Tensor: shape=(3, 1, 2), dtype=int32, numpy=
array([[[ 2,  3]],

       [[ 6,  7]],

       [[10, 11]]], dtype=int32)>

In [330]:
b[:, 0]

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[0, 1],
       [4, 5],
       [8, 9]], dtype=int32)>

In [295]:
[tf.ones(3, tf.int32) * 10]

[<tf.Tensor: shape=(3,), dtype=int32, numpy=array([10, 10, 10], dtype=int32)>]

In [296]:
output = tf.Variable([[i for i in range(10)]])
output = tf.broadcast_to(output, [10, 10])
output*=10
output+= tf.reshape(tf.Variable([[i for i in range(10)]]),[10,1])
output

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
       [ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91],
       [ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92],
       [ 3, 13, 23, 33, 43, 53, 63, 73, 83, 93],
       [ 4, 14, 24, 34, 44, 54, 64, 74, 84, 94],
       [ 5, 15, 25, 35, 45, 55, 65, 75, 85, 95],
       [ 6, 16, 26, 36, 46, 56, 66, 76, 86, 96],
       [ 7, 17, 27, 37, 47, 57, 67, 77, 87, 97],
       [ 8, 18, 28, 38, 48, 58, 68, 78, 88, 98],
       [ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99]], dtype=int32)>

In [297]:
c = tf.Variable([[i for i in range(10)]])
c

<tf.Variable 'Variable:0' shape=(1, 10) dtype=int32, numpy=array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], dtype=int32)>

In [298]:
output+c

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[  0,  11,  22,  33,  44,  55,  66,  77,  88,  99],
       [  1,  12,  23,  34,  45,  56,  67,  78,  89, 100],
       [  2,  13,  24,  35,  46,  57,  68,  79,  90, 101],
       [  3,  14,  25,  36,  47,  58,  69,  80,  91, 102],
       [  4,  15,  26,  37,  48,  59,  70,  81,  92, 103],
       [  5,  16,  27,  38,  49,  60,  71,  82,  93, 104],
       [  6,  17,  28,  39,  50,  61,  72,  83,  94, 105],
       [  7,  18,  29,  40,  51,  62,  73,  84,  95, 106],
       [  8,  19,  30,  41,  52,  63,  74,  85,  96, 107],
       [  9,  20,  31,  42,  53,  64,  75,  86,  97, 108]], dtype=int32)>

In [309]:
a = tf.constant([1,2,3])
b = []
b.append(a)

In [310]:
b.append(a+3)

In [311]:
b

[<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([4, 5, 6], dtype=int32)>]

In [303]:
type(b)

list

In [312]:
c = tf.stack(b)
c

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>

In [315]:
c[-1,:]

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([4, 5, 6], dtype=int32)>

In [321]:
tf.one_hot(tf.fill([3],1),5)

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)>

In [320]:
tf.fill([3],1)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 1, 1], dtype=int32)>

In [335]:
d = tf.reshape(tf.constant(range(6)),[3,2])
e =[d,d,d,d]

In [336]:
tf.stack(e)

<tf.Tensor: shape=(4, 3, 2), dtype=int32, numpy=
array([[[0, 1],
        [2, 3],
        [4, 5]],

       [[0, 1],
        [2, 3],
        [4, 5]],

       [[0, 1],
        [2, 3],
        [4, 5]],

       [[0, 1],
        [2, 3],
        [4, 5]]], dtype=int32)>

In [338]:
kk = tf.stack(e,axis=1)
kk

<tf.Tensor: shape=(3, 4, 2), dtype=int32, numpy=
array([[[0, 1],
        [0, 1],
        [0, 1],
        [0, 1]],

       [[2, 3],
        [2, 3],
        [2, 3],
        [2, 3]],

       [[4, 5],
        [4, 5],
        [4, 5],
        [4, 5]]], dtype=int32)>

In [345]:
kk[:,-1,:]

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[0, 1],
       [2, 3],
       [4, 5]], dtype=int32)>

In [None]:
tf.one_hot(tf.fill([batch_size], self.out_voc.bos_ix), len(self.out_voc)) + 1e-30)

In [346]:
tf.fill([3], 1)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 1, 1], dtype=int32)>

In [350]:
tf.one_hot(tf.fill([3], 1), 5)+ 1e-30

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[1.e-30, 1.e+00, 1.e-30, 1.e-30, 1.e-30],
       [1.e-30, 1.e+00, 1.e-30, 1.e-30, 1.e-30],
       [1.e-30, 1.e+00, 1.e-30, 1.e-30, 1.e-30]], dtype=float32)>

In [354]:
ee = tf.math.log(tf.one_hot(tf.fill([3], 1), 5)+ 1e-30)
ee

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[-69.07755,   0.     , -69.07755, -69.07755, -69.07755],
       [-69.07755,   0.     , -69.07755, -69.07755, -69.07755],
       [-69.07755,   0.     , -69.07755, -69.07755, -69.07755]],
      dtype=float32)>

In [352]:
tf.argmax(tf.math.log(tf.one_hot(tf.fill([3], 1), 5)+ 1e-30), axis=-1, output_type=tf.int32)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 1, 1], dtype=int32)>

In [355]:
tf.nn.softmax(ee)

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[9.999999e-31, 1.000000e+00, 9.999999e-31, 9.999999e-31,
        9.999999e-31],
       [9.999999e-31, 1.000000e+00, 9.999999e-31, 9.999999e-31,
        9.999999e-31],
       [9.999999e-31, 1.000000e+00, 1.000000e-30, 1.000000e-30,
        1.000000e-30]], dtype=float32)>

In [359]:
tf.nn.softmax(ee).numpy()[0,1] == 1

True

In [360]:
w1 = tf.Variable(tf.random.normal((3, 2)), name='w')
b1 = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
x = [[1., 2., 3.]]

with tf.GradientTape(persistent=True) as tape:
  y = x @ w1 + b1
  loss = tf.reduce_mean(y**2)

In [362]:
[dl_dw, dl_db] = tape.gradient(loss, [w1, b1])

In [363]:
my_vars = {
    'w': w1,
    'b': b1
}

grad = tape.gradient(loss, my_vars)
grad['b']

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 3.9153435, -4.8815207], dtype=float32)>

In [364]:
tf.zeros([1,2],dtype=tf.float32)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0., 0.]], dtype=float32)>

In [None]:
linear_enc = tf.matmul(enc,self.w_enc) # [batch,ninp,hidden_size]
linear_dec = tf.reshape(tf.matmul(dec,self.w_dec),[dec.shape[0], 1, self.hid_size]) # [batch,1,hidden_size]

# [batch,ninp]
logits = tf.reshape(tf.matmul(self.activ(linear_enc+linear_dec), self.w_out),[enc.shape[0],enc.shape[1]])

In [390]:
c = tf.reshape(tf.constant([[1,2,3]]*3),[3,1,3])
c

<tf.Tensor: shape=(3, 1, 3), dtype=int32, numpy=
array([[[1, 2, 3]],

       [[1, 2, 3]],

       [[1, 2, 3]]], dtype=int32)>

In [374]:
d = tf.reshape(tf.constant(range(18)),[3,2,3])
d

<tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17]]], dtype=int32)>

In [375]:
c+d

<tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
array([[[ 1,  3,  5],
        [ 4,  6,  8]],

       [[ 7,  9, 11],
        [10, 12, 14]],

       [[13, 15, 17],
        [16, 18, 20]]], dtype=int32)>

In [380]:
(c+d)[:,0,:]

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 1,  3,  5],
       [ 7,  9, 11],
       [13, 15, 17]], dtype=int32)>

In [382]:
(c+d)[:,1,:]

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 4,  6,  8],
       [10, 12, 14],
       [16, 18, 20]], dtype=int32)>

In [391]:
t1 = [[1, 2, 3], [4, 5, 6]] # 2*3
t2 = [[7, 8, 9], [10, 11, 12]] # 2*3

In [393]:
tf.concat([t1, t2], -1)

<tf.Tensor: shape=(2, 6), dtype=int32, numpy=
array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]], dtype=int32)>

In [435]:
gru = tf.keras.layers.GRUCell(5, name='enc_gru')

In [440]:
input = tf.Variable(tf.ones([3,10]))
state = tf.Variable(tf.zeros([3,5]))

In [443]:
output, state_ = gru(input, state)

In [444]:
output.shape, state_.shape

(TensorShape([3, 5]), TensorShape([3, 5]))

In [447]:
output[0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.08688324, -0.38587573,  0.3107743 , -0.3868887 ,  0.18288043],
      dtype=float32)>

In [448]:
state_[0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.08688324, -0.38587573,  0.3107743 , -0.3868887 ,  0.18288043],
      dtype=float32)>

In [413]:
o1,s1 = gru(input, state_)

In [414]:
o1

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351439],
       [ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351439],
       [ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351438]],
      dtype=float32)>

In [415]:
s1

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351439],
       [ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351439],
       [ 0.35364175,  0.41628957,  0.49529314, -0.0158013 ,  0.09351438]],
      dtype=float32)>

In [442]:
output, state_ = gru(output, state)

InvalidArgumentError: Exception encountered when calling layer 'enc_gru' (type GRUCell).

{{function_node __wrapped__MatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [3,5], In[1]: [10,15] [Op:MatMul] name: 

Call arguments received by layer 'enc_gru' (type GRUCell):
  • inputs=tf.Tensor(shape=(3, 5), dtype=float32)
  • states=<tf.Variable 'Variable:0' shape=(3, 5) dtype=float32, numpy=
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]], dtype=float32)>
  • training=None

In [445]:
input = tf.Variable(tf.ones([3,10]))

In [446]:
output, state_ = gru(input, state)

In [450]:
tf.config.list_physical_devices('CPU') 

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [451]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]