<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow2.0_Eager_Execution_Tutorials/blob/master/tutorials/01_basics/BasicStyle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q --upgrade tf-nightly-2.0-preview

[K    100% |████████████████████████████████| 96.1MB 175kB/s 
[K    100% |████████████████████████████████| 358kB 20.4MB/s 
[K    100% |████████████████████████████████| 61kB 22.4MB/s 
[K    100% |████████████████████████████████| 3.0MB 11.1MB/s 
[?25h

In [0]:
import tensorflow as tf

## Make DataSet

In [0]:
'''
If you have numpy data, you can use
tensor_data = tf.convert_to_tensor(numpy_data, dtype=tf.float32)
for translation into tf.Tensor.
'''
# example training data
feature = tf.random.normal(shape=[50000, 1000])
target = tf.random.normal(shape=[50000, 10])

# example validation data
val_feature = tf.random.normal(shape=[10000, 1000])
val_target = tf.random.normal(shape=[10000, 10])

# example test data
test_feature = tf.random.normal(shape=[5000, 1000])
test_target = tf.random.normal(shape=[5000, 10])


# make dataset
dataset = tf.data.Dataset.from_tensor_slices((feature, target))
val_dataset = tf.data.Dataset.from_tensor_slices((val_feature, val_target))
test_dataset = tf.data.Dataset.from_tensor_slices((test_feature, test_target))

In [13]:
# A dataset have shape information except batchsize and data type.
dataset

<TensorSliceDataset shapes: ((1000,), (10,)), types: (tf.float32, tf.float32)>

In [0]:
# Training data should be shuffled every epoch.
# 10000 is buffer size.
dataset = dataset.shuffle(10000)

# For mini-batch training.
# 256 is batch size.
dataset = dataset.batch(256)

# Of course we can write same code as follows
# dataset = dataset.shuffle(10000).batch(256)

# validation data and test data do NOT need shuffle.
# batch size is as big as possible.
val_dataset = val_dataset.batch(10000)
test_dataset = test_dataset.batch(5000)

In [17]:
# dataset is set for batch training.
dataset

<BatchDataset shapes: ((None, 1000), (None, 10)), types: (tf.float32, tf.float32)>

## Make Network

In [0]:
class MyNet(tf.keras.Model):
    '''
    We use basically tf.keras.Model for making network.
    This class will manage layers and that's trainable parameters.
    '''
    def __init__(self):
        super(MyNet, self).__init__()
        
        
        # We can use tf.keras.Sequential 
        # which has a role of putting together some layers.
        # This class inherits tf.keras.Model, so this can manege parameters too.
        # This class only receive layers.Layer class.
        # (Note that tf.keras.Sequential receive tf.keras.layers.ReLU())
       
        self.layer1 = tf.keras.Sequential([
            tf.keras.layers.Dense(1024),
            tf.keras.layers.ReLU(),
            tf.keras.layers.BatchNormalization(axis=-1),
            tf.keras.layers.Dropout(rate=0.2),
        ])
        
        # Of course we can write some layers separately.
        
        self.dense = tf.keras.layers.Dense(256)
        self.bn = tf.keras.layers.BatchNormalization(axis=-1)
        self.do = tf.keras.layers.Dropout(rate=0.2)
        
        self.dense_output = tf.keras.layers.Dense(10)
    
    # tf.function is jit compiler which translate python code into TF graph.
    @tf.function
    def call(self, x, training=False):
        # tf.keras.Sequential class have training propaty
        # which manege behavior of dropout and batchnormalization etc.
        h = self.layer1(x, training=training)
        
        h = self.dense(h)
        # we can use tf.nn.relu function instead of tf.keras.layers.ReLU()
        h = tf.nn.relu(h)

        # BatchNormalization and Dropout class also have training property.
        h = self.bn(h, training=training)
        h = self.do(h, training=training)
        
        return self.dense_output(h)

In [0]:
model = MyNet()

In [30]:
# test execution.
model(tf.random.normal(shape=[1, 1000]))

<tf.Tensor: id=1160, shape=(1, 10), dtype=float32, numpy=
array([[-0.34420356, -1.2885818 ,  0.1190961 , -0.99393785,  0.15190971,
         0.09836806,  0.10659645, -1.4151433 ,  0.02022073, -1.2858809 ]],
      dtype=float32)>

In [31]:
# We can check model compose with model.summary() after first execution.
model.summary()

Model: "my_net_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_2 (Sequential)    multiple                  1029120   
_________________________________________________________________
dense_7 (Dense)              multiple                  262400    
_________________________________________________________________
batch_normalization_5 (Batch multiple                  1024      
_________________________________________________________________
dropout_5 (Dropout)          multiple                  0         
_________________________________________________________________
dense_8 (Dense)              multiple                  2570      
Total params: 1,295,114
Trainable params: 1,292,554
Non-trainable params: 2,560
_________________________________________________________________


## Training by hand

In [0]:
optimizer = tf.optimizers.Adam()
loss_fn = tf.losses.MeanSquaredError()

In [0]:
@tf.function
def train_step(feature, target):

    with tf.GradientTape() as tape:
        y_pred = model(feature, training=True)
        loss = loss_fn(target, y_pred)
    
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))
    
    return loss

@tf.function
def val_step(feature, target):
    
    y_pred = model(feature)
    loss = loss_fn(target, y_pred)
    
    return loss

In [50]:
for i in range(10):
    
    running_loss = 0
    running_val_loss = 0
    
    for i, (batch_feature, batch_target) in enumerate(dataset):
        loss_ = train_step(batch_feature, batch_target)
        running_loss += loss_
        
    for j, (batch_feature, batch_target) in enumerate(val_dataset):
        loss_ = val_step(batch_feature, batch_target)
        running_val_loss += loss_
        
    print("----------epoch {}--------".format(i+1))
    print("loss: {},  val_loss: {}".format(running_loss/(i+1), 
                                           running_val_loss/(j+1)))

W0407 11:15:35.531675 140100668761984 optimizer_v2.py:961] Gradients does not exist for variables ['sequential_2/batch_normalization_4/moving_mean:0', 'sequential_2/batch_normalization_4/moving_variance:0', 'batch_normalization_5/moving_mean:0', 'batch_normalization_5/moving_variance:0'] when minimizing the loss.
W0407 11:15:35.860958 140100668761984 optimizer_v2.py:961] Gradients does not exist for variables ['sequential_2/batch_normalization_4/moving_mean:0', 'sequential_2/batch_normalization_4/moving_variance:0', 'batch_normalization_5/moving_mean:0', 'batch_normalization_5/moving_variance:0'] when minimizing the loss.
W0407 11:15:44.869919 140100668761984 optimizer_v2.py:961] Gradients does not exist for variables ['sequential_2/batch_normalization_4/moving_mean:0', 'sequential_2/batch_normalization_4/moving_variance:0', 'batch_normalization_5/moving_mean:0', 'batch_normalization_5/moving_variance:0'] when minimizing the loss.


----------epoch 196--------
loss: 0.7635518908500671,  val_loss: 1.0994179248809814
----------epoch 196--------
loss: 0.7118832468986511,  val_loss: 1.120743989944458
----------epoch 196--------
loss: 0.6699531078338623,  val_loss: 1.143028974533081
----------epoch 196--------
loss: 0.6334821581840515,  val_loss: 1.15768301486969
----------epoch 196--------
loss: 0.6007084250450134,  val_loss: 1.1724363565444946
----------epoch 196--------
loss: 0.5760673880577087,  val_loss: 1.1808465719223022
----------epoch 196--------
loss: 0.5515231490135193,  val_loss: 1.1818556785583496
----------epoch 196--------
loss: 0.5310335159301758,  val_loss: 1.189349889755249
----------epoch 196--------
loss: 0.5123671293258667,  val_loss: 1.1940315961837769
----------epoch 196--------
loss: 0.49744296073913574,  val_loss: 1.1978554725646973
