# Setup a neural network using TensorFlow2.x

In this tutorial, we will start using TensorFlow to build our very first neural network.

We will also cover the compile and fit steps, and how to save and load a model so that it does not have to be retrained next time.

In [1]:
# ! pip install tensorflow --upgrade

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

## 4 ways to build the network

We demonstrate four ways to build the simplest possible neural network, without any activation function or special structure.

The following methods are all for fully connected neural networks. Check https://www.tensorflow.org/api_docs/python/tf/keras/layers if it is not fully connected.

**Example** (Not fully connected model)

        input1 = tf.keras.layers.Input(shape=(16,))
        x1 = tf.keras.layers.Dense(8, activation='relu')(input1)
        input2 = tf.keras.layers.Input(shape=(32,))
        x2 = tf.keras.layers.Dense(8, activation='relu')(input2)
        avg = tf.keras.layers.Average()([x1, x2])
        out = tf.keras.layers.Dense(4)(avg)
        model0 = tf.keras.models.Model(inputs=[input1, input2], outputs=out)

https://www.tensorflow.org/api_docs/python/tf/keras/layers/Average#example

Since TensorFlow randomly initializes all parameter values when you construct a model, `model.weights` will return different values each time you run it. To initialize the model with the same values on every run, call `tf.random.set_seed(1234)` at the beginning, before constructing the model.

In [3]:
## Method 1: declare the input size on the first Dense layer
## NN with 3 hidden layers
## input:8 -> 10 -> 8 -> 3 -> output:1
model1 = Sequential()
model1.add(Dense(10, input_shape=(8,)))  # input_dim=8 is an equivalent spelling
# The remaining layers take their input size from the layer before them.
# No activation (e.g. activation='sigmoid') is passed to any layer.
for units in (8, 3, 1):
    model1.add(Dense(units))

# model1.weights returns the weights and biases (randomly initialized by TensorFlow)
model1.summary()   # layer-by-layer summary of the network structure

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                90        
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 88        
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 27        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 4         
Total params: 209
Trainable params: 209
Non-trainable params: 0
_________________________________________________________________


In [4]:
## Method 2: start the stack with an explicit Input object
## NN with 3 hidden layers
## input:8 -> 10 -> 8 -> 3 -> output:1
model2 = Sequential()
model2.add(tf.keras.Input(shape=(8,)))
for width in (10, 8, 3, 1):
    model2.add(Dense(width))

# Same architecture as model1 (compare the parameter counts in the two summaries).
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 10)                90        
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 88        
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 27        
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 4         
Total params: 209
Trainable params: 209
Non-trainable params: 0
_________________________________________________________________


In [5]:
## Method 3: stack the layers without an input size, then build the model explicitly
## NN with 3 hidden layers
## input:8 -> 10 -> 8 -> 3 -> output:1
model3 = Sequential([Dense(10), Dense(8), Dense(3), Dense(1)])
# Without an input size, model3.weights has no values yet;
# build() provides it: 8 input features, batch dimension left as None.
model3.build(input_shape=(None, 8))

model3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 10)                90        
_________________________________________________________________
dense_9 (Dense)              (None, 8)                 88        
_________________________________________________________________
dense_10 (Dense)             (None, 3)                 27        
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 4         
Total params: 209
Trainable params: 209
Non-trainable params: 0
_________________________________________________________________


In [6]:
## Method 4: the functional API -- each layer is called on the previous tensor
## input:8 -> 10 -> 8 -> 3 -> output:1
x_in = tf.keras.Input(shape=(8,))
x1 = Dense(units=10)(x_in)
x2 = Dense(units=8)(x1)
x3 = Dense(units=3)(x2)
x_out = Dense(units=1)(x3)
# The model is defined by its input tensor and its output tensor.
model4 = tf.keras.Model(inputs=x_in, outputs=x_out)

model4.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 8)]               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                90        
_________________________________________________________________
dense_13 (Dense)             (None, 8)                 88        
_________________________________________________________________
dense_14 (Dense)             (None, 3)                 27        
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 4         
Total params: 209
Trainable params: 209
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Note: metrics_names are available only after a keras.Model has been
# trained/evaluated on actual data. model1 is still untrained at this point,
# so the displayed list is empty.
model1.metrics_names

[]

## Train the neural network

After we constructed our first neural network, we will need to feed it with our training data to train the model. There are two steps needed, compile and fit. 

### Step 1: compile

    compile(
        optimizer='rmsprop', loss=None, metrics=None, loss_weights=None,
        weighted_metrics=None, run_eagerly=None, steps_per_execution=None, **kwargs
    )

eg.

    model.compile(optimizer="Adam", loss="mse", metrics=['accuracy', 'mse'])
    model.metrics_names   
    
https://www.tensorflow.org/api_docs/python/tf/keras/Model#compile

**- optimizer**

Specifies the algorithm used to search for the minimum of the loss function.

- `tf.keras.optimizers.Adam` or `Adam` https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam

check https://www.tensorflow.org/api_docs/python/tf/keras/optimizers for more optimizers

**- loss**

The loss function measures the difference between the model's predictions and the true values. Training tries to minimize this value.

- `'mean_absolute_error'` https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanAbsoluteError
- `'mean_squared_error'`  https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError
- `'categorical_crossentropy'` https://www.tensorflow.org/api_docs/python/tf/keras/losses/CategoricalCrossentropy

check https://www.tensorflow.org/api_docs/python/tf/keras/losses for more loss functions

### Step 2: fit

    fit(
        x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None,
        validation_split=0.0, validation_data=None, shuffle=True, class_weight=None,
        sample_weight=None, initial_epoch=0, steps_per_epoch=None,
        validation_steps=None, validation_batch_size=None, validation_freq=1,
        max_queue_size=10, workers=1, use_multiprocessing=False
    )
    
https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit

**- batch_size:** Number of training examples in a single batch, i.e. the number of samples used for one gradient update.

**- epochs** (times the model sees the data): The number of passes of training data

**- verbose** 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.

We take `model1` as an example to demonstrate how to train a model as follows.

In [8]:
## training data
## Synthetic data: 500 samples with 8 standard-normal features each, and one
## standard-normal target per sample.
## A seeded generator makes the data set identical on every run, so the
## trained weights and predictions further down can be reproduced.
SEED = 1234
size = 500
rng = np.random.default_rng(SEED)
train_x = rng.normal(0, 1, (size, 8))
train_y = rng.normal(0, 1, (size, 1))

In [9]:
# Minimize the mean squared error with the Adam optimizer (learning rate 0.01).
model1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mean_squared_error')
# 1500 passes over the training data; verbose=0 keeps the training silent.
# fit() returns a History object, which is what this cell displays.
model1.fit(x=train_x, y=train_y, epochs=1500, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7f04eddd3f10>

In [10]:
# Note: metrics_names are available only after a keras.Model has been
# trained/evaluated on actual data. model1 has now been compiled and fitted;
# only 'loss' is listed because compile() was called without extra metrics.
model1.metrics_names

['loss']

## Activation function

In the models we constructed above, we didn't specify any activation function, so we discuss it briefly here. Adding an activation function only changes how the model is constructed; the training process afterwards is the same.

check https://www.tensorflow.org/api_docs/python/tf/keras/activations

eg.

    model = Sequential()
    model.add(Dense(5,activation='sigmoid',input_dim=1))
    model.add(Dense(3,activation='relu'))
    model.add(Dense(1))
    
The purpose of the activation function is to introduce non-linearity into the output of a neuron. 
- Activation functions have a major effect on the neural network’s ability to converge and the convergence speed, or in some cases, activation functions might prevent neural networks from converging in the first place 
- The activation function is a mathematical “gate” in between the input feeding the current neuron and its output going to the next layer. 
- Non-linear activation functions can help the network learn complex data, compute and learn almost any function representing a question

The activation function is attached to each neuron in the network, and determines whether it should be activated (“fired”) or not, based on whether each neuron’s input is relevant for the model’s prediction. Activation functions also help normalize the output of each neuron to a range between 1 and 0 or between -1 and 1. An additional aspect of activation functions is that they must be computationally efficient because they are calculated across thousands or even millions of neurons for each data sample.

## Attributes and Methods

We have covered the process of building and training a neural network. Now we will talk about some commonly-used attributes and methods of the model.

In [14]:
## model1.weights and model1.trainable_variables are the same for the above model:
## none of its parameters are frozen (the summary reports 0 non-trainable params).

model1.weights == model1.trainable_variables

True

In [15]:
## Display the parameters as a list of tf.Variable objects
## (each entry shows its name, shape, dtype and current values).
## Use indexing, e.g. `model1.weights[i]`, to display one specific parameter tensor.
model1.weights

[<tf.Variable 'dense/kernel:0' shape=(8, 10) dtype=float32, numpy=
 array([[-4.7745174e-03, -4.2704954e-03,  2.0147045e-03, -6.0886233e-03,
          1.6906815e-04,  2.9910391e-03, -1.2138552e-02,  3.9200662e-03,
          9.7083038e-04,  7.8460521e-01],
        [ 1.5564065e-04, -4.9153781e-03,  5.1136962e-03, -2.8920257e-03,
          2.2271478e-03,  1.7226229e-03,  8.3738118e-03,  2.4565703e-03,
         -1.5089725e-03,  1.2535113e+00],
        [-3.8777268e-04, -2.9222372e-03,  6.7038345e-06,  4.1895998e-03,
          4.2966832e-03, -5.6971674e-04,  1.0847520e-02, -5.1253634e-03,
          1.1978279e-02, -5.0348192e-01],
        [ 1.6874436e-03, -1.5508595e-03,  2.1394780e-03, -1.0563543e-03,
          5.0894222e-03, -4.9723790e-04,  6.3263075e-03, -1.1619112e-03,
          2.2266451e-02, -4.8005381e-01],
        [-6.8500419e-03, -6.9415063e-04, -7.9676770e-03, -8.6388607e-03,
          2.9368100e-03, -5.4954947e-04, -4.4616669e-02,  3.6588751e-03,
          4.3160364e-02, -9.6152526

In [16]:
## Display only the values: a list of arrays in which weight matrices and
## bias vectors alternate (weights, biases, weights, biases, ...).
model1.get_weights()

[array([[-4.7745174e-03, -4.2704954e-03,  2.0147045e-03, -6.0886233e-03,
          1.6906815e-04,  2.9910391e-03, -1.2138552e-02,  3.9200662e-03,
          9.7083038e-04,  7.8460521e-01],
        [ 1.5564065e-04, -4.9153781e-03,  5.1136962e-03, -2.8920257e-03,
          2.2271478e-03,  1.7226229e-03,  8.3738118e-03,  2.4565703e-03,
         -1.5089725e-03,  1.2535113e+00],
        [-3.8777268e-04, -2.9222372e-03,  6.7038345e-06,  4.1895998e-03,
          4.2966832e-03, -5.6971674e-04,  1.0847520e-02, -5.1253634e-03,
          1.1978279e-02, -5.0348192e-01],
        [ 1.6874436e-03, -1.5508595e-03,  2.1394780e-03, -1.0563543e-03,
          5.0894222e-03, -4.9723790e-04,  6.3263075e-03, -1.1619112e-03,
          2.2266451e-02, -4.8005381e-01],
        [-6.8500419e-03, -6.9415063e-04, -7.9676770e-03, -8.6388607e-03,
          2.9368100e-03, -5.4954947e-04, -4.4616669e-02,  3.6588751e-03,
          4.3160364e-02, -9.6152526e-01],
        [ 9.4094407e-03, -8.9300368e-03,  1.7354518e-02,  3.

In [17]:
## Return the weights and biases for a specified layer.
## layers[3] is the last Dense layer (3 inputs -> 1 output), so this shows
## a (3, 1) weight matrix followed by a single bias value.
model1.layers[3].get_weights()

[array([[ 1.4687005e-04],
        [-6.5220369e-04],
        [ 1.9679068e-01]], dtype=float32),
 array([-0.03459695], dtype=float32)]

In [18]:
## Model prediction on 10 random inputs with 8 features each.
## API reference: https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict
## `x` is reused by later cells to check that saved/reloaded models agree with model1.
x = np.random.normal(loc=0.0, scale=1.0, size=(10, 8))
model1.predict(x)

array([[ 0.25113365],
       [-0.10175361],
       [-0.17832017],
       [-0.07764949],
       [ 0.20510267],
       [-0.12055007],
       [ 0.05584735],
       [-0.05570502],
       [ 0.07623062],
       [-0.06173233]], dtype=float32)

## save and load model

It is important to know how to save and load a given model especially when the model is expensive to train twice or you want to reuse the model later.

https://www.tensorflow.org/guide/keras/save_and_serialize

Keras also supports saving a single HDF5 file containing the model's architecture, weights values, and `compile()` information. It is a light-weight alternative to SavedModel.

https://www.tensorflow.org/guide/keras/save_and_serialize#keras_h5_format

In [19]:
## save model
## Writes model1 under the path 'Model/model1'.
## tf.keras.models.save_model() is the function form of the same operation.
model1.save('Model/model1')

INFO:tensorflow:Assets written to: Model/model1/assets


In [20]:
## load model
## Restore the model that was saved under 'Model/model1'.
model_load = tf.keras.models.load_model(filepath='Model/model1')
## Just for testing: predict on the same inputs `x` that were given to model1.
model_load.predict(x)

array([[ 0.25113365],
       [-0.10175361],
       [-0.17832017],
       [-0.07764949],
       [ 0.20510267],
       [-0.12055007],
       [ 0.05584735],
       [-0.05570502],
       [ 0.07623062],
       [-0.06173233]], dtype=float32)

In [21]:
## save weights
## Stores only the parameter values of model1 under the prefix 'Model/model1_weights'.
## Author's note: this produces 3 files.
model1.save_weights('Model/model1_weights')

In [22]:
## load_weights() only fills in parameter values, so a model with the same
## structure as model1 has to exist first.
model2 = Sequential([
    Dense(10, input_shape=(8,)),  # input_dim=8 is an equivalent spelling
    Dense(8),                     # no activation (e.g. 'sigmoid'), as in model1
    Dense(3),
    Dense(1),
])

model2.load_weights('Model/model1_weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f04e444e820>

In [23]:
## Just for testing: model2 now holds the weights saved from model1, so these
## predictions on the same inputs `x` should match model1.predict(x).
model2.predict(x)

array([[ 0.25113365],
       [-0.10175361],
       [-0.17832017],
       [-0.07764949],
       [ 0.20510267],
       [-0.12055007],
       [ 0.05584735],
       [-0.05570502],
       [ 0.07623062],
       [-0.06173233]], dtype=float32)

In [24]:
## To save time constructing the model, we can also do the following

def build_model():
    """Build the 8 -> 10 -> 8 -> 3 -> 1 fully connected network used in this notebook.

    The layer stack is the same as `model1`, so weights written with
    `model1.save_weights(...)` can be restored into the returned model with
    `load_weights(...)`.

    Returns:
        A `Sequential` model compiled with MSE loss, the Adam optimizer and
        mean absolute error as an additional reported metric.
    """
    model = Sequential()

    model.add(Dense(10, input_shape=(8,)))
    model.add(Dense(8))
    model.add(Dense(3))
    model.add(Dense(1))

    # Compiling is optional when the model is only used for predict() after
    # load_weights(); it is needed before fit() or evaluate().
    # 'mae' is reported instead of 'accuracy': the model is a regressor with
    # continuous targets, where classification accuracy is not meaningful.
    model.compile(loss='mse', optimizer='Adam', metrics=['mae'])
    return model

In [25]:
# Rebuild the architecture with the helper, restore the weights saved from
# model1, then predict on the same inputs `x` as a sanity check.
model2 = build_model()
model2.load_weights('Model/model1_weights') 
model2.predict(x)

array([[ 0.25113365],
       [-0.10175361],
       [-0.17832017],
       [-0.07764949],
       [ 0.20510267],
       [-0.12055007],
       [ 0.05584735],
       [-0.05570502],
       [ 0.07623062],
       [-0.06173233]], dtype=float32)

Example

In [None]:
#work/python/keras maybe??

In [None]:
#work/DNN/NN-tf   Iris problem

In [None]:
#work/DNN/Comparison-Copy1

In [None]:
#work/DNN/Comparison-activation