In [None]:
from __future__ import absolute_import, division, print_function
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

![NASA](http://www.nasa.gov/sites/all/themes/custom/nasatwo/images/nasa-logo.svg)

<center>
<h1><font size="+3">NCCS Training Course Series</font></h1>
</center>

---

<center>
    <h2><font color="red">Machine Learning Regression Model with Tensorflow</font></h2>
</center>

## Useful Reference

- <a href="https://www.mygreatlearning.com/blog/what-is-tensorflow-machine-learning-library-explained/">What is TensorFlow? The Machine Learning Library Explained</a>
- <a href="https://www.tensorflow.org/tutorials/keras/regression">Basic regression: Predict fuel efficiency</a>
- <a href="https://stackabuse.com/tensorflow-2-0-solving-classification-and-regression-problems/">Tensorflow 2.0: Solving Classification and Regression Problems</a>
- <a href="https://www.toptal.com/machine-learning/tensorflow-machine-learning-tutorial">Getting Started with TensorFlow: A Machine Learning Tutorial</a>
- <a href="https://sebastianraschka.com/faq/docs/tensorflow-vs-scikitlearn.html">What is the main difference between TensorFlow and scikit-learn?</a>
- <a href="https://adventuresinmachinelearning.com/python-tensorflow-tutorial/">Python TensorFlow Tutorial – Build a Neural Network</a>

## <font color="red">What is TensorFlow?</font>
- Tensorflow is an open-source library, created by the Google Brain team, for numerical computation and large-scale machine learning. It eases the process of acquiring data, training models, serving predictions, and refining future results.
- Tensorflow bundles together Machine Learning and Deep Learning models and algorithms.
- Tensorflow allows developers to create a graph of computations to perform. Nodes in the graph represent mathematical operations, and connections (edges) represent the data, usually multidimensional data arrays or tensors, that are communicated between the nodes.
- The name `TensorFlow` is derived from the operations which neural networks perform on multidimensional data arrays or tensors! It’s literally a flow of tensors.


**First Example of TensorFlow Graph**

Consider the expression:
<center>
    a = (b + c) * (c + 2)
</center>
We can break this down into:
<center>
    d = b + c
    
    e = c + 2
    
    a = d * e
</center>
Now we can represent these operations graphically as:

![fig_gr1](https://i1.wp.com/adventuresinmachinelearning.com/wp-content/uploads/2017/03/Simple-graph-example.png)
Image Source: adventuresinmachinelearning.com

Note that the operations `d = b + c` and `e = c + 2` can be performed in parallel: this opens up the potential of distributing such calculations across CPUs and GPUs.

**Second Example of TensorFlow Graph**

The graph below shows the computational graph of a three-layer neural network.
The animated data flows between different nodes in the graph are tensors which are multi-dimensional data arrays. 

![fig_gr2](https://i1.wp.com/adventuresinmachinelearning.com/wp-content/uploads/2017/03/TensorFlow-data-flow-graph.gif)

### Main Steps of a ML Program
    
![FIG_AXES](https://www.altudo.co/-/media/altudo/images/resources/blogs/5-steps-to-define-ml-flow-to-deliver-custom-user-experience/2.ashx?la=en&hash=0A8E8BEC05A4C64C37908FB87757285E)


### Load the modules

In [None]:
%matplotlib inline
import sys
import csv
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Report the TensorFlow version in use.
print(tf.__version__)

## <font color="red">Problem Statement</font>

We consider the function: <br>
$$
f(x,y) = (1-(x^2 + y^3))e^{-\frac{1}{2}(x^2 + y^2)}
$$
<br>
defined in the domain $D=[-3,3] \times [-3,3]$.
<OL>
<LI> We randomly select $n$ points in the domain $D$ and compute the function on those points to create a dataset containing the pairs points/values.
<LI> We use the dataset for training a ML algorithm.
<LI> We generate a uniform set of points in $D$ to test the algorithm.
</OL>

## <font color="red">Generating the Data</font>

#### Define the Function

In [None]:
def ff(x,y):
    """Evaluate f(x, y) = (1 - (x^2 + y^3)) * exp(-(x^2 + y^2) / 2).

    Works on scalars and, element-wise, on NumPy arrays.
    """
    x_squared = x**2
    polynomial_part = 1 - (x_squared + y**3)
    gaussian_part = np.exp(-(x_squared + y**2) / 2)
    return polynomial_part * gaussian_part

#### Create the Data

In [None]:
# Number of input coordinates per sample.
num_dims = 2

# Samples along each axis; each dataset holds nx * ny points.
nx = ny = 30
num_points = nx * ny

# Boundary of the domain
a_min, a_max = -3.0, 3.0

<font color="blue">Generate dataset for training</font>
- The grid points are randomly generated over the domain
- The arrays are 1D

In [None]:
# Random coordinates inside the domain. Both arrays are input coordinates
# (features); the targets are computed from them below.
x = np.random.uniform(a_min, a_max, nx)  # random x-coordinates
y = np.random.uniform(a_min, a_max, ny)  # random y-coordinates

# Training inputs: every (x[i], y[j]) pair, with i varying slowest and j
# fastest. The points therefore form a grid of nx random x-values by
# ny random y-values.
xt = np.zeros((num_points, num_dims))  # grid points for training
xt[:, 0] = np.repeat(x, ny)
xt[:, 1] = np.tile(y, nx)

# 1D targets for training, evaluated on all points at once.
yt = ff(xt[:, 0], xt[:, 1])

<font color="blue">Add noise in the training targets</font>

Gaussian normal distribution with `noise_mean` as mean and `noise_std` as standard deviation.

In [None]:
# Parameters of the Gaussian perturbation applied to the training targets.
noise_mean = 0.0
noise_std  = 1.0e-2

# One independent noise sample per training point.
noise = np.random.normal(loc=noise_mean, scale=noise_std, size=num_points)

# yt is rebound to the perturbed targets, so running this cell again
# without regenerating yt adds a further layer of noise.
yt = np.add(yt, noise)

<font color="blue">Generate dataset for validation</font>
- The grid points are uniformly distributed over the domain
- The arrays are 1D

In [None]:
# Evenly spaced coordinates spanning the domain. The bounds come from
# a_min / a_max so the validation grid always covers the same domain as
# the training data.
x = np.linspace(a_min, a_max, nx)
y = np.linspace(a_min, a_max, ny)

# Validation inputs: every (x[i], y[j]) pair, with i varying slowest and j
# fastest.
xv = np.zeros((num_points, num_dims))  # grid points for validation
xv[:, 0] = np.repeat(x, ny)
xv[:, 1] = np.tile(y, nx)

# 1D targets for validation (no noise added).
yv = ff(xv[:, 0], xv[:, 1])

## <font color="red">Data Gathering and Basic Analyses</font>

#### Data to be used for training

In [None]:
# Gather the training inputs and targets into one labelled table.
train_data = pd.DataFrame(
    {
        "x0": xt[:, 0],
        "x1": xt[:, 1],
        "TargetValues": yt[:],
    }
)
print(train_data.head())

In [None]:
# Number of columns currently held by the training table.
print(train_data.shape[1])

#### Data to be used for validation

In [None]:
# Gather the validation inputs and targets into one labelled table.
valid_data = pd.DataFrame(
    {
        "x0": xv[:, 0],
        "x1": xv[:, 1],
        "TargetValues": yv[:],
    }
)
print(valid_data.head())

#### Plot the data to be trained

In [None]:
# 3D scatter of the training samples: the two inputs on the horizontal axes
# and the target on the vertical axis.
# add_subplot(..., projection='3d') is used instead of gca(projection='3d'):
# passing keyword arguments to gca() was deprecated in Matplotlib 3.4 and
# later removed.
threedee = plt.figure().add_subplot(111, projection='3d');
threedee.scatter(train_data['x0'], train_data['x1'], train_data['TargetValues']);
threedee.set_xlabel('x');
threedee.set_ylabel('y');
threedee.set_zlabel('f(x,y)');
plt.show();

#### Display the joint distribution of the columns from the training set

In [None]:
# Pairwise scatter/histogram grid of the training table without its target column.
sns.pairplot(train_data.drop(columns=["TargetValues"]));

Do something similar for the data used for validation

In [None]:
# Pairwise scatter/histogram grid of the validation table without its target column.
sns.pairplot(valid_data.drop(columns=["TargetValues"]));

#### Check the overall statistics

In [None]:
# Summary statistics of the training table with the target column left out,
# transposed so each remaining column becomes a row and each statistic
# ('mean', 'std', ...) a column.
# drop() returns a new frame, so nothing is mutated in place and no
# intermediate value is echoed as cell output.
train_stats = train_data.describe().drop(columns="TargetValues").transpose()
print(train_stats)

#### Split features from labels
- Separate the target value, or `label`, from the features.
- This `label` is the value that you will train the model to predict.

In [None]:
# pop() removes the 'TargetValues' column from each table in place and
# returns it, so train_data / valid_data no longer contain that column.
# NOTE(review): because of the in-place removal, re-running this cell on its
# own will presumably raise a KeyError; re-create the tables first.
train_labels = train_data.pop('TargetValues')
valid_labels = valid_data.pop('TargetValues')

## <font color="red">Normalize the Data</font>

- It is good practice to normalize features that use different scales and ranges. 
- Although the model might converge without feature normalization, it makes training more difficult, and it makes the resulting model dependent on the choice of units used in the input.

In [None]:
def normalize_data(x, stats=None):
    """Standardize each column of ``x`` as (value - mean) / std.

    Parameters
    ----------
    x : pandas.DataFrame
        Table whose columns are to be normalized.
    stats : pandas.DataFrame, optional
        Per-column statistics with 'mean' and 'std' columns, indexed by the
        column names of ``x``. Defaults to the notebook-level ``train_stats``,
        so validation data is scaled with the training statistics.

    Returns
    -------
    pandas.DataFrame
        Normalized copy of ``x``; the input is not modified.
    """
    if stats is None:
        # Looked up at call time so the latest train_stats is used.
        stats = train_stats
    return (x - stats['mean']) / stats['std']

# This normalized data is what we will use to train the model.
# Both tables go through the same normalize_data transform, so the
# validation inputs are scaled exactly like the training inputs.
normed_train_data = normalize_data(train_data)
normed_valid_data = normalize_data(valid_data)

## <font color="red">Build the Model</font>

#### Instantiate a sequential model using `keras`
- `keras` is TensorFlow's high-level API for building and training deep learning models. It's used for fast prototyping, state-of-the-art research, and production.
- The sequential model is the simplest model to use, especially when getting started.
- It involves defining a Sequential class and adding layers to the model one by one in a linear manner, from input to output.
- The model needs to know what input shape (`input_shape`) it should expect. The first layer of the `Sequential` model needs to receive the information.

In the model below:

- The model expects rows of data with `num_shape` variables (the `input_shape=[num_shape]` argument).
- The first hidden layer has 64 nodes and uses the `relu` activation function.
- The second hidden layer has 64 nodes and uses the `relu` activation function.
- The output layer has one node and uses no activation function.

The rectified linear activation function (`relu`) is a piecewise linear function that will output the input directly if it is positive; otherwise, it will output zero.
- Because rectified linear units are nearly linear, they preserve many of the properties that make linear models easy to optimize with gradient-based methods. They also preserve many of the properties that make linear models generalize well.
- It has become the default activation function for many types of neural networks because a model that uses it is easier to train and often achieves better performance.

In [None]:
# One network input per column of the training table.
num_shape = train_data.shape[1]
# Width of each hidden layer.
num_nodes = 64

# Two hidden ReLU layers followed by a single linear output node.
model = keras.Sequential(
    [
        layers.Dense(num_nodes, activation=tf.nn.relu, input_shape=[num_shape]),
        layers.Dense(num_nodes, activation=tf.nn.relu),
        layers.Dense(1),
    ]
)

The above model creation can also be written as:

```python
model = keras.Sequential()
model.add(layers.Dense(num_nodes, activation=tf.nn.relu, input_shape=[num_shape]))
model.add(layers.Dense(num_nodes, activation=tf.nn.relu))
model.add(layers.Dense(1))
```

Dense layers represent a function that maps the input tensor `x` to an output tensor `y` via the equation `y = Ax + b` where `A` (the kernel) and `b` (the bias) are parameters of the dense layer.

#### Compile the model
- Once you have specified the architecture of the network, you need to specify the method for back-propagation by choosing an optimizer and specify the loss.
- Compiling the model uses an efficient numerical library in the background (TensorFlow here, since we use `tf.keras`).

Define the optimizer:

In [None]:
# RMSprop optimizer; the positional argument 0.001 is its learning rate.
optimizer = tf.keras.optimizers.RMSprop(0.001)

A loss function and an optimizer are required to compile the model:
- We are asking the network to use the `rmsprop` optimizer to change weights in such a way that the loss `mse` (mean squared error) is minimized at each iteration.

In [None]:
# Minimize the mean squared error; additionally track MAE and MSE as metrics.
model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae', 'mse'],
)

#### Inspect the model

`model.summary()` is a useful method if you want to get an overview of your model and see the total number of parameters.

In [None]:
# Print an overview of the model, including its parameter counts.
model.summary()

#### Try the model

Take a batch of 10 samples from the training data and call `model.predict` on it.

In [None]:
# Run the (still untrained) model on the first ten normalized training rows.
example_batch = normed_train_data.head(10)
example_result = model.predict(example_batch)
print(example_result)

It seems to be working, and it produces a result of the expected shape and type.

## <font color="red">Train the Model</font>

Training occurs over epochs and each epoch is split into batches.

- **Epoch**: One pass through all of the rows in the training dataset.
- **Batch**: One or more samples considered by the model within an epoch before weights are updated.
- One epoch is comprised of one or more batches, based on the chosen batch size and the model is fit for many epochs. 
- The model is "fit" to the training data using the `fit` method. We specify the maximum number of `epochs` we want training to go on; since no `batch_size` is passed, Keras uses its default of 32 samples per batch.
- The callback function is applied at given stages of the training procedure. We use it to get a view on internal states and statistics of the model during training.

In [None]:
# Train the model for 1000 epochs, and record the per-epoch training
# loss and error metrics (MAE, MSE) in the history object

# Display training progress by printing a single dot for each completed epoch
class PrintDot(keras.callbacks.Callback):
    """Keras callback that prints one dot per epoch, 100 dots per row."""

    def on_epoch_end(self, epoch, logs=None):
        """Print the progress dot for ``epoch``.

        ``logs`` is accepted (with the same ``None`` default as the Keras
        base class) but not used.
        """
        # Start a new row of dots every 100 epochs.
        if epoch % 100 == 0:
            print('')
        print('.', end='')

# How many times we go through the entire dataset
EPOCHS = 1000

# verbose=0 silences Keras' own per-epoch progress output so that the dots
# printed by PrintDot are the only progress display; otherwise both are
# written to the same output and interleave for every one of the EPOCHS epochs.
# Passing validation_split=0.2 here would additionally hold out 20% of the
# training rows and record val_* metrics in `history`.
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=EPOCHS,
    verbose=0,
    callbacks=[PrintDot()],
)

#### Visualize the model's training progress

In [None]:
# Use the stats stored in the history object.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [None]:
# List the names of the quantities recorded in the history object.
print(history.history.keys())

In [None]:
def plot_history(history):
    """Plot the per-epoch mean absolute error and mean squared error.

    Two figures are drawn, one per metric. The training curve is always
    plotted; a validation curve is added when the matching ``val_*`` column
    was recorded (i.e. when ``fit`` was given validation data).

    Parameters
    ----------
    history : object
        Training record exposing ``history`` (mapping of metric name to
        per-epoch values) and ``epoch`` (list of epoch indices), as returned
        by ``model.fit``.

    Raises
    ------
    KeyError
        If a metric is recorded under neither its long nor its short name.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    def metric_column(*candidates):
        # The recorded column name depends on the TensorFlow release: older
        # versions expand 'mae' to 'mean_absolute_error', while TF 2.x keeps
        # the short name passed to compile().
        for name in candidates:
            if name in hist.columns:
                return name
        raise KeyError(
            "none of {} found in history; available columns: {}".format(
                candidates, list(hist.columns)))

    def plot_metric(column, ylabel, lower=None):
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.plot(hist['epoch'], hist[column], label='Train Error')
        plotted = [column]
        val_column = 'val_' + column
        if val_column in hist.columns:
            plt.plot(hist['epoch'], hist[val_column], label='Val Error')
            plotted.append(val_column)
        plt.legend()
        # Fit the y-range to the plotted curves unless a lower bound is given.
        if lower is None:
            lower = min(hist[name].min() for name in plotted)
        upper = max(hist[name].max() for name in plotted)
        plt.ylim([lower, upper])

    plot_metric(metric_column('mean_absolute_error', 'mae'),
                'Mean Abs Error [Target]')
    plot_metric(metric_column('mean_squared_error', 'mse'),
                'Mean Square Error [$Target^2$]', lower=0)

# Draw the training curves for the run recorded in `history`.
plot_history(history)

## <font color="red">Evaluate the Model on Test Data</font>

**Compute the Scores**

In [None]:
# Evaluate the model on the normalized validation inputs. The result is
# unpacked as the loss followed by the two metrics given to compile()
# (MAE, then MSE).
loss, mae, mse = model.evaluate(normed_valid_data, valid_labels, verbose=1)
#print("Testing set Mean Abs Error: {} ".format(mae))

**Make Prediction**

In [None]:
# Predict on the normalized validation inputs; flatten() collapses the model
# output into a 1D array.
valid_predictions = model.predict(normed_valid_data).flatten()

#### Do the 45-degree plot

In [None]:
# Predicted versus true target values; perfect predictions lie on the diagonal.
plt.scatter(valid_labels, valid_predictions);
plt.xlabel('True Values');
plt.ylabel('Predictions');

# Use one common range for both axes, derived from the data. The target
# function takes negative values, so the axes must not be clipped at 0.
line_lo = min(valid_labels.min(), valid_predictions.min())
line_hi = max(valid_labels.max(), valid_predictions.max())
line_pad = 0.05 * (line_hi - line_lo)
if line_pad == 0:
    line_pad = 0.05  # degenerate case: all values identical
plot_limits = [line_lo - line_pad, line_hi + line_pad]

# 45-degree reference line spanning the visible range.
plt.plot(plot_limits, plot_limits);
plt.xlim(plot_limits);
plt.ylim(plot_limits);
plt.gca().set_aspect('equal', adjustable='box');

**Error Distribution**

In [None]:
# Distribution of the prediction errors (prediction minus true value).
prediction_errors = valid_predictions - valid_labels
if hasattr(sns, "histplot"):
    # distplot is deprecated in seaborn 0.11+ and removed in later releases;
    # histplot with a KDE overlay and density scaling is its replacement.
    sns.histplot(prediction_errors, kde=True, stat="density")
else:
    # Older seaborn without histplot.
    sns.distplot(prediction_errors)
plt.xlabel('Prediction Error');

#### Plotting Function Using Predicted Values

In [None]:
# 3D scatter of the model predictions over the validation inputs.
# add_subplot(..., projection='3d') is used instead of gca(projection='3d'):
# passing keyword arguments to gca() was deprecated in Matplotlib 3.4 and
# later removed.
threedee = plt.figure().add_subplot(111, projection='3d');
threedee.scatter(valid_data['x0'], valid_data['x1'], valid_predictions);
threedee.set_xlabel('x');
threedee.set_ylabel('y');
threedee.set_zlabel('f(x,y)');
plt.show();

## <font color="blue">Exercise</font>

Consider the 2D problem presented here.
- Create a dataset of 1000 randomly selected points (in the domain) and their associated targets.
- Randomly choose 80% of the data for training and the remaining for testing
- Create your ML model and test it.