# Deep neural networks training

Activation (non-linear) functions that do not saturate:<br>
Rectified Linear Unit, ReLU: $y=\max(0,x)$, $y \in [0,\infty)$, learning rate $\alpha \rightarrow 0$<br>
Leaky ReLU: $y=\max(s \cdot x, x)$, typically $s=0.01$<br>
Exponential Linear Unit, ELU: $y=s(e^{x}-1)$ for $x<0$ and $y=x$ for $x \geq 0$, usually $s=1$. If $s=1$, then $y \in (-1,\infty)$

In [22]:
# https://medium.com/@margaretmz/anaconda-jupyter-notebook-tensorflow-and-keras-b91f381405f8
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf

In [23]:
# Load the cereal ratings, clean the column names and min-max scale each column.
dataset = pd.read_csv('../datasets/regression/rating-cereals.csv')
# Strip every non-letter character from the column names.
dataset.columns = [re.sub('[^a-zA-Z]*', '', column) for column in dataset.columns]
scaled_dataset = (dataset-dataset.min())/(dataset.max()-dataset.min())
# The train/test split further down is positional (shuffle=False), so this
# shuffle alone decides which rows end up in each subset. Seed it so that
# Restart & Run All reproduces the same split and the same results.
SHUFFLE_SEED = 42
shuffle_scaled_dataset = shuffle(scaled_dataset, random_state=SHUFFLE_SEED)
# Preview only the first rows instead of dumping the whole frame.
shuffle_scaled_dataset.head(10)

Unnamed: 0,calories,protein,fat,sodium,fiber,rating
14,0.545455,0.2,0.0,0.875000,0.000000,0.309299
69,0.454545,0.4,0.2,0.625000,0.214286,0.378208
62,0.363636,0.4,0.0,0.000000,0.285714,0.745818
24,0.454545,0.4,0.0,0.000000,0.214286,0.532662
9,0.636364,0.0,0.4,0.687500,0.000000,0.000000
66,0.363636,0.2,0.0,0.046875,0.214286,0.546128
13,0.545455,0.0,0.2,0.562500,0.000000,0.062034
59,0.545455,0.0,0.0,0.750000,0.000000,0.316619
37,0.818182,0.4,0.2,0.531250,0.142857,0.243565
56,0.636364,0.4,0.2,0.656250,0.357143,0.280409


In [25]:
# Separate the predictors from the target, then carve out the test and
# validation subsets by position (no reshuffling happens in this cell).
nrows = shuffle_scaled_dataset.shape[0]
X = shuffle_scaled_dataset.loc[:, ['calories', 'protein', 'fat', 'sodium', 'fiber']]
y = shuffle_scaled_dataset.loc[:, ['rating']]

# Hold out 20% of the rows as the test set.
X_train, X_test, t_train, t_test = train_test_split(
    X, y,
    test_size=0.2,
    shuffle=False,
)
# Hold out 10% of the remaining rows as the validation set.
X_train, X_validation, t_train, t_validation = train_test_split(
    X_train, t_train,
    test_size=0.1,
    shuffle=False,
)

In [43]:
# Network dimensions are read from the split frames so they follow the data
# instead of being hard-coded.
INPUTS = X_train.shape[1]
OUTPUTS = t_train.shape[1]
# NOTE(review): only half of the training/test rows are counted here, so the
# training loop and the final evaluation touch half of each subset — confirm
# that the division by 2 is intentional.
NUM_TRAINING_EXAMPLES = int(round(X_train.shape[0]/2))
NUM_TEST_EXAMPLES = int(round(X_test.shape[0]/2))

n_hidden_structure = [4, 3, 1] # Number of neurons in each hidden layer, in order.
epochs = 100 # Number of passes used to size the training loop.
batch_size = 4 # Number of training rows fed to the network per training step.
learning_rate = 0.01

# Fixed: the original used `len(train)`, but no `train` variable exists in the
# notebook (NameError); the training frame is `X_train`.
n_learning_iters = int((len(X_train)/batch_size)+1)*epochs
# NOTE: from here on `X` names the input placeholder, not the feature DataFrame
# built in the split cell.
X = tf.placeholder(dtype=tf.float32, shape=(None, INPUTS), name='X')
t = tf.placeholder(dtype=tf.float32, shape=(None, OUTPUTS), name="t")

In [5]:
# Stack the ReLU hidden layers described by n_hidden_structure, each one fed
# by the previous layer (the first one by the input placeholder X).
hidden_layers = []
layer_input = X
for n_units in n_hidden_structure:
    layer_input = tf.layers.dense(layer_input, n_units, activation=tf.nn.relu)
    hidden_layers.append(layer_input)
# Output layer without activation.
# Fixed: the original referenced `OUTPUT`, which is undefined (NameError); the
# constant is `OUTPUTS`. Chaining on `layer_input` also lets an empty
# n_hidden_structure work, connecting X straight to the output layer.
net_out = tf.layers.dense(layer_input, OUTPUTS, name="y")

## Regression neural network

In [6]:
# Regression head: the output layer is built without an activation, so its raw
# value is used directly as the prediction.
y = net_out

## Classification neural network

In [7]:
def get_dict(x_val, t_val):
    """Build the feed_dict that binds a batch to the graph placeholders.

    Args:
        x_val: Input values to feed into the `X` placeholder.
        t_val: Target values to feed into the `t` placeholder.

    Returns:
        A dict mapping the module-level placeholders `X` and `t` to the
        given values.
    """
    # Fixed: the original wrapped the body in C-style braces, which is a
    # SyntaxError in Python.
    return {X: x_val, t: t_val}

# Classification head: softmax probabilities over the output units, trained by
# minimising the mean softmax cross-entropy with plain gradient descent.
# NOTE(review): OUTPUTS comes from the single 'rating' column, and a softmax
# over one unit is constantly 1 — this cell presumably expects one-hot labels
# with more than one class; confirm before relying on the accuracy values.
y = tf.nn.softmax(logits=net_out, name="y")
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=t, logits=net_out)
mean_log_loss = tf.reduce_mean(cross_entropy, name="cost")
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(mean_log_loss)
# A prediction is correct when the most probable class matches the label's class.
correct_predictions = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
init = tf.global_variables_initializer()
accuracy_train_history = []  # Accuracy on each mini-batch, one entry per training step.
with tf.Session() as sess:
    sess.run(init)
    for iteration in range(n_learning_iters):
        # Slide a batch_size-wide window over the first NUM_TRAINING_EXAMPLES
        # rows, wrapping around once the end is reached.
        offset = (iteration*batch_size)%(NUM_TRAINING_EXAMPLES-batch_size)
        dictionary = get_dict(X_train[offset:(offset+batch_size)], t_train[offset:(offset+batch_size)])
        sess.run(train_step, feed_dict=dictionary)
        accuracy_train_history.append(accuracy.eval(feed_dict=dictionary))
    # Fixed: the keyword is `feed_dict`; the original spelled it `feel_dict`,
    # which makes `eval` raise a TypeError.
    accuracy_train = accuracy.eval(feed_dict={
        X: X_train[:NUM_TRAINING_EXAMPLES],
        t: t_train[:NUM_TRAINING_EXAMPLES]
    })
    accuracy_test = accuracy.eval(feed_dict={
        X: X_test[:NUM_TEST_EXAMPLES],
        t: t_test[:NUM_TEST_EXAMPLES]
    })
    # Class probabilities for the evaluated test rows (no labels needed).
    predictions = y.eval(feed_dict={
        X: X_test[:NUM_TEST_EXAMPLES]
    })