# Deep neural networks training

Activation (non-linear) functions that do not saturate:<br>
Rectified Linear Unit, ReLU: $y=\max(0,x)$, $y \in [0,\infty)$, learning rate $\alpha \rightarrow 0$<br>
Leaky ReLU: $y=\max(s \cdot x, x)$, typically $s=0.01$<br>
Exponential Linear Unit, ELU: $y=s(e^{x}-1)$ for $x<0$ and $y=x$ for $x \geq 0$, usually $s=1$. If $s=1$, then $y \in (-1,\infty)$

In [1]:
# https://medium.com/@margaretmz/anaconda-jupyter-notebook-tensorflow-and-keras-b91f381405f8
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Load the cereal ratings table; the path is relative to the notebook's folder.
dataset = pd.read_csv('../datasets/regression/rating-cereals.csv')

# Strip every non-letter character (spaces, digits, punctuation) from the column names.
dataset.columns = [re.sub(r'[^a-zA-Z]+', '', column) for column in dataset.columns]

# All columns but one are network inputs; the remaining one is the single target
# (presumably `rating`, the last column -- confirm against the training cell).
INPUT = len(dataset.columns) - 1
OUTPUT = 1

# Min-max scaling: each column is mapped linearly onto [0, 1].
scaled_dataset = (dataset - dataset.min()) / (dataset.max() - dataset.min())

# The later train/validation/test splits use shuffle=False, so this shuffle alone
# decides which rows land in which partition. Seeding it makes a fresh
# "Restart & Run All" reproduce the same partitions and the same results.
RANDOM_SEED = 42
shuffle_scaled_dataset = shuffle(scaled_dataset, random_state=RANDOM_SEED)

# Preview a few rows instead of dumping the whole frame into the notebook.
shuffle_scaled_dataset.head(10)

Unnamed: 0,calories,protein,fat,sodium,fiber,rating
12,0.545455,0.4,0.4,0.437500,0.142857,0.295490
46,0.636364,0.2,0.2,0.593750,0.000000,0.157033
71,0.545455,0.0,0.2,0.437500,0.000000,0.128340
5,0.545455,0.2,0.0,0.390625,0.071429,0.199985
3,0.000000,0.6,0.0,0.437500,1.000000,1.000000
33,0.636364,0.0,0.4,0.687500,0.071429,0.050599
40,0.545455,0.2,0.2,0.562500,0.000000,0.114875
44,1.000000,0.4,0.4,0.468750,0.214286,0.162175
15,0.454545,0.2,0.0,0.906250,0.071429,0.367694
27,0.545455,0.0,0.2,0.421875,0.000000,0.131941


In [3]:
# Total number of rows available after scaling and shuffling.
nrows = len(shuffle_scaled_dataset)

# First cut: 70% of the rows go to `train`, the other 30% to `rest`.
# shuffle=False keeps the row order produced by the earlier shuffle.
train, rest = train_test_split(
    shuffle_scaled_dataset,
    test_size=0.3,
    train_size=0.7,
    shuffle=False,
)

# Second cut: `rest` is divided 70/30 into `validation` and `test`.
validation, test = train_test_split(
    rest,
    test_size=0.3,
    train_size=0.7,
    shuffle=False,
)

# Report the size of each partition.
print(
    'Train length:', len(train),
    '\nValidation length:', len(validation),
    '\nTest length:', len(test),
)

Train length: 52 
Validation length: 16 
Test length: 7


In [4]:
# Network architecture and training hyper-parameters.
n_hidden_structure = [4, 3, 1]  # Neurons in each hidden layer, in order.
epochs = 100  # Number of epochs of the training process.
batch_size = 4  # Rows taken from the training dataframe per optimisation step.
learning_rate = 0.01  # Step size passed to the optimiser.

# Mini-batches needed to cover the training set once: ceil(len(train) / batch_size),
# computed with integer arithmetic. The previous int(len / batch + 1) form counted
# one batch too many per epoch whenever len(train) is an exact multiple of
# batch_size (e.g. 52 rows with batch_size 4 gave 14 instead of 13).
batches_per_epoch = (len(train) + batch_size - 1) // batch_size
n_learning_iters = batches_per_epoch * epochs

# Graph inputs. The leading dimension is None so any batch size can be fed.
X = tf.placeholder(dtype=tf.float32, shape=(None, INPUT), name='X')  # features
t = tf.placeholder(dtype=tf.float32, shape=(None, OUTPUT), name="t")  # targets

In [6]:
# Build the stack of fully connected ReLU layers described by n_hidden_structure.
# Each layer consumes the output of the previous one; the first consumes X.
hidden_layers = []
layer_input = X
for n_units in n_hidden_structure:
    layer_input = tf.layers.dense(layer_input, n_units, activation=tf.nn.relu)
    hidden_layers.append(layer_input)

# Output layer: OUTPUT units and no activation, so it yields raw (linear) values.
net_out = tf.layers.dense(hidden_layers[-1], OUTPUT, name="y")

## Regression neural network

In [None]:
# Regression head: the output layer's raw (activation-free) value is used
# directly as the prediction `y`.
y = net_out

## Classification neural network

In [None]:
# Classification head built on the raw network output (logits).
# NOTE(review): OUTPUT is 1 in this notebook. Softmax over a single logit is
# always 1.0, which makes the cross-entropy and the accuracy below constant.
# Confirm the targets are one-hot with OUTPUT > 1 before using this cell.

# Class probabilities.
y = tf.nn.softmax(logits=net_out, name="y")

# Per-example cross-entropy taken from the logits, then averaged into the cost.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=net_out,
    labels=t,
)
mean_log_loss = tf.reduce_mean(cross_entropy, name="cost")

# One plain gradient-descent update on the mean cost.
train_step = (
    tf.train.GradientDescentOptimizer(learning_rate)
    .minimize(mean_log_loss)
)

# Accuracy: share of rows whose arg-max prediction equals the arg-max target.
correct_predictions = tf.equal(
    tf.argmax(y, axis=1),
    tf.argmax(t, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_predictions, dtype=tf.float32))

In [None]:
init = tf.global_variables_initializer()
accuracy_train_history = []
with tf.Session() as sess:
    sess.run(init)
    for iteration in range(n_learning_iters):
        offset = (iteration*batch_size)%(len(train)-batch_size)
        