#  Chapter 10: Introduction to Artificial Neural Networks
## The Perceptron

Scikit-Learn provides a `Perceptron` class that implements a single LTU network:

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length, petal width
y = (iris.target == 0).astype(np.int)  # Iris Setosa?

per_clf = Perceptron(random_state = 42)
per_clf.fit(X, y)

y_pred = per_clf.predict([[2, 0.5]])



# Training an MLP with TensorFlow's High-Level API

Simplest way to do it is to use the high-level API TF.Learn, which offers a Scikit-Learn compatible API. The following code trains a DNN for classification with two hidden layers (one with 300 neurons and another with 100) and a softmax output layer with 10 neurons:

In [2]:
import tensorflow as tf

feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units = [300, 100], n_classes = 10,
                                         feature_columns = feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)  # if TensorFlow >= 1.1
dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

  return f(*args, **kwds)


NameError: name 'X_train' is not defined

# Training a DNN Using Plain TensorFlow
## Construction Phase

In [7]:
import tensorflow as tf

n_inputs = 28 * 28. # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [8]:
# We need to use placeholder nodes again for the batch gradient descent
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = 'X')
y = tf.placeholder(tf.int64, shape = (None), name = 'y')

In [9]:
# Placeholder X is the input layer. Need to create the two hidden layers and the output layer. 
# Create a function to create one layer at a time

def neuron_layer(X, n_neurons, name, activation = None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs + n_neurons)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev = stddev)
        W = tf.Variable(init, name = 'kernel')
        b = tf.Variable(tf.zeros([n_neurons]), name = 'bias')
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        return Z

In [10]:
# Do it again, using built-in TF functionality

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, name = 'hidden1', activation = tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name = 'hidden2', activation = tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name = 'outputs')

In [11]:
# Need to now define the cost function for training, using cross entropy.
# Gives us a 1D tensor containing the cross entropy for each instance, then use reduce_mean to get a mean cross entropy

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = 'loss')
    


In [12]:
# Next create a GradientDescentOptimizer to tweak the model parameters to minimize the cost function

learning_rate = 0.01

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [13]:
# Last step in construction is to specify how to evaluate the model. Use accuracy.
# For each instance, check if the prediction is correct

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [14]:
# Create a node to initialize all variables, and create a Saver
init = tf.global_variables_initializer()
saver = tf.train.Saver()

## Execution Phase
First, load MNIST. 

In [16]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


Exception: URL fetch failure on https://s3.amazonaws.com/img-datasets/mnist.npz: None -- [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:749)

In [None]:
# Define number of epochs and size of the mini-batches
n_epochs = 40
batch_size = 50

# Train the model!

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict = {X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict = {X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Val accuracy", acc_val)
        
    
# Now that the neural network is trained, can use it to make predictions,
# use the same construction phase, change the execution phase like this

with tf.Session() as sess:
    saver.restore(sess, './my_model_final.ckpt')
    X_new_scaled = [...]  # Some new images (scaled from 0 to 1)
    Z = logits.eval(feed_dict = {X: X_new_scaled})
    y_pred = np.argmax(Z, axis = 1)

## Exercises
### 9

In [16]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split



In [17]:
import pickle
data = pickle.load(open('mnist.pkl', 'rb'), encoding = 'bytes')

(X_train, y_train), (X_test, y_test) = data
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]



In [18]:
n_inputs = 28 * 28. # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

# We need to use placeholder nodes again for the batch gradient descent
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = 'X')
y = tf.placeholder(tf.int64, shape = (None), name = 'y')

In [38]:
# Do it again, using built-in TF functionality

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, name = 'hidden1', activation = tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name = 'hidden2', activation = tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name = 'outputs')
    
# Need to now define the cost function for training, using cross entropy.
# Gives us a 1D tensor containing the cross entropy for each instance, then use reduce_mean to get a mean cross entropy

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = 'loss')
    loss_summary = tf.summary.scalar('log_loss', loss)

ValueError: Variable hidden1/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "<ipython-input-19-97bc7527b24f>", line 4, in <module>
    hidden1 = tf.layers.dense(X, n_hidden1, name = 'hidden1', activation = tf.nn.relu)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):


In [30]:
# Next create a GradientDescentOptimizer to tweak the model parameters to minimize the cost function

learning_rate = 0.01

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
# Last step in construction is to specify how to evaluate the model. Use accuracy.
# For each instance, check if the prediction is correct

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [31]:
# Create a node to initialize all variables, and create a Saver
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [32]:


from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)



In [33]:


logdir = log_dir("mnist_dnn")



In [34]:
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [35]:
m, n = X_train.shape

In [36]:


def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch



In [37]:
import os

n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/my_deep_mnist_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_deep_mnist_model"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # if the checkpoint file exists, restore the model and load the epoch number
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Training was interrupted. Continuing at epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stopping")
                    break

NameError: name 'loss_summary' is not defined

In [9]:
import pickle
data = pickle.load(open('mnist.pkl', 'rb'), encoding = 'bytes')
(x_train, y_train), (x_test, y_test) = data