In [1]:
import numpy  as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

In [2]:
iris = load_iris()
x = iris.data[:, [2,3]] # petal length, petal width
y = (iris.target == 0).astype(np.int) # iris setosa?

In [3]:
per_clf = Perceptron(random_state = 42)
per_clf.fit(x, y)
y_pred = per_clf.predict([[2,0.5]])

In [4]:
# perceptron learning algorithm strongly resembles Stochastic Gradient
# Descent. scikit_learn perceptron class is equivalent to using an 
# SGDClassifier with the following hyperparameters: loss = 'perceptron',
# learning_rate = 'constant', eta0 = 1(the learning rate) and penalty = None
# (no regularization).
import sklearn as sk

## TRANIING AN MLP WITH TENSORFLOW'S HIGH-LEVEL API

In [5]:
import tensorflow as tf

In [9]:
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
from sklearn.cross_validation import train_test_split
x_train, x_test,y_train, y_test = train_test_split(x,y, train_size = 0.2, random_state = 42)

from sklearn.preprocessing import MinMaxScaler
scale = MinMaxScaler()

feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(scale.fit_transform(x_train))
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units = [300,100], n_classes = 10,
                                        feature_columns = feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf) # if Tensorflow <= 1.1
dnn_clf.fit(x_train, y_train, batch_size = 50, steps = 40000)


from sklearn.metrics import accuracy_score
y_pred = dnn_clf.predict(scale.fit_transform(x_test))
accuracy_score(y_test, y_pred['classes'])


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000009222400>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'C:\\Users\\ACER\\AppData\\Local\\Temp\\tmpc03kpxuk'}
Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\ACER\AppData\Local\Temp\tmpc03kpxuk\model.ckpt.
INFO:tensorflow:loss = 2.90583, step = 1
INFO:tensorflow:global_step/

0.35833333333333334

## Training a DNN using plain TensorFlow

In [10]:
# construction phase
# in this section we will build the same model as before using this
# API, and we will implement the gradient Descent to train it on the
# MNIST dataset.
# the first step is the construction phase, building the tensorflow graph.
# The second step is the execution phase, where you actually run the graph
# mto train the model

In [12]:
# construction phase
# starting by specifying the number of inputs and outputs, and set the number
# of hidden neurons in each layer:

import tensorflow as tf
n_inputs = 28*28 # MNIST
n_hidden1 = 300
n_hidden2= 100
n_outputs = 10

# we can use placeholder nodes to represent the training data and 
# target data. The shape of x is only partially defined.
# since we do not know the number of training batch for x and y
# we set the shape for x to be (none, inputs) and y as (none).

x = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "x")
y = tf.placeholder(tf.int64, shape = (None), name = "y")

# the placeholder X will act as the input layer; during the execution phase
# it will be replaced with one training batch at a time.
# we need to create the two hidden layers and the output layers.
# The two hidden layers are almost identical: they differ only by the inputs
# they are connected to and by the number of neurons they contain.

# the output layer is also very similar, but it uses a softmax activation function
# instead of a ReLU activiation function.

#The function neuron_layer() is used to create one layer at a time.

def neuron_layer(x, n_neurons, name, activation = None):
    with tf.name_scope(name):
        n_inputs = int(x.get_shape()[1])
        stddev = 2/np.sqrt(n_inputs + n_neurons)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev = stddev)
        W = tf.Variable(init, name = "kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name = "bias")
        z = tf.matmul(x,W) + b
        if activation is not None:
            return activation(z)
        
        else:
            return z
# if activation parameter is provided, such as tf.nn.relu(i.e.max(0,Z)),
# then the code returns activation(Z), or else it just returns Z.

# since we have crated the neuron layer. we can now create the deep
# neural network.

with tf.name_scope("dnn"):
    hidden1 = neuron_layer(x, n_hidden1, name = "hidden1", 
                          activation = tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name = "hidden2", 
                          activation = tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name = "outputs")
    
    
# as you might expect tensorflow comes with many handy function
# TensorFlow's tf.layers.dense() function (previously called tf.contrib.layers.fully_connected())
#creates a fully connected layer, where all the inputs are connected to all
# neurons in the layer.

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(x, n_hidden1, name = "hidden1", activation = tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name = "hidden2", activation = tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name = "outputs")
    
# we need to define the cost function that we will use to train the neural network model.
# we will use cross entropy. It penalizes models that estimate a low probability for the 
# target class.

# using the sparse_soft_max_cross_entropy_with_logits(), it computes
# the cross entropy based on the logits.

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = 'loss')
    
# this computes the cross entropy based on the "logit".

# now we have the neural network model, we have the cost function and
# now we need to define a GradientDescentOptimizer that will tweak the 
# model parameters to minimize the cost function.

learing_rate = 0.01
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
# evaluation and performance measure of the neural network model.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y,1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    #  create node to initialize all variables and also create a saver
    # to save our trained model parameters to disk.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # this concludes the construction phase.
    
# we created a function to build a neuron layer, we used it to create the
# DNN, we defined the cost function, we created an optimizer, and finally we
# defined the performance measure.


ValueError: Variable hidden1/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "C:\Users\ACER\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
  File "C:\Users\ACER\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "C:\Users\ACER\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)


In [None]:
# execution phase

# first let's load the MNIST
# Tensorflow offers its own helper that fetches the data, scales it 
# (between 0 and 1), shuffle it, and provides a simple function to load one
# mini-batch a time. moreover the data has been split into training, validation
# and test set.

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(r"C:\Users\ACER\Desktop\pyreach\Life Data\Tensorflow")

In [None]:
# now defining the number of epochs that we want to run as well as the
# size of the mini-batches:
n_epochs = 40
batch_size = 50

# train the model

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict = {x: x_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict = {x: x_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict = {x: mnist.validation.images, 
                                            y: mnist.validation.labels})
        print (epoch, "train accuracy: ", acc_train, "val_accuracy: ", acc_val)
    save_path = saver.save(sess, "C:\Users\ACER\Desktop\pyreach\Life Data\Tensorflow\neural_model.ckpt")
    

In [None]:
# using the Neural Network
# now that the neural network is trained, we can now use it to make
# predictions.

with tf.Session() as sess:
    saver.restore(sess, "rC:\Users\ACER\Desktop\pyreach\Life Data\Tensorflow\neural_model.ckpt")
    x_new_scaled = [....] # some new images to be predicted
    z = logits.eval(feed_dict = {x: x_new_scaled})
    y_pred = np.argmax(z, axis = 1)
    
# parameter hypertuning, through the use of oscar or randomized search instead of gridsearch
# and cross_validation.

# Number of hidden layers.
# 

In [1]:
from sklearn.datasets import load_iris
iris = load_iris()
x, y = iris.data, iris.target

import tensorflow as tf
n_hidden1 = 10

In [5]:
# using the Xavier initialization strategy we can speed p training
# considerably, and it is one of the tricks that led to the current success
# of deep learning.

# by default, the tf.layers.dense() function uses Xavier intialization (with a uniform)
# distribution). You can change this to He intialization by using the variance_scaling_initializer()
# function like this

he_init = tf.contrib.layers.variance_scaling_initializer()
hidden1 = tf.layers.dense(x, n_hidden1, activation = tf.nn.relu, kernel_initializer = he_init,
                         name = 'hidden1')

AttributeError: 'numpy.dtype' object has no attribute 'base_dtype'

In [None]:
# nonsaturating activation functions 
# the use of leakyRelu function out perfroms the normal ReLU function.
# leakyRelu function out perfroms the relu on large image datasets but on smaller datasets
# it runs the risk of overfitting the training set.

# a 2015 paper by Djork-Arne Clevert et al proposed a new activiation function
# called the exponential linear unit (ELU) that outperformed all the RELU
# variants in their experiments.

# the major drawback of ELU activation function is that it is slower to compute
# than the ReLu and its variants (due to the use of exponential function), but 
# during training this is compensated by the faster convergence rate.

# Tensorflow offers an elu() function that you can use to build your neural network.

hidden1 = tf.layers.dense(x, n_hidden1, activation = tf.nn.elu, name = 'hidden1')

def leaky_relu(z, name = None):
    return tf.maximum(0.01 * z,z, name = name)


hidden1 = tf.layers.dense(x, n_hidden1, activation = leaky_relu, name = "hidden1")

In [None]:
# Batch Normalization

# although using He initialization along with ELU (or any variant of Relu) 
# can significantly reduce the vanishing/exploding gradients problems at the
# beginning of training, it doesn't gurantee that they won't come back during
# training.

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

x = tf.placeholder(tf.float32, shape = (None, n_inputs), name = 'x')

training = tf.placeholder_with_default(False, shape = (), name = 'training')

hidden1 = tf.layers.dense(x, n_hidden1, name = "hidden1")
bn1 = tf.layers.batch_normalization(hidden1, training = training, momentum = 0.9)
bn1_act = tf.nn.elu(bn1)
hidden2 = tf.layers.dense(bn1_act, n_hidden2, name = "hidden2")
bn2 = tf.layers.batch_normalization(hidden2, training= training, momentum = 0.9)
bn2_act = tf.nn.elu(bn2)
logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name = 'outputs')
logits = tf.layers.batch_normalization(logits_before_bn, training = training, 
                                      momentum = 0.9)