In [4]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedShuffleSplit

# Build the graph from scratch so this cell can be re-run in the same kernel.
tf.reset_default_graph()

# NOTE(review): n_samples is hard-coded and not derived from the loaded
# dataset -- confirm it is still meaningful before relying on it.
n_samples = 10000
n_hidden_1 = 300
n_hidden_2 = 100
n_outputs = 10
# NOTE(review): learning_rate is declared but the optimizer below is created
# with its default settings; pass it explicitly if 0.01 is really intended.
learning_rate = 0.01

digits = load_digits()
digits_raw = digits['data']
digits_cls = digits['target']
# Features and label side by side: the label is the last column of every row.
digits_input = np.c_[digits_raw, digits_cls]

# A single stratified 80/20 split; the fixed seed makes the split (and hence
# the reported accuracies) reproducible across kernel restarts.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_indices, test_indices = next(sss.split(digits_input, digits_cls))
digits_train = digits_input[train_indices]
digits_test = digits_input[test_indices]

X = tf.placeholder(name="X", shape=(None, digits_raw.shape[1]), dtype=tf.float32)
# `(None,)` is a 1-D shape of unknown length; a bare `(None)` is just `None`
# and would leave the rank of the label tensor unspecified.
Y = tf.placeholder(name="Y", shape=(None,), dtype=tf.int32)

with tf.name_scope("dnn"):
    hidden_1 = tf.layers.dense(X, n_hidden_1, name="hidden_1", activation=tf.nn.elu)
    hidden_2 = tf.layers.dense(hidden_1, n_hidden_2, name="hidden_2", activation=tf.nn.elu)
    # No activation on the output layer: the loss below expects raw logits.
    logits = tf.layers.dense(hidden_2, n_outputs, name="output")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A sample counts as correct when its true label has the highest logit.
    correct = tf.nn.in_top_k(logits, Y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()


Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [6]:
import numpy as np
# Number of passes made by the outer training loop below.
n_epochs = 40
# Number of rows in each mini-batch produced by RandomBatch.
batch_size = 50

class RandomBatch():
    def __init__(self, data, size):
        self.data = data
        self.b_sz = size
        self.iter_cnt = 0
        self.v = range(0, len(data))
        
    def __iter__(self):
        self.iter_cnt = 0
        return self
        
    def next(self):
        if self.iter_cnt < (len(self.v) / self.b_sz) :
            self.iter_cnt = self.iter_cnt + 1
            return self.data[np.array(np.random.choice(self.v, self.b_sz, replace=False))]
        else:
            raise StopIteration

            
from datetime import datetime

# Draw mini-batches from the training split only. Sampling from the full
# `digits_input` array would also train on the test rows and make `acc_test`
# meaningless as a held-out score.
random_batch = RandomBatch(digits_train, batch_size)

log_dir = '/home/tf_logs/dnn_{}_{}_{}'.format(datetime.utcnow().strftime("%Y%m%d%H%M%S"),
                                              n_epochs,
                                              batch_size)
summary_writer = tf.summary.FileWriter(log_dir, graph=tf.get_default_graph())

# The label is stored as the last column of each row; everything before it is
# the feature vector.
train_feed = {X: digits_train[:, :-1], Y: digits_train[:, -1]}
test_feed = {X: digits_test[:, :-1], Y: digits_test[:, -1]}

try:
    with tf.Session() as sess:
        sess.run(init)
        # Count the optimizer steps actually taken so the summaries are
        # plotted against a real integer training step.
        global_step = 0
        for epoch in range(n_epochs):
            for batch in random_batch:
                batch_x, batch_y = batch[:, :-1], batch[:, -1]
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
                global_step += 1

            # Evaluate once per epoch on the complete train and test splits.
            acc_train = sess.run(accuracy, feed_dict=train_feed)
            acc_test = sess.run(accuracy, feed_dict=test_feed)

            summary = tf.Summary()
            summary.value.add(tag="acc_train", simple_value=acc_train)
            summary.value.add(tag="acc_test", simple_value=acc_test)
            summary_writer.add_summary(summary, global_step)

        saver.save(sess, '/home/tf_logs/model/dnn_mnist.ckpt')
finally:
    # Flush and release the event file even if training is interrupted.
    summary_writer.close()
        


# Chapter 10. ANN
* ANNs were not very successful for a long time
* They have become popular recently thanks to:
  * Huge amounts of data
  * Computing power
  * Improved training algorithms


## Artificial neuron
* Simple model of the biological neuron
* The output is activated when at least a certain number of inputs are active

## Perceptron 
* LTU (Linear Threshold Unit)
  * composed of weighted sum of inputs
  * activate output by applying step function over the weighted sum
* Training of Perceptron
  * Hebb's rule 
    * *"Cells that fire together, wire together"*
  * Training reinforces the weights of the inputs that contribute to the correct prediction
  * Unlike logistic regression, a perceptron does not output a class probability
    * This is one reason why logistic regression is often preferred
    
* Weakness 
  * XOR problem 
    * A single perceptron is unable to solve the XOR problem. 
    * Many researchers abandoned perceptrons in disappointment 
    * It turned out to be solvable by stacking multiple perceptrons (MLP)


In [11]:
## Perceptron in Scikit learn 
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
# Keep only feature columns 2 and 3 (petal length and petal width in the
# scikit-learn iris dataset).
x = iris.data[:, (2, 3)]
# Binary target: 1 for class 0, 0 for every other class. The builtin `int` is
# used because the `np.int` alias was deprecated and later removed from NumPy.
y = (iris.target == 0).astype(int)

per_clf = Perceptron(random_state=42)
per_clf.fit(x, y)

y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)

# scikit-learn's Perceptron is equivalent to
# SGDClassifier(loss="perceptron", learning_rate="constant", eta0=1, penalty=None)

[1]



### MLP and backpropagation
* An MLP is composed of:
  * one input layer
  * one or more hidden layers
  * one output layer 
* Every layer except the output layer has a bias neuron and is fully connected to the next layer
* When an ANN has two or more hidden layers, it is called a DNN 
* Backpropagation 
  * Training algorithm for MLP 
  * Measures how much each connection contributed to the error while propagating the error backwards from the output of the network, then adjusts the weights with an optimization algorithm (e.g. gradient descent)
  * Step function -> Logistic function (sigmoid)
    * The step function is not appropriate for backpropagation: it is flat everywhere, so it gives gradient descent no gradient to follow
    * The sigmoid has a non-zero derivative everywhere
* Activation functions
  * tanh (hyperbolic tangent)
  * ReLU 

In [33]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score

digits = load_digits()
# Cast explicitly to the float32 / int32 dtypes handed to the estimator.
digits_data = digits['data'].astype(np.float32)
digits_label = digits['target'].astype(np.int32)

feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(digits_data)
dnn_clf = tf.contrib.learn.DNNClassifier(
    hidden_units=[300, 100],
    n_classes=10,
    optimizer=tf.train.AdamOptimizer,
    dropout=0.5,
    feature_columns=feature_cols,
    activation_fn=tf.nn.elu,
)
# Wrap the estimator so it exposes scikit-learn style fit() / predict().
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)

dnn_clf.fit(digits_data, digits_label, batch_size=50, steps=1000)
y_pred = dnn_clf.predict(digits_data)
# NOTE: the predictions are made on the same samples the model was fitted on,
# so the value below is a training accuracy, not a generalization estimate.
# accuracy_score expects (y_true, y_pred) in that order.
accuracy_score(digits_label, y_pred['classes'])




INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f51f6a7a050>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_model_dir': '/tmp/tmpV3olr7', '_tf_random_seed': None, '_master': '', '_device_fn': None, '_session_creation_timeout_secs': 7200, '_num_worker_replicas': 0, '_task_id': 0, '_log_step_count_steps': 100, '_experimental_max_worker_delay_secs': None, '_evaluation_master': '', '_eval_distribute': None, '_environment': 'local', '_save_summary_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done runni

0.996661101836394

## Building a DNN with the TensorFlow low-level API 

In [34]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedShuffleSplit

# Build the graph from scratch so this cell can be re-run in the same kernel.
tf.reset_default_graph()

# NOTE(review): n_samples is hard-coded and not derived from the loaded
# dataset -- confirm it is still meaningful before relying on it.
n_samples = 10000
n_hidden_1 = 300
n_hidden_2 = 100
n_outputs = 10
# NOTE(review): learning_rate is declared but the optimizer below is created
# with its default settings; pass it explicitly if 0.01 is really intended.
learning_rate = 0.01

digits = load_digits()
digits_raw = digits['data']
digits_cls = digits['target']
# Features and label side by side: the label is the last column of every row.
digits_input = np.c_[digits_raw, digits_cls]

# A single stratified 80/20 split; the fixed seed makes the split (and hence
# the reported accuracies) reproducible across kernel restarts.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_indices, test_indices = next(sss.split(digits_input, digits_cls))
digits_train = digits_input[train_indices]
digits_test = digits_input[test_indices]

X = tf.placeholder(name="X", shape=(None, digits_raw.shape[1]), dtype=tf.float32)
# `(None,)` is a 1-D shape of unknown length; a bare `(None)` is just `None`
# and would leave the rank of the label tensor unspecified.
Y = tf.placeholder(name="Y", shape=(None,), dtype=tf.int32)

with tf.name_scope("dnn"):
    hidden_1 = tf.layers.dense(X, n_hidden_1, name="hidden_1", activation=tf.nn.elu)
    hidden_2 = tf.layers.dense(hidden_1, n_hidden_2, name="hidden_2", activation=tf.nn.elu)
    # No activation on the output layer: the loss below expects raw logits.
    logits = tf.layers.dense(hidden_2, n_outputs, name="output")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A sample counts as correct when its true label has the highest logit.
    correct = tf.nn.in_top_k(logits, Y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()


In [36]:
import numpy as np
# Number of passes made by the outer training loop below.
n_epochs = 40
# Number of rows in each mini-batch produced by RandomBatch.
batch_size = 50

class RandomBatch():
    def __init__(self, data, size):
        self.data = data
        self.b_sz = size
        self.iter_cnt = 0
        self.v = range(0, len(data))
        
    def __iter__(self):
        self.iter_cnt = 0
        return self
        
    def next(self):
        if self.iter_cnt < (len(self.v) / self.b_sz) :
            self.iter_cnt = self.iter_cnt + 1
            return self.data[np.array(np.random.choice(self.v, self.b_sz, replace=False))]
        else:
            raise StopIteration

            
from datetime import datetime

# Draw mini-batches from the training split only. Sampling from the full
# `digits_input` array would also train on the test rows and make `acc_test`
# meaningless as a held-out score.
random_batch = RandomBatch(digits_train, batch_size)

log_dir = '/home/tf_logs/dnn_{}_{}_{}'.format(datetime.utcnow().strftime("%Y%m%d%H%M%S"),
                                              n_epochs,
                                              batch_size)
summary_writer = tf.summary.FileWriter(log_dir, graph=tf.get_default_graph())

# The label is stored as the last column of each row; everything before it is
# the feature vector.
train_feed = {X: digits_train[:, :-1], Y: digits_train[:, -1]}
test_feed = {X: digits_test[:, :-1], Y: digits_test[:, -1]}

try:
    with tf.Session() as sess:
        sess.run(init)
        # Count the optimizer steps actually taken so the summaries are
        # plotted against a real integer training step.
        global_step = 0
        for epoch in range(n_epochs):
            for batch in random_batch:
                batch_x, batch_y = batch[:, :-1], batch[:, -1]
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
                global_step += 1

            # Evaluate once per epoch on the complete train and test splits.
            acc_train = sess.run(accuracy, feed_dict=train_feed)
            acc_test = sess.run(accuracy, feed_dict=test_feed)

            summary = tf.Summary()
            summary.value.add(tag="acc_train", simple_value=acc_train)
            summary.value.add(tag="acc_test", simple_value=acc_test)
            summary_writer.add_summary(summary, global_step)

        saver.save(sess, '/home/tf_logs/model/dnn_mnist.ckpt')
finally:
    # Flush and release the event file even if training is interrupted.
    summary_writer.close()
