## Feed Forward Soft Max Model Example

This notebook demonstrates a feed forward model using a soft-max output with cross-entropy cost.

In [1]:
import tensorflow as tf
import numpy as np

---
### Load Data

In [2]:
from sklearn import datasets

In [3]:
# Load the Iris dataset that ships with scikit-learn; `.data` holds the
# features and `.target` holds the integer class labels.
iris_dataset = datasets.load_iris()

In [4]:
iris_dataset.data.shape

(150, 4)

In [5]:
iris_dataset.target.shape

(150,)

### Model Class

We can use classes to encapsulate tensorflow models. The skeleton below shows one way of using classes with tensorflow.

The model class has methods, variables, and properties that capture both the tensorflow graph and the tensorflow session.

#### Tensorflow Graph

A tensorflow graph is a computational graph of different tensorflow operations. It defines the computation and how different operations and tensors relate, but it doesn't actually do the computation or store the values of the variables. All of that magic happens within the tensorflow session.

#### Tensorflow Session

A tensorflow session is the context where values for tensorflow variables are instantiated and computations are run. So if you are saving a model's weights, you are actually saving the weights held by the tensorflow session. If you are loading a model's weights, you need to load them into a session. When variables are initialized, that has to happen within a session. In a way, the graph is stateless: state is stored in sessions. The session also takes care of running computations, so training steps must be run in the session as well.

A session is instantiated with a graph, typically the current default graph. A session is only able to run computations on the graph that is tied to the session.

In [6]:
class DeepLearningModel():
    """
    Base class holding helpers shared by tensorflow models: numpy initializers
    for weights and biases, and an element-wise gradient clipping op.
    """

    def __init__(self):
        # `self` is required so the base class can be instantiated directly;
        # without it `DeepLearningModel()` raises TypeError.
        return

    def gen_uniform_random_weights(self, k_out, k_in, scale, dtype=np.float32):
        """
        Returns weights of shape (k_in, k_out) drawn uniformly from [-scale, scale).

        args:
            k_out: number of output units (second dimension of the result)
            k_in: number of input units (first dimension of the result)
            scale: half-width of the uniform interval
            dtype: numpy dtype of the returned array
        """
        # np.random.rand samples [0, 1); shift and stretch it to [-scale, scale).
        return ((np.random.rand(k_in, k_out) * 2 - 1) * scale).astype(dtype)

    def gen_random_weights_tanh(self, k_out, k_in, dtype=np.float32):
        """
        Returns (k_in, k_out) uniform weights with scale sqrt(6 / (k_in + k_out)).
        """
        scale = (6. / (k_in + k_out)) ** .5
        return self.gen_uniform_random_weights(k_out, k_in, scale, dtype=dtype)

    def gen_random_weights_sigmoid(self, k_out, k_in, dtype=np.float32):
        """
        Returns (k_in, k_out) uniform weights with scale 4 * sqrt(6 / (k_in + k_out)).
        """
        scale = 4. * (6. / (k_in + k_out)) ** .5
        return self.gen_uniform_random_weights(k_out, k_in, scale, dtype=dtype)

    def gen_random_weights_reLu(self, k_out, k_in, dtype=np.float32):
        """
        Returns (k_in, k_out) uniform weights with scale sqrt(2 / (k_in + k_out)).
        """
        scale = (2. / (k_in + k_out)) ** .5
        return self.gen_uniform_random_weights(k_out, k_in, scale, dtype=dtype)

    def gen_biases(self, k, dtype=np.float32):
        """
        Initialize biases as zero. Returns an array of shape (k, ).
        """
        return np.zeros((k, ), dtype=dtype)

    def clip_gradient(self, grad, magnitude=1.0):
        """
        Returns a clipped gradient, where every element is between
        [-magnitude, magnitude]. The sign of `magnitude` is ignored.

        A `None` gradient (no gradient available for a variable) is passed
        through unchanged instead of being handed to tensorflow.
        """
        if grad is None:
            return None
        magnitude = abs(magnitude)
        return tf.maximum(tf.minimum(grad, magnitude), - magnitude)

In [9]:
class FeedForwardSoftMaxModel(DeepLearningModel):
    """
    Feed forward classifier with a soft-max output layer.

    The network is `num_layers` fully connected hidden layers followed by a
    soft-max layer. The cost is the mean sparse soft-max cross-entropy plus an
    L2 penalty on the hidden-layer weights, minimized with Adam on clipped
    gradients.

    The graph, the session, the merged tensorboard summaries and the
    tensorboard writer are all created lazily by the properties at the bottom
    of the class.
    """

    def __init__(self, num_layers, k_hidden, k_input, k_softmax, activation_function=tf.nn.tanh):
        """
        args:
            num_layers: number of hidden layers
            k_hidden: number of units in the hidden layers
            k_input: dimensionality of the input
            k_softmax: dimensionality of the output layer
            activation_function: op applied to every hidden pre-activation;
                it is called as activation_function(Z, name=...)
        """
        self._graph = None
        self._session = None
        self.num_layers = num_layers
        self.k_hidden = k_hidden
        self.k_input = k_input
        self.k_softmax = k_softmax
        self.activation_function = activation_function

        self._merged_training_summary = None
        self._merged_validation_summary = None
        # Must be defined before the `tensorboard_writer` property is read;
        # otherwise its lazy `is None` check raises AttributeError.
        self._tensorboard_writer = None

    def load_model(self, model_filename):
        """
        Open a new session on this model's graph and restore the variables
        saved in `model_filename` into it.
        """
        with self.graph.as_default():
            model_saver = tf.train.Saver()

        # Release the session being replaced (if any) rather than leaking it.
        if self._session is not None:
            self._session.close()
        self._session = tf.Session(graph=self.graph)
        model_saver.restore(self._session, model_filename)
        return

    def save_model(self, model_filename):
        """
        Save the variable values held by the current session to `model_filename`.
        """
        with self.graph.as_default():
            model_saver = tf.train.Saver()

        model_saver.save(self.session, model_filename)

    def create_graph(self):
        """
        Build the tensorflow graph and store its tensors/ops on the instance.

        Dictionaries are keyed by layer: `W` by (output_layer, input_layer)
        tuples, the others by layer index (or the string "softmax").
        """
        self.W = {}
        self.b = {}
        self.Z = {}
        self.A = {}
        self.A_dropped_out = {}
        self.new_W_value = {}
        self.new_b_value = {}
        self.assign_new_W = {}
        self.assign_new_b = {}

        self._graph = tf.Graph()
        with self._graph.as_default():
            with tf.name_scope("inputs"):
                self.learning_rate = tf.placeholder(tf.float32, shape=(), name="learning_rate")
                self.adam_beta1 = tf.placeholder(tf.float32, shape=(), name="adam_beta1")
                self.adam_beta2 = tf.placeholder(tf.float32, shape=(), name="adam_beta2")
                self.adam_epsilon = tf.placeholder(tf.float32, shape=(), name="adam_epsilon")
                self.input_dropout_keep_prob = tf.placeholder(tf.float32, shape=(), name="input_dropout_keep_prob")
                self.hidden_dropout_keep_prob = tf.placeholder(tf.float32, shape=(), name="hidden_dropout_keep_prob")

                self.X = tf.placeholder(tf.float32, name="X")
                self.Y = tf.placeholder(tf.int32, shape=(None,), name="Y")
                # Layer 0 is the input itself.
                self.A[0] = tf.identity(self.X, name="A_0")
                self.A_dropped_out[0] = tf.nn.dropout(self.A[0], self.input_dropout_keep_prob, name="A_droppedout_0")

            with tf.name_scope("hidden_layers"):
                for layer in range(1, self.num_layers + 1):
                    # The first hidden layer reads the input, the others read
                    # the previous hidden layer.
                    k_in = self.k_input if layer == 1 else self.k_hidden

                    # NOTE: weights always use the tanh initializer, whatever
                    # `activation_function` is.
                    self.W[(layer, layer - 1)] = tf.Variable(self.gen_random_weights_tanh(self.k_hidden, k_in), dtype=tf.float32, name="W_%i_%i" % (layer, layer - 1))
                    self.b[layer] = tf.Variable(self.gen_biases(self.k_hidden), dtype=tf.float32, name="b_%i" % (layer, ))
                    self.Z[layer] = tf.add(tf.matmul(self.A_dropped_out[layer - 1], self.W[(layer, layer - 1)]), self.b[layer], name="Z_%i" % (layer, ))
                    self.A[layer] = self.activation_function(self.Z[layer], name="A_%i" % (layer, ))
                    self.A_dropped_out[layer] = tf.nn.dropout(self.A[layer], self.hidden_dropout_keep_prob, name="A_droppedout_%i" % (layer, ))

            with tf.name_scope("softmax"):
                layer = "softmax"
                # With no hidden layers the soft-max layer reads the input directly.
                k_in = self.k_hidden if self.num_layers > 0 else self.k_input
                self.W[(layer, self.num_layers)] = tf.Variable(self.gen_random_weights_tanh(self.k_softmax, k_in), dtype=tf.float32, name="W_%s_%i" % (layer, self.num_layers))
                self.b[layer] = tf.Variable(self.gen_biases(self.k_softmax), dtype=tf.float32, name="b_%s" % (layer, ))
                # Read the dropped-out activation so that dropout on the last
                # hidden layer is actually applied (it used to be computed but
                # bypassed). With keep_prob == 1.0 the result is unchanged.
                self.Z[layer] = tf.add(tf.matmul(self.A_dropped_out[self.num_layers], self.W[(layer, self.num_layers)]), self.b[layer], name="Z_%s" % (layer, ))
                self.softmax = tf.nn.softmax(self.Z[layer], name="soft_max")

            with tf.name_scope("parameter_assignment"):
                # One placeholder + assign op per variable, so that values can
                # be pushed into the session from python (assign_W / assign_b).
                for layer_pair in self.W:
                    self.new_W_value[layer_pair] = tf.placeholder(tf.float32)
                    self.assign_new_W[layer_pair] = self.W[layer_pair].assign(self.new_W_value[layer_pair])
                for layer in self.b:
                    self.new_b_value[layer] = tf.placeholder(tf.float32)
                    self.assign_new_b[layer] = self.b[layer].assign(self.new_b_value[layer])

            with tf.name_scope("cost"):
                with tf.name_scope("regularization"):
                    self.L2_reg = tf.placeholder(tf.float32, name="L2_reg")
                    # Only the hidden-layer weights are penalized; the soft-max
                    # weights and the biases are not.
                    cost_L2 = None
                    for layer in range(1, self.num_layers + 1):
                        penalty = self.L2_reg * tf.reduce_mean(tf.square(self.W[(layer, layer - 1)]))
                        cost_L2 = penalty if cost_L2 is None else cost_L2 + penalty
                    if cost_L2 is None:
                        # No hidden layers: nothing to regularize.
                        cost_L2 = tf.constant(0.0, dtype=tf.float32)
                    self.cost_L2 = tf.identity(cost_L2, 'cost_L2_regularization')

                with tf.name_scope("error"):
                    self.cross_entropy_error = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.Y, logits=self.Z["softmax"]))
                self.total_cost = tf.add(self.cost_L2, self.cross_entropy_error)

            with tf.name_scope("optimization"):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.adam_beta1, beta2=self.adam_beta2, epsilon=self.adam_epsilon)
                self.grads_and_vars = self.optimizer.compute_gradients(self.total_cost)
                # Skip variables without a gradient; they cannot be clipped.
                self.clipped_grads_and_vars = [(self.clip_gradient(grad), var) for grad, var in self.grads_and_vars if grad is not None]
                self.update_op = self.optimizer.apply_gradients(self.clipped_grads_and_vars)

            self.init_op = tf.global_variables_initializer()
        return

    def create_tensorboard_summaries(self):
        """
        Create the summary ops and merge them into two groups: 'train'
        (cross-entropy, L2 cost, weight and bias histograms) and 'validation'
        (cross-entropy only, under a separate tag).
        """
        with self.graph.as_default():
            with tf.name_scope("summaries"):
                tf.summary.scalar('cross_entropy_error', self.cross_entropy_error, collections=['train'])
                tf.summary.scalar('cost_L2_regularization', self.cost_L2, collections=['train'])

                # Same tensor as the training error; only the tag differs.
                tf.summary.scalar('cross_entropy_error_validation', self.cross_entropy_error, collections=['validation'])

                for layer in range(1, self.num_layers + 1):
                    tf.summary.histogram("W_%i_%i" % (layer, layer - 1), self.W[(layer, layer - 1)], collections=['train'])
                    tf.summary.histogram("b_%i" % (layer, ), self.b[layer], collections=['train'])
                for layer in ["softmax"]:
                    tf.summary.histogram("W_%s_%i" % (layer, self.num_layers), self.W[(layer, self.num_layers)], collections=['train'])
                    tf.summary.histogram("b_%s" % (layer, ), self.b[layer], collections=['train'])

                self._merged_training_summary = tf.summary.merge_all(key='train')
                self._merged_validation_summary = tf.summary.merge_all(key='validation')
        return

    def create_tensorboard_writer(self, tensorboard_directory="./"):
        """
        Create the FileWriter that logs to `tensorboard_directory`. The graph
        is handed to the writer here; no session is used by this method.
        """
        self._tensorboard_writer = tf.summary.FileWriter(tensorboard_directory, graph=self.graph)

    def write_graph(self):
        """Add the model's graph to the tensorboard log."""
        self.tensorboard_writer.add_graph(self.graph)
        return

    def init_model(self, adam_beta1=0.9, adam_beta2=0.999):
        """
        Run the global variable initializer in the session.

        The Adam beta placeholders are fed as well, presumably because the
        optimizer's internal accumulators are initialized from them - confirm
        against the tensorflow version in use before removing the feed.
        """
        self.session.run(self.init_op,
                         feed_dict={
                             self.adam_beta1: adam_beta1,
                             self.adam_beta2: adam_beta2
                         })

    def assign_W(self, layer, value):
        """
        Overwrite a weight matrix in the session.

        args:
            layer: key into `self.W`, a tuple (output_layer, input_layer),
                e.g. (2, 1) for the weights feeding layer 2 from layer 1, or
                ('softmax', num_layers) for the output layer.
            value: new value for the variable
        """
        self.session.run(self.assign_new_W[layer],
                         feed_dict={self.new_W_value[layer]: value})

    def assign_b(self, layer, value):
        """
        Overwrite a bias vector in the session.

        args:
            layer: key into `self.b`, i.e. the layer index (1, 2, ...) or
                'softmax' for the output layer.
            value: new value for the variable
        """
        self.session.run(self.assign_new_b[layer],
                         feed_dict={self.new_b_value[layer]: value})

    def train_model(self, X, Y, learning_rate=1e-2, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08,
                    input_dropout_keep_prob=1.0, hidden_dropout_keep_prob=1.0,
                    L2_reg=1e-4):
        """
        Run a single Adam update step on the batch (X, Y).

        args:
            X: input batch, fed to the float32 placeholder `X`
            Y: integer class labels, fed to the int32 placeholder `Y` of shape (None,)
            learning_rate: the Adam learning rate
            adam_beta1: the exponential decay rate for the 1st moment estimates
            adam_beta2: the exponential decay rate for the 2nd moment estimates
            adam_epsilon: a small constant for numerical stability. This epsilon
                is "epsilon hat" in the Kingma and Ba paper (in the formula just
                before Section 2.1), not the epsilon in Algorithm 1 of the paper.
            input_dropout_keep_prob: keep probability for dropout on the input
            hidden_dropout_keep_prob: keep probability for dropout on hidden activations
            L2_reg: multiplier of the L2 penalty
        """
        self.session.run(self.update_op,
                         feed_dict={
                             self.X: X,
                             self.Y: Y,
                             self.learning_rate: learning_rate,
                             self.adam_beta1: adam_beta1,
                             self.adam_beta2: adam_beta2,
                             self.adam_epsilon: adam_epsilon,
                             self.input_dropout_keep_prob: input_dropout_keep_prob,
                             self.hidden_dropout_keep_prob: hidden_dropout_keep_prob,
                             self.L2_reg: L2_reg
                         })
        return

    def _write_summary(self, merged_summary, X, Y, step, L2_reg):
        """Evaluate `merged_summary` on (X, Y) with dropout disabled and log it at `step`."""
        summary = self.session.run(merged_summary,
                                   feed_dict={
                                       self.X: X,
                                       self.Y: Y,
                                       # keep_prob 1.0 turns dropout off for evaluation
                                       self.input_dropout_keep_prob: 1.0,
                                       self.hidden_dropout_keep_prob: 1.0,
                                       self.L2_reg: L2_reg
                                   })
        self.tensorboard_writer.add_summary(summary, step)

    def write_validation_summary(self, X, Y, step,
                                 L2_reg=1e-4):
        """Log the 'validation' summaries computed on (X, Y) at global step `step`."""
        self._write_summary(self.merged_validation_summary, X, Y, step, L2_reg)
        return

    def write_training_summary(self, X, Y, step,
                               L2_reg=1e-4):
        """Log the 'train' summaries computed on (X, Y) at global step `step`."""
        self._write_summary(self.merged_training_summary, X, Y, step, L2_reg)
        return

    @property
    def graph(self):
        """The model's tf.Graph, built on first access."""
        if self._graph is None:
            self.create_graph()
        return self._graph

    @property
    def session(self):
        """The tf.Session bound to the model's graph, opened on first access."""
        if self._session is None:
            self._session = tf.Session(graph=self.graph)
        return self._session

    @property
    def merged_training_summary(self):
        """Merged 'train' summary op, created on first access."""
        if self._merged_training_summary is None:
            self.create_tensorboard_summaries()
        return self._merged_training_summary

    @property
    def merged_validation_summary(self):
        """Merged 'validation' summary op, created on first access."""
        if self._merged_validation_summary is None:
            self.create_tensorboard_summaries()
        return self._merged_validation_summary

    @property
    def tensorboard_writer(self):
        """The tensorboard FileWriter; one writing to "./" is created if none exists."""
        if self._tensorboard_writer is None:
            self.create_tensorboard_writer()
        return self._tensorboard_writer

In [10]:
# Network dimensions shared by the model instances created below.
num_layers = 2  # number of hidden layers
k_hidden = 10  # units per hidden layer
k_input = 4  # input features (iris_dataset.data has shape (150, 4))
k_softmax = 3  # output classes (the iris targets take the values 0, 1, 2)

In [11]:
# Create a model instance with 2 hidden layers of 10 units each. The
# constructor only stores the settings; no graph or session exists yet.

model_a = FeedForwardSoftMaxModel(num_layers, k_hidden, k_input, k_softmax)

### Lazy Properties

The class doesn't actually create the graph or session until the `graph` and `session` properties are accessed. The methods decorated with @property above create a graph or session if none exists yet.

In [12]:
model_a._graph is None

True

In [13]:
model_a._session is None

True

In [14]:
# First access of the property: this is what triggers create_graph().
model_a.graph

<tensorflow.python.framework.ops.Graph at 0x115ff2490>

In [15]:
# First access of the property: opens a tf.Session bound to the model's graph.
model_a.session

<tensorflow.python.client.session.Session at 0x114f4b410>

In [16]:
model_a.W

{(1, 0): <tf.Variable 'hidden_layers/W_1_0:0' shape=(4, 10) dtype=float32_ref>,
 (2,
  1): <tf.Variable 'hidden_layers/W_2_1:0' shape=(10, 10) dtype=float32_ref>,
 ('softmax',
  2): <tf.Variable 'softmax/W_softmax_2:0' shape=(10, 3) dtype=float32_ref>}

In [17]:
model_a.b

{1: <tf.Variable 'hidden_layers/b_1:0' shape=(10,) dtype=float32_ref>,
 2: <tf.Variable 'hidden_layers/b_2:0' shape=(10,) dtype=float32_ref>,
 'softmax': <tf.Variable 'softmax/b_softmax:0' shape=(3,) dtype=float32_ref>}

In [18]:
# First access calls create_tensorboard_summaries(), which builds both the
# training and the validation merged summary ops.
model_a.merged_training_summary

<tf.Tensor 'summaries/Merge/MergeSummary:0' shape=() dtype=string>

In [19]:
model_a.merged_validation_summary

<tf.Tensor 'summaries/Merge_1/MergeSummary:0' shape=() dtype=string>

In [20]:
model_a.merged_training_summary

<tf.Tensor 'summaries/Merge/MergeSummary:0' shape=() dtype=string>

In [21]:
model_a.merged_validation_summary

<tf.Tensor 'summaries/Merge_1/MergeSummary:0' shape=() dtype=string>

In [22]:
# Run the variable initializer in the session; variables have no values before this.
model_a.init_model()

In [23]:
model_a.session.run(model_a.W)

{(1,
  0): array([[-0.53383142, -0.26800293,  0.30587319,  0.41528845,  0.53037804,
          0.54393131,  0.25059569,  0.3829636 ,  0.52883643,  0.22119667],
        [ 0.43764803, -0.18851139,  0.08263879,  0.5383625 ,  0.39127269,
         -0.2205787 , -0.25064543,  0.48002577, -0.52354157, -0.05719423],
        [ 0.32512787, -0.29318693, -0.02891652, -0.5973413 , -0.50016761,
         -0.18938035,  0.38525584,  0.39774391,  0.615116  , -0.43968138],
        [-0.64172977, -0.60338551, -0.01295396, -0.48267376, -0.33937457,
          0.54366893,  0.39634818,  0.49564573, -0.58831275,  0.43020582]], dtype=float32),
 (2, 1): array([[  4.59960073e-01,  -1.37809038e-01,  -2.03138635e-01,
           1.28261879e-01,  -1.21349864e-01,   5.51811745e-03,
           1.35973215e-01,   1.35233074e-01,   9.69059393e-02,
           5.28214693e-01],
        [  2.33344719e-01,  -4.80115898e-02,   1.37993753e-01,
          -3.29962254e-01,  -2.87121356e-01,  -2.33356997e-01,
          -3.46505344e-02,

In [24]:
model_a.session.run(model_a.b)

{1: array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32),
 2: array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32),
 'softmax': array([ 0.,  0.,  0.], dtype=float32)}

In [25]:
# Create the tensorboard FileWriter that logs into ./tensorboard/model_a.
model_a.create_tensorboard_writer("./tensorboard/model_a")

In [26]:
# Add the graph to the tensorboard log.
# NOTE(review): create_tensorboard_writer already passes graph= to the
# FileWriter, so this call may be redundant - confirm.
model_a.write_graph()

### Assignment Operations

The class adds some convenience functions for assigning weights. Tensorflow can only assign values to variables using assignment operations, so a combination of a placeholder and an assignment operation is used to allow assignment through a function.

In [27]:
model_a.W

{(1, 0): <tf.Variable 'hidden_layers/W_1_0:0' shape=(4, 10) dtype=float32_ref>,
 (2,
  1): <tf.Variable 'hidden_layers/W_2_1:0' shape=(10, 10) dtype=float32_ref>,
 ('softmax',
  2): <tf.Variable 'softmax/W_softmax_2:0' shape=(10, 3) dtype=float32_ref>}

In [28]:
model_a.W.keys()

[(1, 0), ('softmax', 2), (2, 1)]

In [29]:
model_a.new_W_value

{(1,
  0): <tf.Tensor 'parameter_assignment/Placeholder:0' shape=<unknown> dtype=float32>,
 (2,
  1): <tf.Tensor 'parameter_assignment/Placeholder_2:0' shape=<unknown> dtype=float32>,
 ('softmax',
  2): <tf.Tensor 'parameter_assignment/Placeholder_1:0' shape=<unknown> dtype=float32>}

In [30]:
# Overwrite the input -> hidden layer 1 weights (shape (4, 10)) with ones.
model_a.assign_W((1, 0), np.ones((4, 10)))

In [31]:
# Overwrite the hidden layer 2 -> soft-max weights (shape (10, 3)) with ones.
model_a.assign_W(('softmax', 2), np.ones((10, 3)))

In [32]:
# Overwrite the hidden layer 1 -> hidden layer 2 weights (shape (10, 10)) with ones.
model_a.assign_W((2, 1), np.ones((10, 10)))

In [33]:
model_a.session.run(model_a.W)

{(1, 0): array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]], dtype=float32),
 (2, 1): array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]], dtype=float32),
 ('softmax', 2): array([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1., 

In [34]:
model_a.b

{1: <tf.Variable 'hidden_layers/b_1:0' shape=(10,) dtype=float32_ref>,
 2: <tf.Variable 'hidden_layers/b_2:0' shape=(10,) dtype=float32_ref>,
 'softmax': <tf.Variable 'softmax/b_softmax:0' shape=(3,) dtype=float32_ref>}

In [35]:
model_a.new_b_value

{1: <tf.Tensor 'parameter_assignment/Placeholder_3:0' shape=<unknown> dtype=float32>,
 2: <tf.Tensor 'parameter_assignment/Placeholder_4:0' shape=<unknown> dtype=float32>,
 'softmax': <tf.Tensor 'parameter_assignment/Placeholder_5:0' shape=<unknown> dtype=float32>}

In [36]:
# Biases are keyed by layer (1, 2 or 'softmax'), not by a tuple like the weights.
model_a.assign_b(1, np.ones((10,)))

In [37]:
# Overwrite the soft-max layer biases (shape (3,)) with ones.
model_a.assign_b('softmax', np.ones((3, )))

In [38]:
# Overwrite the hidden layer 2 biases (shape (10,)) with ones.
model_a.assign_b(2, np.ones((10,)))

In [39]:
model_a.session.run(model_a.b)

{1: array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.], dtype=float32),
 2: array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.], dtype=float32),
 'softmax': array([ 1.,  1.,  1.], dtype=float32)}

In [40]:
# Save the variable values held by model_a's session to a checkpoint file.
# NOTE(review): the extension is spelled "cpkt" (not "ckpt"); the load cell
# below uses the same spelling, so the round trip works.
model_a.save_model("./saved_model/test_saved_model.cpkt")

### Load the saved model into a second model

In [41]:
# A second, independent model with the same dimensions as model_a.
model_b = FeedForwardSoftMaxModel(num_layers, k_hidden, k_input, k_softmax)

In [42]:
# load_model opens a session on model_b's graph and restores the checkpoint
# into it; init_model() is not called first.
model_b.load_model("./saved_model/test_saved_model.cpkt")

INFO:tensorflow:Restoring parameters from ./saved_model/test_saved_model.cpkt


In [43]:
model_b.W

{(1, 0): <tf.Variable 'hidden_layers/W_1_0:0' shape=(4, 10) dtype=float32_ref>,
 (2,
  1): <tf.Variable 'hidden_layers/W_2_1:0' shape=(10, 10) dtype=float32_ref>,
 ('softmax',
  2): <tf.Variable 'softmax/W_softmax_2:0' shape=(10, 3) dtype=float32_ref>}

In [44]:
model_b.session.run(model_b.W)

{(1, 0): array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]], dtype=float32),
 (2, 1): array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]], dtype=float32),
 ('softmax', 2): array([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1., 

## Model Training

In [45]:
# A third model instance, used for the tensorboard cells in this section.
model_c = FeedForwardSoftMaxModel(num_layers, k_hidden, k_input, k_softmax)

In [46]:
# Initialize model_c's variables (random weights, zero biases).
model_c.init_model()

In [47]:
model_c.session.run(model_c.b)

{1: array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32),
 2: array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32),
 'softmax': array([ 0.,  0.,  0.], dtype=float32)}

In [48]:
model_c.session.run(model_c.W)

{(1,
  0): array([[ 0.28000289,  0.23105153, -0.33771443, -0.31311759,  0.53268135,
          0.25044283,  0.58518004, -0.59958947, -0.60120815, -0.34895009],
        [ 0.2810252 , -0.17252007, -0.61412835, -0.34781274,  0.37581402,
         -0.59056562,  0.46723518,  0.33566508, -0.43742433, -0.40618837],
        [ 0.22088052,  0.59920073,  0.07000663, -0.03320396,  0.10626927,
         -0.11497881, -0.29285419, -0.43204159,  0.1853268 ,  0.55036724],
        [ 0.01022691,  0.60928577,  0.62619817, -0.48394573, -0.46712208,
         -0.22698082, -0.36513788,  0.37369412,  0.22148553,  0.13890925]], dtype=float32),
 (2,
  1): array([[ 0.3068825 , -0.2885817 ,  0.22892857,  0.31222558,  0.22021452,
          0.30423072,  0.48374948, -0.36762428, -0.52571899,  0.33897048],
        [-0.47151569, -0.47213507,  0.11783093, -0.11754851,  0.41255018,
          0.31946424, -0.21307269, -0.03902682, -0.45915243, -0.38658398],
        [-0.28733075, -0.39249817,  0.05153377,  0.11287121,  0.50044

In [49]:
iris_dataset.data

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4

In [50]:
iris_dataset.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [51]:
# Create the tensorboard FileWriter that logs into ./tensorboard/model_c.
model_c.create_tensorboard_writer("./tensorboard/model_c")

In [52]:
# Add model_c's graph to the tensorboard log.
model_c.write_graph()

In [53]:
# Log the training summaries on the full Iris dataset at step 0.
# NOTE: train_model() is never called in this notebook, so this and the
# following summary cells all describe the same untrained weights.
model_c.write_training_summary(iris_dataset.data, iris_dataset.target, 0)

In [54]:
# Log the training summaries again, this time labelled as step 10.
model_c.write_training_summary(iris_dataset.data, iris_dataset.target, 10)

In [55]:
# Log the training summaries again, this time labelled as step 20.
model_c.write_training_summary(iris_dataset.data, iris_dataset.target, 20)

In [58]:
# Flush pending summary events to disk.
model_c.tensorboard_writer.flush()

In [59]:
# Close the tensorboard writer.
model_c.tensorboard_writer.close()

In [60]:
# Close model_c's session. model_a's and model_b's sessions are not closed
# anywhere in this notebook.
model_c.session.close()