<a href="https://colab.research.google.com/github/hanocha/try-tensorflow/blob/master/try_cnn_with_low_level_api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np

mnist = tf.keras.datasets.mnist

(X_data, y_data), (X_test, y_test) = mnist.load_data()

X_train, y_train = X_data[:50000, :], y_data[:50000]
X_valid, y_valid = X_data[50000:, :], y_data[50000:]

X_train = X_train.reshape([50000, -1])
X_valid = X_valid.reshape([10000, -1])
X_test = X_test.reshape([10000, -1])

print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

mean_vals = np.mean(X_train, axis=0)
std_val = np.std(X_train)

X_train_centered = (X_train - mean_vals) / std_val
X_valid_centered = X_valid - mean_vals
X_test_centered = (X_test - mean_vals) / std_val

print(X_train_centered.shape, y_train.shape)
print(X_valid_centered.shape, y_valid.shape)
print(X_test_centered.shape, y_test.shape)

print(y_train)
print(y_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(50000, 784)
(10000, 784)
(10000, 784)
(50000, 784) (50000,)
(10000, 784) (10000,)
(10000, 784) (10000,)
[5 0 4 ... 8 4 8]
(50000,)


In [0]:
def batch_generator(X, y, batch_size=64, shuffle=False, random_seed=None):
  idx = np.arange(y.shape[0])

  if shuffle:
    rng = np.random.RandomState(random_seed)
    rng.shuffle(idx)
    X = X[idx]
    y = y[idx]

  for i in range(0, X.shape[0], batch_size):
    yield(
        X[i:i+batch_size, :],
        y[i:i+batch_size],
    )

In [0]:
def conv_layer(input_tensor, name, kernel_size, n_output_channels, padding_mode='SAME', strides=(1, 1, 1, 1)):
  with tf.variable_scope(name):
    input_shape = input_tensor.get_shape().as_list()
    n_input_channels = input_shape[-1]

    weights_shape = (
        list(kernel_size) + [n_input_channels, n_output_channels]
    )

    weights = tf.get_variable(name='_weights', shape=weights_shape)
    print(weights)

    biases = tf.get_variable(name='_biases', initializer=tf.zeros(shape=[n_output_channels]))
    print(biases)

    conv = tf.nn.conv2d(
        input = input_tensor,
        filter = weights,
        strides = strides,
        padding = padding_mode,
    )
    print(conv)

    conv = tf.nn.bias_add(conv, biases, name='net_pre-activation')
    print(conv)

    conv = tf.nn.relu(conv, name='activation')
    print(conv)

    return conv

In [5]:
g = tf.Graph()

with g.as_default():
  x = tf.placeholder(
      tf.float32,
      shape=[None, 28, 28, 1],
  )

  conv_layer(x, name='convtest', kernel_size=(3, 3), n_output_channels=32)

del g, x

<tf.Variable 'convtest/_weights:0' shape=(3, 3, 1, 32) dtype=float32_ref>
<tf.Variable 'convtest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("convtest/Conv2D:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/net_pre-activation:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/activation:0", shape=(?, 28, 28, 32), dtype=float32)


In [0]:
def fc_layer(input_tensor, name, n_output_units, activation_fn=None):
  with tf.variable_scope(name):
    input_shape = input_tensor.get_shape().as_list()[1:]
    n_input_units = np.prod(input_shape)

    if len(input_shape) > 1:
      input_tensor = tf.reshape(input_tensor, shape=(-1, n_input_units))

    weights_shape = [n_input_units, n_output_units]
    weights = tf.get_variable(name='_weights', shape=weights_shape)
    print(weights)
    
    biases = tf.get_variable(name='_biases', initializer=tf.zeros(shape=[n_output_units]))
    print(biases)

    layer = tf.matmul(input_tensor, weights)
    print(layer)

    layer = tf.nn.bias_add(layer, biases, name='net_pre-activation')
    print(layer)

    if activation_fn is None:
      return layer
    
    layer = activation_fn(layer, name='activation')
    print(layer)
    return layer

In [7]:
g = tf.Graph()

with g.as_default():
  x = tf.placeholder(
      tf.float32,
      shape=[None, 28, 28, 1],
  )

  fc_layer(x, name='fctest', n_output_units=32, activation_fn=tf.nn.relu)

del g, x

<tf.Variable 'fctest/_weights:0' shape=(784, 32) dtype=float32_ref>
<tf.Variable 'fctest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("fctest/MatMul:0", shape=(?, 32), dtype=float32)
Tensor("fctest/net_pre-activation:0", shape=(?, 32), dtype=float32)
Tensor("fctest/activation:0", shape=(?, 32), dtype=float32)


In [0]:
def build_cnn():
  tf_x = tf.placeholder(tf.float32, shape=[None, 784], name='tf_x')
  tf_y = tf.placeholder(tf.int32, shape=[None], name='tf_y')

  tf_x_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1], name='tf_x_reshaped')
  tf_y_onehot = tf.one_hot(
      indices=tf_y,
      depth=10,
      dtype=tf.float32,
      name='tf_y_onehot',
  )

  h1 = conv_layer(
      tf_x_image,
      name='conv_1',
      kernel_size=(5,5),
      padding_mode='VALID',
      n_output_channels=32,
  )
  
  h1_pool = tf.nn.max_pool(
      h1,
      ksize=[1, 2, 2, 1],
      strides=[1, 2, 2, 1],
      padding='SAME',
  )

  h2 = conv_layer(
      h1_pool,
      name='conv_2',
      kernel_size=(5,5),
      padding_mode='VALID',
      n_output_channels=64,
  )

  h2_pool = tf.nn.max_pool(
      h2,
      ksize=[1, 2, 2, 1],
      strides=[1, 2, 2, 1],
      padding='SAME',
  )

  h3 = fc_layer(
      h2_pool,
      name='fc_3',
      n_output_units=1024,
      activation_fn=tf.nn.relu,
  )

  keep_prob = tf.placeholder(tf.float32, name='fc_keep_prob')
  h3_drop = tf.nn.dropout(h3, keep_prob=keep_prob, name='dropout_layer')

  h4 = fc_layer(
      h3_drop,
      name='fc_4',
      n_output_units=10,
      activation_fn=None,
  )

  predictions = {
      'probabilities': tf.nn.softmax(h4, name='probabilities'),
      'labels': tf.cast(tf.argmax(h4, axis=1), tf.int32, name='labels'),
  }

  cross_entropy_loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
          logits=h4,
          labels=tf_y_onehot,
      ),
      name='cross_entropy_loss',
  )

  optimizer = tf.train.AdamOptimizer(leaning_rate)
  optimizer = optimizer.minimize(cross_entropy_loss, name='train_op')

  correct_predictions = tf.equal(predictions['labels'], tf_y, name='correct_preds')
  accuracy = tf.reduce_mean(
      tf.cast(correct_predictions, tf.float32),
      name='accuracy',
  )

In [0]:
def save(saver, sess, epoch, path='./model/'):
  if not os.path.isdir(path):
    os.makedirs(path)

  saver.save(sess, os.path.join(path, 'cnn-model.ckpt'), global_step=epoch)


def load(saver, sess, path, epoch):
  saver.restore(sess, os.path.join(path, 'cnn-model.ckpt-%d' % epoch))


def train(sess, training_set, validation_set=None, initialize=True, epochs=20, shuffle=True, dropout=0.5, random_seed=None):
  X_data = np.array(training_set[0])
  y_data = np.array(training_set[1])
  training_loss = []

  if initialize:
    sess.run(tf.global_variables_initializer())

  np.random.seed(random_seed)

  for epoch in range(1, epochs + 1):
    batch_gen = batch_generator(X_data, y_data, shuffle=shuffle)
    avg_loss = 0.0

    for i, (batch_X, batch_y) in enumerate(batch_gen):
      feed = {
          'tf_x:0': batch_X,
          'tf_y:0': batch_y,
          'fc_keep_prob:0': dropout,
      }
      loss, _ = sess.run(
          ['cross_entropy_loss:0', 'train_op'],
          feed_dict=feed,
      )

      avg_loss += loss

    training_loss.append(avg_loss / (i+1))
    print('epoch %02d training avg loss: %7.3f' % (epoch, avg_loss), end=' ')

    if validation_set is not None:
      feed = {
          'tf_x:0': validation_set[0],
          'tf_y:0': validation_set[1],
          'fc_keep_prob:0': 1.0,
      }

      valid_acc = sess.run('accuracy:0', feed_dict=feed)
      print(' validation acc: %7.3f' % valid_acc)

    else:
      print()


def predict(sess, X_test, return_proba=False):
  feed = {
      'tf_x:0': X_test,
      'fc_keep_prob:0': 1.0,
  }

  if return_proba:
    return sess.run('probabilities:0', feed_dict=feed)
  else:
    return sess.run('labels:0', feed_dict=feed)

In [13]:
leaning_rate = 1e-4
random_seed = 123

g = tf.Graph()
with g.as_default():
  tf.set_random_seed(random_seed)
  build_cnn()
  saver = tf.train.Saver()

<tf.Variable 'conv_1/_weights:0' shape=(5, 5, 1, 32) dtype=float32_ref>
<tf.Variable 'conv_1/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("conv_1/Conv2D:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/net_pre-activation:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/activation:0", shape=(?, 24, 24, 32), dtype=float32)
<tf.Variable 'conv_2/_weights:0' shape=(5, 5, 32, 64) dtype=float32_ref>
<tf.Variable 'conv_2/_biases:0' shape=(64,) dtype=float32_ref>
Tensor("conv_2/Conv2D:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/net_pre-activation:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/activation:0", shape=(?, 8, 8, 64), dtype=float32)
<tf.Variable 'fc_3/_weights:0' shape=(1024, 1024) dtype=float32_ref>
<tf.Variable 'fc_3/_biases:0' shape=(1024,) dtype=float32_ref>
Tensor("fc_3/MatMul:0", shape=(?, 1024), dtype=float32)
Tensor("fc_3/net_pre-activation:0", shape=(?, 1024), dtype=float32)
Tensor("fc_3/activation:0", shape=(?, 1024), dtype=float32)
Ins

In [26]:
with tf.Session(graph=g) as sess:
  train(
      sess,
      training_set=(X_train_centered, y_train),
      validation_set=(X_valid_centered, y_valid),
      initialize=True,
      random_seed=123,
  )

  save(saver, sess, epoch=20)

epoch 01 training avg loss: 271.591  validation acc:   0.974
epoch 02 training avg loss:  74.593  validation acc:   0.983
epoch 03 training avg loss:  51.872  validation acc:   0.985
epoch 04 training avg loss:  39.070  validation acc:   0.987
epoch 05 training avg loss:  32.101  validation acc:   0.988
epoch 06 training avg loss:  26.963  validation acc:   0.988
epoch 07 training avg loss:  23.310  validation acc:   0.988
epoch 08 training avg loss:  20.142  validation acc:   0.990
epoch 09 training avg loss:  17.318  validation acc:   0.992
epoch 10 training avg loss:  15.006  validation acc:   0.992
epoch 11 training avg loss:  12.027  validation acc:   0.989
epoch 12 training avg loss:  11.698  validation acc:   0.990
 validation acc:   0.992
epoch 14 training avg loss:   8.956  validation acc:   0.991
epoch 15 training avg loss:   7.381  validation acc:   0.992
epoch 16 training avg loss:   6.850  validation acc:   0.991
epoch 17 training avg loss:   6.977  validation acc:   0.991

NameError: ignored