In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline

def reset_graph(seed=42):
    """Start over with an empty default graph and seeded random generators.

    Args:
        seed: Value handed to both TensorFlow's graph-level seed and NumPy's
            global random generator.
    """
    tf.reset_default_graph()
    # Seed TensorFlow only after the reset, then NumPy, both with the same value.
    for apply_seed in (tf.set_random_seed, np.random.seed):
        apply_seed(seed)

  from ._conv import register_converters as _register_converters


In [2]:
# Build a single sigmoid layer in a dedicated graph and evaluate it once
# through an InteractiveSession bound to that graph.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)

X = tf.placeholder(tf.float32, shape=[1, 10], name='X')
W = tf.Variable(
    tf.random_uniform([10, 5], minval=-0.1, maxval=0.1, dtype=tf.float32),
    name='W')
b = tf.Variable(tf.zeros([5], dtype=tf.float32), name='b')
h = tf.nn.sigmoid(tf.matmul(X, W) + b)

# Initialise W and b, then feed one random row vector through the layer.
session.run(tf.global_variables_initializer())
h_eval = session.run(h, feed_dict={X: np.random.rand(1, 10)})
print(h_eval)
session.close()

[[0.48867318 0.48186773 0.49172705 0.5374585  0.49757305]]


In [3]:
# Same sigmoid layer as before, this time on the (reset) default graph and
# evaluated inside a `with tf.Session()` block.
reset_graph()

X = tf.placeholder(tf.float32, shape=[1, 10], name='X')
w = tf.Variable(
    tf.random_uniform([10, 5], minval=-0.1, maxval=0.1, dtype=tf.float32),
    name='w')
b = tf.Variable(tf.zeros([5], dtype=tf.float32), name='b')
h = tf.nn.sigmoid(tf.matmul(X, w) + b)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Fetch the placeholder as well, so the fed input is shown next to the output.
    random_input = np.random.rand(1, 10)
    X_eval, h_eval = sess.run([X, h], feed_dict={X: random_input})
    print(X_eval)
    print(h_eval)

[[0.37454012 0.9507143  0.7319939  0.5986585  0.15601864 0.15599452
  0.05808361 0.8661761  0.601115   0.7080726 ]]
[[0.4568662  0.5571102  0.46591306 0.5213214  0.46943322]]


In [4]:
# --- Build the filename queue ---------------------------------------------
# Once all file names are known, the list is handed to
# tf.train.string_input_producer, which builds a queue of file names.
filenames = ['test%d.txt'%i for i in range(1, 4)]

# Check that every file exists before any queue op is added to the graph.
for f in filenames:
    if not tf.gfile.Exists(f):
        raise ValueError('Failed to find file:' + f)
    else:
        print('File %s found.'%f)

filename_queue = tf.train.string_input_producer(filenames, capacity = 3,
                 shuffle = True, name='string_input_producer')

# --- Read the files ---------------------------------------------------------
# Pick a reader that matches the file format. The files here are CSV with one
# record per line, so tf.TextLineReader is used and given the filename queue.
reader = tf.TextLineReader()

# `key` identifies the file and record (useful for debugging);
# `value` is the actual line content.
key, value = reader.read(filename_queue, name = 'text_read_op')

# --- Parse the records ------------------------------------------------------
# The text that was read must be converted to tensors before TensorFlow can
# use it. tf.decode_csv turns one CSV line into a list of tensors.

# Default value (and thereby dtype) of each of the ten columns.
record_defaults = [[-1.0] for _ in range(10)]

# Decode the read value into its ten columns.
columns = tf.decode_csv(value, record_defaults = record_defaults)
col1,col2,col3,col4,col5,col6,col7,col8,col9,col10 = columns
# Pack all ten CSV columns into a single tensor.
features = tf.stack(columns)

# Selected tf.train.shuffle_batch() parameters:
# tensor_list: the list of tensors to enqueue.
# batch_size: the new batch size pulled from the queue.
# capacity: an integer, the maximum number of elements in the queue.
#   Suggested value:
#   capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
# min_after_dequeue: size of the buffer used for shuffling. A larger value
#   shuffles better, but start-up takes longer and more memory is used.
# num_threads: the number of threads enqueuing tensor_list.
# See https://blog.csdn.net/u013555719/article/details/77679964
X = tf.train.shuffle_batch([features], batch_size = 3,
                           capacity = 5, name = 'data_batch',
                           min_after_dequeue=1, num_threads=1)

# Building the graph by defining the variables and calculations
w = tf.Variable(tf.random_uniform(shape=[10,5],minval=-0.1,maxval= 0.1,
                                  dtype=tf.float32), name = 'w')
b = tf.Variable(tf.zeros(shape=[5], dtype=tf.float32), name ='b')
h = tf.nn.sigmoid(tf.matmul(X, w) + b) # Operation to be performed

init = tf.global_variables_initializer()

# Executing operations and evaluating nodes in the graph
with tf.Session() as sess:
    sess.run(init)
    # 1. A TensorFlow Session supports multiple threads: several threads can
    #    be created in, and run concurrently against, the same session. All of
    #    them must be stoppable together, exceptions must be caught and
    #    reported, and the queues must be closed when the session ends.
    #    TensorFlow offers tf.train.Coordinator and tf.train.QueueRunner for
    #    this; the two are usually used together.
    # 2. The queue runners must be started in other threads before any data
    #    can actually be read.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord = coord, sess = sess)

    try:
        # Calculate h with X and print the results for 5 steps
        for step in range(5):
            fetched = sess.run([key, value] + columns + [features, X, h])
            key_val, value_val = fetched[:2]
            col_vals = fetched[2:-3]
            ft_val, X_val, h_val = fetched[-3:]

            print('========== Step %d ===========' %step)
            print('key_val:', key_val)
            print('value_val:', value_val)
            print('c1-10v:\n', *col_vals)
            print('feat_val:', ft_val)
            print(' Evaluated data (X)')
            print(X_val)
            print(' Evaluated data (h)')
            print(h_val)
            print('')
    finally:
        # The coordinator must be stopped explicitly, otherwise the process
        # will hang indefinitely. Doing it in `finally` also stops the queue
        # threads when a step raises.
        coord.request_stop()
        coord.join(threads)

File test1.txt found.
File test2.txt found.
File test3.txt found.
key_val: b'test3.txt:4'
value_val: b'1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1'
c1-10v:
 1.0 0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1
feat_val: [1.  0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1]
 Evaluated data (X)
[[1.  0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1]
 [1.  0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1]
 [1.  0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1]]
 Evaluated data (h)
[[0.49413252 0.53333956 0.50757587 0.4781466  0.4681989 ]
 [0.49413252 0.53333956 0.50757587 0.4781466  0.4681989 ]
 [0.49413252 0.53333956 0.50757587 0.4781466  0.4681989 ]]

key_val: b'test1.txt:4'
value_val: b'0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0'
c1-10v:
 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
feat_val: [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
 Evaluated data (X)
[[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
 [1.  0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1]
 [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]]
 Evaluated data (h)
[[0.5328524  0.51803243 0.5141695  0.5007955  0.45780918]
 [

## Variable Scoping
Here we will see how variable scoping lets us reuse variables during the execution of the code. First, we will see that TensorFlow creates new variables every time we execute the code if scoping is not used. Then we will look at how to solve this issue with scoping.

In [5]:
def not_so_simple_computation(w):
  """Return ``x*w + y**2`` using the variables 'x' and 'y' of the active scope.

  'x' is requested with initial value 5.0 and 'y' with 2.0 via
  ``tf.get_variable``, so the enclosing variable scope decides whether they are
  created or reused.
  """
  x_start = tf.constant(5.0, shape=None, dtype=tf.float32)
  x = tf.get_variable('x', initializer=x_start)
  y_start = tf.constant(2.0, shape=None, dtype=tf.float32)
  y = tf.get_variable('y', initializer=y_start)
  return x*w + y**2

def another_not_so_simple_computation(w):
  """Return ``w*x*y`` using the variables 'x' and 'y' of the active scope.

  The variables are requested through ``tf.get_variable`` with initial values
  5.0 ('x') and 2.0 ('y').
  """
  x_start = tf.constant(5.0, shape=None, dtype=tf.float32)
  x = tf.get_variable('x', initializer=x_start)
  y_start = tf.constant(2.0, shape=None, dtype=tf.float32)
  y = tf.get_variable('y', initializer=y_start)
  return w*x*y

# First use of 'scopeA': get_variable creates scopeA/x and scopeA/y.
with tf.variable_scope('scopeA'):
  one_a = tf.constant(1.0, dtype=tf.float32)
  z1 = not_so_simple_computation(one_a)
# scopeA/x and scopeA/y exist now, so the scope is re-entered with reuse=True.
with tf.variable_scope('scopeA', reuse=True):
  z2 = another_not_so_simple_computation(z1)

# First use of 'scopeB': get_variable creates scopeB/x and scopeB/y.
with tf.variable_scope('scopeB'):
  one_b = tf.constant(1.0, dtype=tf.float32)
  a1 = not_so_simple_computation(one_b)
# scopeB/x and scopeB/y exist now, so they are reused as well.
with tf.variable_scope('scopeB', reuse=True):
  a2 = another_not_so_simple_computation(a1)

# Going back to 'scopeA' later on: its variables already exist, so `reuse`
# must be True whenever the scope is opened again.
with tf.variable_scope('scopeA', reuse=True):
  one_again = tf.constant(1.0, dtype=tf.float32)
  zz1 = not_so_simple_computation(one_again)
  zz2 = another_not_so_simple_computation(z1)

init = tf.global_variables_initializer()

# Evaluate all six results and list the variables that exist in the graph.
with tf.Session() as sess:
    sess.run(init)
    evaluated = sess.run([z1, z2, a1, a2, zz1, zz2])
    print(evaluated)
    variable_names = [v.name for v in tf.global_variables()]
    print(variable_names)


[9.0, 90.0, 9.0, 90.0, 9.0, 90.0]
['w:0', 'b:0', 'w_1:0', 'b_1:0', 'scopeA/x:0', 'scopeA/y:0', 'scopeB/x:0', 'scopeB/y:0']


# MNIST Classification

## Download and Prepare Data

The code below downloads the MNIST data set from its source, reshapes the images into a [number_of_training_samples, single_image_size] matrix, and standardizes the images (zero mean, unit variance). It then does the same for the test images.

In [6]:
import struct
import gzip
import os
from six.moves.urllib.request import urlretrieve

def maybe_download(url, filename, expected_bytes, force=False):
  """Fetch ``url + filename`` unless it is already on disk, then check its size.

  Args:
    url: Base URL; the file name is appended to it directly.
    filename: Local path of the file (also the suffix added to ``url``).
    expected_bytes: Size in bytes that the local file must have.
    force: When true, download even if the file already exists.

  Returns:
    The local file name.

  Raises:
    Exception: If the size of the local file differs from ``expected_bytes``.
  """
  needs_download = force or not os.path.exists(filename)
  if needs_download:
    print('Attempting to download:', filename)
    local_path, _headers = urlretrieve(url + filename, filename)
    filename = local_path
    print('\nDownload Complete!')

  actual_bytes = os.stat(filename).st_size
  if actual_bytes != expected_bytes:
    raise Exception(
      'Failed to verify '+ filename +'. Can you get to it with a browser?')

  print('Found and verified', filename)
  return filename


def read_mnist(fname_img, fname_lbl):
    """Read a gzipped IDX image file and its label file into NumPy arrays.

    Args:
        fname_img: Path of the gzipped image file (IDX magic number 2051).
        fname_lbl: Path of the gzipped label file (IDX magic number 2049).

    Returns:
        A tuple ``(img, lbl)``. ``img`` is a float32 array of shape
        ``(num, rows * cols)`` standardized with the mean and standard
        deviation taken over the whole array. ``lbl`` is a uint8 array with
        one label per entry of the label file.

    Raises:
        ValueError: If either file does not start with the expected magic
            number.
    """
    print('\nReading files %s and %s'%(fname_img, fname_lbl))

    with gzip.open(fname_img, 'rb') as fimg:
        # Header: four big-endian uint32 values (magic, count, rows, cols).
        magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        if magic != 2051:
            raise ValueError('%s is not an IDX image file (magic %d)'
                             % (fname_img, magic))
        print('magic', magic)
        print(num,rows,cols)
        pixels = np.frombuffer(fimg.read(num*rows*cols), dtype=np.uint8)
        img = pixels.reshape(num, rows * cols).astype(np.float32)
        print('(Images) Returned a tensor of shape ',img.shape)
        # Standardize with statistics of the whole array, not per pixel.
        img = (img - np.mean(img))/np.std(img)

    with gzip.open(fname_lbl, 'rb') as flbl:
        # Header: two big-endian uint32 values (magic, count).
        lbl_magic, num_lbl = struct.unpack(">II", flbl.read(8))
        if lbl_magic != 2049:
            raise ValueError('%s is not an IDX label file (magic %d)'
                             % (fname_lbl, lbl_magic))
        # Labels are stored as unsigned bytes; int8 would wrap values > 127.
        lbl = np.frombuffer(flbl.read(num_lbl), dtype=np.uint8)
        print('(Labels) Returned a tensor of shape: %s'%lbl.shape)
        print('Sample labels: ',lbl[:10])

    return img, lbl
    
    
# Download data if needed
url = 'http://yann.lecun.com/exdb/mnist/'

TRAIN_IMAGES, TRAIN_LABELS = 'train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz'
TEST_IMAGES, TEST_LABELS = 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'

# (file name, expected size in bytes): training files first, then test files.
mnist_archives = [
    (TRAIN_IMAGES, 9912422),
    (TRAIN_LABELS, 28881),
    (TEST_IMAGES, 1648877),
    (TEST_LABELS, 4542),
]
for archive_name, archive_bytes in mnist_archives:
    maybe_download(url, archive_name, archive_bytes)

# Read the training and testing data
train_inputs, train_labels = read_mnist(TRAIN_IMAGES, TRAIN_LABELS)
test_inputs, test_labels = read_mnist(TEST_IMAGES, TEST_LABELS)



Found and verified train-images-idx3-ubyte.gz
Found and verified train-labels-idx1-ubyte.gz
Found and verified t10k-images-idx3-ubyte.gz
Found and verified t10k-labels-idx1-ubyte.gz

Reading files train-images-idx3-ubyte.gz and train-labels-idx1-ubyte.gz
magic 2051
60000 28 28
(Images) Returned a tensor of shape  (60000, 784)
(Labels) Returned a tensor of shape: 60000
Sample labels:  [5 0 4 1 9 2 1 3 1 4]

Reading files t10k-images-idx3-ubyte.gz and t10k-labels-idx1-ubyte.gz
magic 2051
10000 28 28
(Images) Returned a tensor of shape  (10000, 784)
(Labels) Returned a tensor of shape: 10000
Sample labels:  [7 2 1 0 4 1 4 9 5 9]


## Defining Hyperparameters and Some Constants

In [7]:
# Names under which each layer's weights and bias are stored in its variable scope.
WEIGHTS_STRING, BIAS_STRING = 'weights', 'bias'

# Number of examples per batch.
batch_size = 100

# Image geometry and the resulting flattened input size.
img_width = img_height = 28
input_size = img_height * img_width
# Number of output classes.
num_labels = 10

# Reset the default graph; otherwise an error about already
# initialized variables is raised.
tf.reset_default_graph()

## Defining Input and Label Placeholders

In [8]:
# Placeholders for one batch of flattened images and its one-hot labels.
tf_inputs = tf.placeholder(tf.float32, shape=[batch_size, input_size], name='inputs')
tf_labels = tf.placeholder(tf.float32, shape=[batch_size, num_labels], name='labels')

## Defining the Weights and Bias Variables (with Scoping)

In [9]:
# Defining the Tensorflow variables
def define_net_parameters():
    """Create the weight and bias variables of the three dense layers.

    One variable scope per layer ('layer1', 'layer2', 'output') holds a weight
    matrix drawn from a normal initializer (mean 0, stddev 0.02) and a bias
    vector drawn from a uniform initializer on [0, 0.01). Layer sizes are
    input_size -> 500 -> 250 -> 10.
    """
    # (scope name, number of inputs, number of outputs), in creation order.
    layer_specs = (
        ('layer1', input_size, 500),
        ('layer2', 500, 250),
        ('output', 250, 10),
    )
    for scope_name, fan_in, fan_out in layer_specs:
        with tf.variable_scope(scope_name):
            tf.get_variable(WEIGHTS_STRING, shape=[fan_in, fan_out],
                            initializer=tf.random_normal_initializer(0, 0.02))
            tf.get_variable(BIAS_STRING, shape=[fan_out],
                            initializer=tf.random_uniform_initializer(0, 0.01))

## Defining the Inference Operation

Here we calculate the output logits (unnormalized scores) for a given input X.

In [10]:
# Defining the calculations in the neural network, from the inputs to the logits.
# Logits are the values before softmax is applied to the final output.
        
def inference(x):
    """Compute the logits of the three-layer network for the input ``x``.

    Every scope is opened with ``reuse=True``, so the 'weights' and 'bias'
    variables of 'layer1', 'layer2' and 'output' must already exist when this
    function is called.

    Args:
        x: Input tensor that is matrix-multiplied with the 'layer1' weights.

    Returns:
        The output-layer tensor before softmax (op name 'logits').
    """
    # calculations for layer 1
    with tf.variable_scope('layer1',reuse=True):
        w,b = tf.get_variable(WEIGHTS_STRING), tf.get_variable(BIAS_STRING)
        tf_h1 = tf.nn.relu(tf.matmul(x,w) + b, name = 'hidden1')

    # calculations for layer 2
    with tf.variable_scope('layer2',reuse=True):
        w,b = tf.get_variable(WEIGHTS_STRING), tf.get_variable(BIAS_STRING)
        # Named 'hidden2' so the op name matches the layer it belongs to.
        tf_h2 = tf.nn.relu(tf.matmul(tf_h1,w) + b, name = 'hidden2')

    # calculations for output layer (no activation: raw scores)
    with tf.variable_scope('output',reuse=True):
        w,b = tf.get_variable(WEIGHTS_STRING), tf.get_variable(BIAS_STRING)
        tf_logits = tf.nn.bias_add(tf.matmul(tf_h2,w), b, name = 'logits')

    return tf_logits

## Defining Loss Function and Optimizer
We use the cross-entropy loss function and a momentum-based optimizer for learning.

In [11]:
define_net_parameters()

# Loss: mean softmax cross-entropy between the logits and the one-hot labels.
tf_train_logits = inference(tf_inputs)
tf_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=tf_train_logits, labels=tf_labels)
tf_loss = tf.reduce_mean(tf_cross_entropy)

# Optimization step: momentum optimizer minimizing the loss.
tf_optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
tf_loss_minimize = tf_optimizer.minimize(tf_loss)

## Defining Predictions

In [12]:
# Predictions: softmax over the logits of a second inference pass, which
# reuses the variables that already exist.
tf_prediction_logits = inference(tf_inputs)
tf_predictions = tf.nn.softmax(tf_prediction_logits)

## Executing the Graph to get the Classification Results

In [13]:
# Number of passes over the training set.
NUM_EPOCHS = 50

# Open the session used for training and initialise all variables in it.
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())

def accuracy(predictions, labels):
    """Return the fraction of predictions whose arg-max equals the label.

    Args:
        predictions: 2-D array of per-class scores (e.g. softmax outputs), one
            row per example.
        labels: Integer class labels, one per example.

    Returns:
        The share of correctly classified examples, computed over the number
        of labels passed in (not over a global batch size).
    """
    predicted_classes = np.argmax(predictions, axis=1).flatten()
    true_classes = np.asarray(labels).flatten()
    # Divide by the actual number of examples so partial batches are handled.
    return np.sum(predicted_classes == true_classes) / true_classes.size

test_accuracy_over_time = []
train_loss_over_time = []

n_train_batches = train_inputs.shape[0]//batch_size
n_test_batches = test_inputs.shape[0]//batch_size

for epoch in range(NUM_EPOCHS):
    train_loss = []

    # Training phase: one optimizer step per full batch.
    for step in range(n_train_batches):
        batch_rows = slice(step*batch_size, (step+1)*batch_size)

        # One-hot encode the integer labels of this batch, e.g. digit 3 of
        # the 10-class MNIST data set becomes [0,0,0,1,0,0,0,0,0,0].
        labels_one_hot = np.zeros((batch_size, num_labels), dtype=np.float32)
        labels_one_hot[np.arange(batch_size), train_labels[batch_rows]] = 1.0

        # Show a sample of the one-hot labels once, on the very first batch.
        if epoch == 0 and step == 0:
            print('Sample labels (one-hot)')
            print(labels_one_hot[:10])
            print()

        # Run one optimization step and record the batch loss.
        train_feed = {tf_inputs: train_inputs[batch_rows],
                      tf_labels: labels_one_hot}
        loss, _ = session.run([tf_loss, tf_loss_minimize], feed_dict=train_feed)
        train_loss.append(loss)  # averaged over the epoch below

    # Testing phase: accuracy of every full test batch.
    test_accuracy = []
    for step in range(n_test_batches):
        batch_rows = slice(step*batch_size, (step+1)*batch_size)
        test_predictions = session.run(
            tf_predictions, feed_dict={tf_inputs: test_inputs[batch_rows]})
        batch_test_accuracy = accuracy(test_predictions, test_labels[batch_rows])
        test_accuracy.append(batch_test_accuracy)

    # Report and store the epoch averages.
    mean_train_loss = np.mean(train_loss)
    mean_test_accuracy = np.mean(test_accuracy)*100.0
    print('Average train loss for the %d epoch: %.3f\n'%(epoch+1,mean_train_loss))
    train_loss_over_time.append(mean_train_loss)
    print('\tAverage test accuracy for the %d epoch: %.2f\n'%(epoch+1,mean_test_accuracy))
    test_accuracy_over_time.append(mean_test_accuracy)

session.close()

Sample labels (one-hot)
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]

Average train loss for the 1 epoch: 0.454

	Average test accuracy for the 1 epoch: 94.22

Average train loss for the 2 epoch: 0.139

	Average test accuracy for the 2 epoch: 96.27

Average train loss for the 3 epoch: 0.089

	Average test accuracy for the 3 epoch: 96.98

Average train loss for the 4 epoch: 0.062

	Average test accuracy for the 4 epoch: 97.35

Average train loss for the 5 epoch: 0.046

	Average test accuracy for the 5 epoch: 97.61

Average train loss for the 6 epoch: 0.034

	Average test accuracy for the 6 epoch: 97.71

Average train loss for the 7 epoch: 0.025

	Average test accuracy for the 7 epoch: 97.67

Average train 

# Alternative loading path: fetch MNIST through tf.keras.datasets.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# Flatten every image to 28*28 values, cast to float32 and divide by 255.0.
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
# Cast the labels to int32.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# Hold out the first 5000 training examples as a validation set.
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]


In [14]:
# Load the standardized MNIST arrays from the local gzip files.
X_train_full, y_train_full = read_mnist('train-images-idx3-ubyte.gz',
                                        'train-labels-idx1-ubyte.gz')
X_test, y_test = read_mnist('t10k-images-idx3-ubyte.gz',
                            't10k-labels-idx1-ubyte.gz')

# The first 5000 training examples become the validation set; the rest is
# kept for training.
n_validation = 5000
X_valid, y_valid = X_train_full[:n_validation], y_train_full[:n_validation]
X_train, y_train = X_train_full[n_validation:], y_train_full[n_validation:]



Reading files train-images-idx3-ubyte.gz and train-labels-idx1-ubyte.gz
magic 2051
60000 28 28
(Images) Returned a tensor of shape  (60000, 784)
(Labels) Returned a tensor of shape: 60000
Sample labels:  [5 0 4 1 9 2 1 3 1 4]

Reading files t10k-images-idx3-ubyte.gz and t10k-labels-idx1-ubyte.gz
magic 2051
10000 28 28
(Images) Returned a tensor of shape  (10000, 784)
(Labels) Returned a tensor of shape: 10000
Sample labels:  [7 2 1 0 4 1 4 9 5 9]


Reading files train-images-idx3-ubyte.gz and train-labels-idx1-ubyte.gz
magic 2051
60000 28 28
(Images) Returned a tensor of shape  (60000, 784)
(Labels) Returned a tensor of shape: 60000
Sample labels:  [5 0 4 1 9 2 1 3 1 4]

Reading files t10k-images-idx3-ubyte.gz and t10k-labels-idx1-ubyte.gz
magic 2051
10000 28 28
(Images) Returned a tensor of shape  (10000, 784)
(Labels) Returned a tensor of shape: 10000
Sample labels:  [7 2 1 0 4 1 4 9 5 9]

In [15]:
# Layer sizes of the fully connected classifier.
n_inputs = 28*28  # MNIST
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

reset_graph()

# Inputs: any number of flattened images; labels: integer classes
# (`(None)` is simply None, i.e. the label shape is left unspecified).
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int32, shape=None, name="y")

def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer inside the name scope ``name``.

    Args:
        X: 2-D input tensor; its second dimension is the number of inputs.
        n_neurons: Number of units in the layer.
        name: Name scope in which the layer's ops and variables are created.
        activation: Optional callable applied to ``X @ kernel + bias``.

    Returns:
        The activated output, or the raw pre-activation when ``activation``
        is None.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # Truncated-normal kernel whose spread shrinks with the input width.
        initial_kernel = tf.truncated_normal((fan_in, n_neurons),
                                             stddev=2 / np.sqrt(fan_in))
        kernel = tf.Variable(initial_kernel, name="kernel")
        bias = tf.Variable(tf.zeros([n_neurons]), name="bias")
        pre_activation = tf.matmul(X, kernel) + bias
        return pre_activation if activation is None else activation(pre_activation)
        
# Training hyperparameters.
learning_rate=0.01
n_epochs = 40
batch_size = 50

# Network: two ReLU hidden layers followed by a linear output layer.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

# Loss: mean sparse softmax cross-entropy over the batch.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

# Training op: plain gradient descent on the loss.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Evaluation: share of examples whose label is the top-1 logit.
# NOTE: this rebinds the name `accuracy`, which an earlier cell defined as a function.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

def shuffle_batch(X, y, batch_size):
    """Yield ``(X_batch, y_batch)`` pairs covering the data in random order.

    The indices are permuted once with NumPy's global random generator and
    split into ``len(X) // batch_size`` nearly equal parts, so every example
    appears in exactly one batch. When there are fewer examples than
    ``batch_size``, a single batch with all examples is produced; empty input
    yields nothing.

    Args:
        X: Indexable array of inputs.
        y: Indexable array of targets, aligned with ``X``.
        batch_size: Approximate number of examples per batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected with the same indices.
    """
    n_examples = len(X)
    if n_examples == 0:
        return
    rnd_idx = np.random.permutation(n_examples)
    # At least one batch: np.array_split rejects zero sections.
    n_batches = max(1, n_examples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]
        
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One gradient step per shuffled mini-batch.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Accuracy on the last mini-batch of the epoch and on the validation set.
        last_batch_feed = {X: X_batch, y: y_batch}
        validation_feed = {X: X_valid, y: y_valid}
        acc_batch = sess.run(accuracy, feed_dict=last_batch_feed)
        acc_val = sess.run(accuracy, feed_dict=validation_feed)
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    # Persist the trained variables once all epochs are done.
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.96 Val accuracy: 0.9406
1 Batch accuracy: 0.94 Val accuracy: 0.9562
2 Batch accuracy: 0.96 Val accuracy: 0.9608
3 Batch accuracy: 0.94 Val accuracy: 0.9634
4 Batch accuracy: 0.98 Val accuracy: 0.9676
5 Batch accuracy: 0.98 Val accuracy: 0.9648
6 Batch accuracy: 1.0 Val accuracy: 0.9712
7 Batch accuracy: 0.98 Val accuracy: 0.9728
8 Batch accuracy: 1.0 Val accuracy: 0.9742
9 Batch accuracy: 0.98 Val accuracy: 0.974
10 Batch accuracy: 1.0 Val accuracy: 0.9752
11 Batch accuracy: 1.0 Val accuracy: 0.9756
12 Batch accuracy: 0.98 Val accuracy: 0.9748
13 Batch accuracy: 1.0 Val accuracy: 0.9764
14 Batch accuracy: 1.0 Val accuracy: 0.976
15 Batch accuracy: 1.0 Val accuracy: 0.9762
16 Batch accuracy: 1.0 Val accuracy: 0.9758
17 Batch accuracy: 1.0 Val accuracy: 0.9764
18 Batch accuracy: 1.0 Val accuracy: 0.978
19 Batch accuracy: 1.0 Val accuracy: 0.979
20 Batch accuracy: 1.0 Val accuracy: 0.9768
21 Batch accuracy: 1.0 Val accuracy: 0.978
22 Batch accuracy: 1.0 Val accuracy: 0

In [16]:
with tf.Session() as sess:
    # Load the trained variables (the checkpoint path equals `save_path`).
    saver.restore(sess, "./my_model_final.ckpt")
    # Score the first 20 test images and take the class with the largest logit.
    X_new_scaled = X_test[:20]
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    y_pred = Z.argmax(axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [17]:
# Put the predicted classes next to the true labels of the first 20 test images.
actual_classes = y_test[:20]
print("Predicted classes:", y_pred)
print("Actual classes:   ", actual_classes)

Predicted classes: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [19]:
from tensorflow_graph_in_jupyter import show_graph
# Render the current default graph with the imported show_graph helper.
default_graph = tf.get_default_graph()
show_graph(default_graph)