# 2. Deep learning

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
import numpy as np
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt

%matplotlib inline

## 2.1 Load and format data

In [3]:
# Directory holding the notMNIST pickle. It can be overridden with the
# NOTMNIST_DATA_ROOT environment variable so the notebook is not tied to a
# single machine; when the variable is unset the original local path is used.
data_root = os.environ.get(
    'NOTMNIST_DATA_ROOT',
    r'C:\Users\7153678\Desktop\AI\src\nlp\data\mnist',
)
pickle_file = os.path.join(data_root, 'notMNIST.pickle')

# Side length of one square image, in pixels (flattened to image_size**2 features).
image_size = 28
# Number of label classes (width of the one-hot label vectors).
class_num = 10

In [4]:
# NOTE: pickle.load can execute arbitrary code while deserialising; only open
# pickle files that come from a trusted source.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the six arrays out of the loaded object, then drop the container.
train_data, train_labels = save['train_data'], save['train_labels']
valid_data, valid_labels = save['valid_data'], save['valid_labels']
test_data, test_labels = save['test_data'], save['test_labels']
del save

# Report the shapes of each split as a quick sanity check.
print('train: {} and {}'.format(train_data.shape, train_labels.shape))
print('valid: {} and {}'.format(valid_data.shape, valid_labels.shape))
print('test: {} and {}'.format(test_data.shape, test_labels.shape))

train: (200000, 28, 28) and (200000,)
valid: (10000, 28, 28) and (10000,)
test: (10000, 28, 28) and (10000,)


In [5]:
def reformat(data, labels, num_pixels=None, num_classes=None):
    """Flatten images to row vectors and one-hot encode integer labels.

    Args:
        data: array whose elements can be reshaped to ``(-1, num_pixels)``.
        labels: 1-D array of integer class ids.
        num_pixels: flattened length of one image; defaults to the
            notebook-level ``image_size ** 2``.
        num_classes: number of one-hot columns; defaults to the
            notebook-level ``class_num``.

    Returns:
        ``(new_data, new_label)``: ``new_data`` has shape
        ``(N, num_pixels)`` and ``new_label`` is a float32 array of shape
        ``(N, num_classes)`` holding 1.0 in the column equal to each label.
    """
    # Fall back to the notebook globals only when the caller gives no size,
    # so the function can also be used (and tested) on its own.
    if num_pixels is None:
        num_pixels = image_size ** 2
    if num_classes is None:
        num_classes = class_num

    new_data = data.reshape(-1, num_pixels)
    # Comparing an (N, 1) column against arange(num_classes) broadcasts to an
    # (N, num_classes) boolean matrix, i.e. the one-hot encoding.
    new_label = (labels[:, None] == np.arange(num_classes)).astype(np.float32)
    return new_data, new_label

In [6]:
# Reformat only while the labels are still 1-D class ids. Re-running this cell
# on labels that are already one-hot would broadcast them into a wrong
# (N, 1, class_num) shape, so the guard keeps the cell safe to execute twice.
if train_labels.ndim == 1:
    train_data, train_labels = reformat(train_data, train_labels)
    valid_data, valid_labels = reformat(valid_data, valid_labels)
    test_data, test_labels = reformat(test_data, test_labels)

# Shapes after flattening the images and one-hot encoding the labels.
print('train: {} and {}'.format(train_data.shape, train_labels.shape))
print('valid: {} and {}'.format(valid_data.shape, valid_labels.shape))
print('test: {} and {}'.format(test_data.shape, test_labels.shape))

train: (200000, 784) and (200000, 10)
valid: (10000, 784) and (10000, 10)
test: (10000, 784) and (10000, 10)


## 2.2 Build the model block in TensorFlow

In [7]:
# Number of leading training examples used by the full-batch model below
# (the graph slices train_data/train_labels with [:train_subset]).
train_subset = 10000

In [18]:
# Dedicated graph for the full-batch gradient-descent model; it is populated
# in the following cell via g.as_default().
g = tf.Graph()

In [19]:
with g.as_default():

    # Input data: a fixed slice of the training set plus the complete
    # validation and test sets, all embedded in the graph as constants.
    tf_train_data = tf.constant(train_data[:train_subset, :])
    tf_train_labels = tf.constant(train_labels[:train_subset])
    tf_valid_data = tf.constant(valid_data)
    tf_test_data = tf.constant(test_data)

    # Variables: one weight column per class, biases start at zero.
    weights = tf.Variable(
        tf.truncated_normal([image_size**2, class_num])
    )
    biases = tf.Variable(tf.zeros(class_num))

    # Training computation: linear logits and mean softmax cross-entropy.
    logits = tf.matmul(tf_train_data, weights) + biases
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf_train_labels,
            logits=logits,
        )
    )

    # Optimizer: plain gradient descent with learning rate 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions: class probabilities for each split, sharing the same
    # weights and biases.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_data, weights) + biases
    )
    test_prediction = tf.nn.softmax(
        tf.matmul(tf_test_data, weights) + biases
    )

## 2.2 Train the model

In [10]:
# Number of gradient-descent steps run by the training loop below.
num_steps = 801

In [14]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: 2-D array with one row per example (e.g. one-hot); the
            arg-max of each row is taken as the true class.

    Returns:
        The share of matching rows, scaled to the range 0-100.
    """
    hits = np.argmax(predictions, 1) == np.argmax(labels, 1)
    # np.sum counts the matches in compiled code; the builtin sum() would
    # iterate over the array element by element in Python.
    return 100.0 * np.sum(hits) / labels.shape[0]

In [23]:
with tf.Session(graph=g) as session:
    # Initialise every variable of the graph before the first step.
    session.run(tf.global_variables_initializer())
    print('Initialized!')

    for step in range(num_steps):
        # Run one optimisation step and fetch the loss and training
        # predictions produced by that same step.
        _, l, predictions = session.run([optimizer, loss, train_prediction])

        # Only report progress every 50th step.
        if step % 50 != 0:
            continue

        train_accuracy = accuracy(predictions, train_labels[:train_subset, :])
        valid_accuracy = accuracy(session.run(valid_prediction), valid_labels)
        print('Train at step {} with loss: {}'.format(step, l))
        print('Train accuracy: {}, valid accuracy: {}'.format(train_accuracy, valid_accuracy))

    # Final evaluation on the held-out test set.
    test_accuracy_input = session.run(test_prediction)
    print('Test accuracy: {}'.format(accuracy(test_accuracy_input, test_labels)))

Initialized!
Train at step 0 with loss: 16.927234649658203
Train accuracy: 7.67, valid accuracy: 11.18
Train at step 50 with loss: 2.7983522415161133
Train accuracy: 67.25, valid accuracy: 67.06
Train at step 100 with loss: 2.3227155208587646
Train accuracy: 71.9, valid accuracy: 71.38
Train at step 150 with loss: 2.0675525665283203
Train accuracy: 73.22, valid accuracy: 72.89
Train at step 200 with loss: 1.8890845775604248
Train accuracy: 74.35, valid accuracy: 73.65
Train at step 250 with loss: 1.751396894454956
Train accuracy: 74.74, valid accuracy: 74.16
Train at step 300 with loss: 1.640235185623169
Train accuracy: 75.17, valid accuracy: 74.44
Train at step 350 with loss: 1.548066258430481
Train accuracy: 75.58, valid accuracy: 74.66
Train at step 400 with loss: 1.4702507257461548
Train accuracy: 76.05, valid accuracy: 74.78
Train at step 450 with loss: 1.4034371376037598
Train accuracy: 76.47, valid accuracy: 74.92
Train at step 500 with loss: 1.3451465368270874
Train accuracy: 7

## 2.3 SGD

In [12]:
# Number of examples fed per training step; also used as the fixed batch
# dimension of the training placeholders in the next cell.
batch_size = 128
# Separate graph for the mini-batch (SGD) version of the model.
graph = tf.Graph()

In [23]:
with graph.as_default():
    # Inputs: the training batch is fed at run time through placeholders;
    # validation and test data stay constants inside the graph.
    tf_train_data = tf.placeholder(
        tf.float32, shape=(batch_size, image_size**2)
    )
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, class_num)
    )
    tf_valid_data = tf.constant(valid_data)
    tf_test_data = tf.constant(test_data)

    # Variables of the linear model.
    weights = tf.Variable(
        tf.truncated_normal([image_size**2, class_num])
    )
    biases = tf.Variable(tf.zeros(class_num))

    # Training computation on the fed mini-batch.
    logits = tf.matmul(tf_train_data, weights) + biases
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf_train_labels,
            logits=logits,
        )
    )

    # Optimizer: gradient descent with learning rate 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the current batch and for the fixed evaluation sets.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_data, weights) + biases
    )
    test_prediction = tf.nn.softmax(
        tf.matmul(tf_test_data, weights) + biases
    )


In [24]:
# Number of mini-batch training steps run by the loop below.
num_steps = 30001

In [27]:
with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    print('Initilazed')

    for step in range(num_steps):
        # Slide a batch-sized window over the training set; the modulo wraps
        # the window back to the start before it can run past the end.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_end = offset + batch_size
        batch_data = train_data[offset:batch_end, :]
        batch_labels = train_labels[offset:batch_end, :]

        # Feed the mini-batch into the placeholders and take one step.
        feed_dict = {tf_train_data: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction],
            feed_dict=feed_dict,
        )

        # Log only every 500th step.
        if step % 500 != 0:
            continue
        print('Mini-batch train at step {} with loss: {}'.format(step, l))
        print('Mini-batch train accuracy: {}, valid accuracy: {}'.format(
            accuracy(predictions, batch_labels),
            accuracy(session.run(valid_prediction), valid_labels)))

    # Final evaluation on the test set once training has finished.
    print('Mini-batch test accuarcy: {}'.format(accuracy(session.run(test_prediction), test_labels)))

Initilazed
Mini-batch train at step 0 with loss: 17.55832862854004
Mini-batch train accuracy: 9.375, valid accuracy: 8.61
Mini-batch train at step 500 with loss: 2.54551362991333
Mini-batch train accuracy: 71.875, valid accuracy: 75.98
Mini-batch train at step 1000 with loss: 1.5546658039093018
Mini-batch train accuracy: 77.34375, valid accuracy: 77.17
Mini-batch train at step 1500 with loss: 1.2392942905426025
Mini-batch train accuracy: 80.46875, valid accuracy: 77.86
Mini-batch train at step 2000 with loss: 0.8931864500045776
Mini-batch train accuracy: 85.9375, valid accuracy: 78.65
Mini-batch train at step 2500 with loss: 1.0539610385894775
Mini-batch train accuracy: 73.4375, valid accuracy: 79.06
Mini-batch train at step 3000 with loss: 1.2189171314239502
Mini-batch train accuracy: 75.0, valid accuracy: 79.59
Mini-batch train at step 3500 with loss: 0.8143807649612427
Mini-batch train accuracy: 78.90625, valid accuracy: 79.32
Mini-batch train at step 4000 with loss: 0.6500001549720

## 2.4 Add ReLU

In [8]:
# Rows per mini-batch; also fixes the batch dimension of the training
# placeholders defined in the next cell.
batch_size = 128
# Fresh graph for the network with one hidden ReLU layer.
graph = tf.Graph()
# Number of units in the hidden layer (width of weights1 / biases1).
h1 = 1024

In [11]:
with graph.as_default():
    # Inputs: only the training batch is fed at run time; the validation and
    # test sets are embedded in the graph as constants.
    tf_train_data = tf.placeholder(tf.float32, shape=(batch_size, image_size**2))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, class_num))
    tf_valid_data = tf.constant(valid_data)
    tf_test_data = tf.constant(test_data)

    # Variables: input -> hidden layer (h1 units) -> output (class_num logits).
    weights1 = tf.Variable(tf.truncated_normal([image_size**2, h1]))
    biases1 = tf.Variable(tf.zeros(h1))
    weights2 = tf.Variable(tf.truncated_normal([h1, class_num]))
    biases2 = tf.Variable(tf.zeros(class_num))

    def _forward(inputs):
        """Apply the two-layer network to `inputs`.

        Returns a tuple ``(pre_activation, hidden, logits)``: the hidden
        layer before ReLU, after ReLU, and the output-layer logits. All
        calls share the same weights1/biases1/weights2/biases2 variables.
        """
        pre_activation = tf.matmul(inputs, weights1) + biases1
        hidden = tf.nn.relu(pre_activation)
        return pre_activation, hidden, tf.matmul(hidden, weights2) + biases2

    # Training computation. These three names stay bound to the training
    # tensors instead of being overwritten by the validation/test passes.
    logits_1, relu_layer, logits_2 = _forward(tf_train_data)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits_2))

    # Optimizer: gradient descent with learning rate 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions: the same forward pass applied to each data split.
    train_prediction = tf.nn.softmax(logits_2)
    valid_prediction = tf.nn.softmax(_forward(tf_valid_data)[-1])
    test_prediction = tf.nn.softmax(_forward(tf_test_data)[-1])
    

In [12]:
# Number of mini-batch training steps run by the loop below.
num_steps = 30001

In [15]:
with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    print('Initilazed')

    for step in range(num_steps):
        # Slide a batch-sized window over the training set; the modulo wraps
        # the window back to the start before it can run past the end.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_end = offset + batch_size
        batch_data = train_data[offset:batch_end, :]
        batch_labels = train_labels[offset:batch_end, :]

        # Feed the mini-batch into the placeholders and take one step.
        feed_dict = {tf_train_data: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction],
            feed_dict=feed_dict,
        )

        # Log only every 500th step.
        if step % 500 != 0:
            continue
        print('Mini-batch train at step {} with loss: {}'.format(step, l))
        print('Mini-batch train accuracy: {}, valid accuracy: {}'.format(
            accuracy(predictions, batch_labels),
            accuracy(session.run(valid_prediction), valid_labels)))

    # Final evaluation on the test set once training has finished.
    print('Mini-batch test accuarcy: {}'.format(accuracy(session.run(test_prediction), test_labels)))

Initilazed
Mini-batch train at step 0 with loss: 353.59393310546875
Mini-batch train accuracy: 10.15625, valid accuracy: 28.09
Mini-batch train at step 500 with loss: 15.597188949584961
Mini-batch train accuracy: 71.875, valid accuracy: 80.7
Mini-batch train at step 1000 with loss: 5.954239845275879
Mini-batch train accuracy: 84.375, valid accuracy: 81.15
Mini-batch train at step 1500 with loss: 2.924823760986328
Mini-batch train accuracy: 82.8125, valid accuracy: 79.22
Mini-batch train at step 2000 with loss: 2.204730749130249
Mini-batch train accuracy: 91.40625, valid accuracy: 79.37
Mini-batch train at step 2500 with loss: 9.596665382385254
Mini-batch train accuracy: 76.5625, valid accuracy: 82.3
Mini-batch train at step 3000 with loss: 2.9803805351257324
Mini-batch train accuracy: 77.34375, valid accuracy: 82.5
Mini-batch train at step 3500 with loss: 4.401630878448486
Mini-batch train accuracy: 86.71875, valid accuracy: 82.4
Mini-batch train at step 4000 with loss: 1.2764641046524