In [1]:
# ---------------------------------------------------------------------------
# Network dimensions
# ---------------------------------------------------------------------------
# Flattened input size; the model reshapes each sample to 28 x 28 x 1.
INPUT_NODE = 784
# Number of units in the final logits layer.
OUTPUT_NODE = 47

# ---------------------------------------------------------------------------
# Training schedule
# ---------------------------------------------------------------------------
# Number of training samples in each slice fed to one train() call.
# default: 200
FOLD = 200
# default: 100
BUFFER_SIZE = 100
# Mini-batch size handed to the numpy input function.
BATCH_SIZE = 100
# Training is flagged as converged when the loss changes by less than this.
# default: 0.001
CRITERIA = 0.001
# Total step budget; it is divided by the number of slices for each train() call.
# default: 300000
TRAINING_STEPS = 30000
# Number of passes over all slices.
# default: 2
EPOH_NUM = 3
# Mentioned only in the train/validation split comment ("test_size=1/K").
# default: 5
K = 5

# ---------------------------------------------------------------------------
# Hyper-parameters that are not referenced by the code visible in this file
# ---------------------------------------------------------------------------
LEARNING_RATE = 0.0001
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARAZTION_RATE = 0.0001
MOVING_AVERAGE_DECAY = 0.99

# ---------------------------------------------------------------------------
# Checkpoint location
# ---------------------------------------------------------------------------
CNN_MODEL_PATH = "./cnn_model/"
MODEL_NAME = "cnn_model.ckpt"

# ---------------------------------------------------------------------------
# Run configuration (edit these to change what the script does)
# ---------------------------------------------------------------------------
# Default value of the --task argument.
TASK_TYPE = "train_cnn"

# Candidate [learning_rate, momentum] combinations.
parameter_pair = [
    [0.01, 0.1],
    [0.01, 0.01],
    [0.001, 0.1],
    [0.001, 0.01],
    [0.0001, 0.1],
    [0.0001, 0.01],
]
# Learning rates and momentum values actually swept by main().
LR = [0.01]
MM = [0.1]
    
import os
import numpy as np
import argparse
import matplotlib.pyplot as plt
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
from sklearn import cross_validation
import tensorflow as tf
import time
import shutil
import csv

# tf.logging.set_verbosity(tf.logging.INFO)
# Seed the default graph. The previous `tf.random_seed = 123` merely bound an
# attribute on the tensorflow module and did not seed anything.
# NOTE(review): the run log still reports '_tf_random_seed': None for the
# Estimator; presumably it needs RunConfig(tf_random_seed=...) as well — confirm.
tf.set_random_seed(123)

# TODO: save the process result
def save_dict(x, f):
    """Write the key/value pairs of a collection of dictionaries to a CSV file.

    Every dictionary contributes one ``key,value`` row per item, in order.

    Args:
        x: NumPy array (any rank, including 0-d) or sequence whose elements
            are dictionaries.
        f: Path of the CSV file to write; an existing file is overwritten.
    """
    # atleast_1d lets a 0-d object array or a plain list be iterated the same
    # way as the 1-d arrays built with np.append.
    records = np.atleast_1d(x)
    # The context manager guarantees the file is flushed and closed.
    with open(f, "w") as out_file:
        writer = csv.writer(out_file)
        for record in records:
            for key, val in record.items():
                writer.writerow([key, val])

# TODO: SGD with momentum algorithm
class MomentOptimizer(optimizer.Optimizer):
    """Gradient-descent optimizer with a momentum accumulator.

    For each variable a slot ``m`` (initialised to zeros) is maintained and
    updated as ``m <- momentum * m + learning_rate * grad``; the variable is
    then decreased by the new value of ``m``.

    Only the dense update is implemented in this class.
    """

    def __init__(self, learning_rate=0.001, momentum=0.1, use_locking=False, name="MomentOptimizer"):
        """Store the hyper-parameters.

        Args:
            learning_rate: Factor applied to the gradient in the accumulator update.
            momentum: Factor applied to the previous accumulator value.
            use_locking: Forwarded to the base ``Optimizer``.
            name: Name forwarded to the base ``Optimizer``; also used when
                creating the slot variables.
        """
        super(MomentOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._momentum = momentum

        # Tensor versions of the hyper-parameters, created in _prepare().
        self._lr_t = None
        self._momentum_t = None

    def _prepare(self):
        """Convert the Python hyper-parameters to tensors."""
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
        self._momentum_t = ops.convert_to_tensor(self._momentum, name="momentum_t")

    def _create_slots(self, var_list):
        """Create one zero-initialised accumulator slot ``m`` per variable."""
        for v in var_list:
            self._zeros_slot(v, "m", self._name)

    def _apply_dense(self, grad, var):
        """Build the op that updates the accumulator and the variable.

        Args:
            grad: Gradient tensor for ``var``.
            var: Variable to update.

        Returns:
            A grouped op that runs both the accumulator and variable updates.
        """
        # Cast to the variable's dtype so the arithmetic below type-checks.
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        momentum_t = math_ops.cast(self._momentum_t, var.dtype.base_dtype)

        m = self.get_slot(var, "m")
        m_t = m.assign(momentum_t * m + lr_t * grad)
        # Subtract the freshly assigned accumulator from the variable.
        var_update = state_ops.assign_sub(var, m_t)

        return control_flow_ops.group(var_update, m_t)

def _xavier_l2_kwargs():
    """Return the initializer/regularizer arguments shared by the conv and hidden dense layers."""
    return dict(
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.contrib.layers.xavier_initializer(),
        kernel_regularizer=tf.nn.l2_loss,
        bias_regularizer=tf.nn.l2_loss)


def _conv_relu(inputs, filters, kernel_size, strides):
    """Build one "same"-padded conv2d layer with ReLU activation."""
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding="same",
        activation=tf.nn.relu,
        **_xavier_l2_kwargs())


def cnn_model_fn(features, labels, mode, params):
    """Estimator model function for a four-conv-layer CNN classifier.

    Args:
        features: Dict holding the input batch under key ``"x"``; it is
            reshaped to ``[-1, 28, 28, 1]``.
        labels: Integer class labels. Not used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.
        params: Dict with ``"learning_rate"`` and ``"momentum"``, read in
            TRAIN mode only.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    with tf.variable_scope("cnn") as scope:
        input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

        # [batch, 28, 28, 1] -> [batch, 28, 28, 32]
        conv1 = _conv_relu(input_layer, filters=32, kernel_size=[3, 3], strides=(1, 1))
        # [batch, 28, 28, 32] -> [batch, 14, 14, 32]
        conv2 = _conv_relu(conv1, filters=32, kernel_size=[5, 5], strides=(2, 2))
        # [batch, 14, 14, 32] -> [batch, 14, 14, 64]
        conv3 = _conv_relu(conv2, filters=64, kernel_size=[3, 3], strides=(1, 1))
        # [batch, 14, 14, 64] -> [batch, 7, 7, 64]
        conv4 = _conv_relu(conv3, filters=64, kernel_size=[5, 5], strides=(2, 2))

        # Flatten for the fully connected part: [batch, 7 * 7 * 64]
        conv4_flat = tf.reshape(conv4, [-1, 7 * 7 * 64])

        # Hidden dense layer
        dense = tf.layers.dense(
            inputs=conv4_flat,
            units=1024,
            activation=tf.nn.relu,
            **_xavier_l2_kwargs())

        # Dropout is only active while training.
        dropout = tf.layers.dropout(
            inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

        # Logits layer: [batch, 1024] -> [batch, OUTPUT_NODE]
        logits = tf.layers.dense(inputs=dropout, units=OUTPUT_NODE)

        # The two output formats exposed by the model function.
        predictions = {
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")}

        # 1 predict -- must return before anything touches `labels`, which
        # is not supplied in this mode.
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        # NOTE(review): the layer regularizers above are not added to this
        # loss anywhere in this function -- confirm whether that is intended.
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
        tf.summary.scalar("cross_entropy_loss", loss)

        # 2 train
        if mode == tf.estimator.ModeKeys.TRAIN:
            momentum_optimizer = MomentOptimizer(
                learning_rate=params["learning_rate"],
                momentum=params["momentum"])
            train_op = momentum_optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step())

            # Expose the running training accuracy under a fixed name and
            # as a summary.
            training_accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
            tf.identity(training_accuracy[1], name="training_accuracy")
            tf.summary.scalar("accuracy_training_data", training_accuracy[1])

            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # 3 evaluation
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels,
                                            predictions=predictions["classes"])}

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)

def _evaluate_once(classifier, features, labels):
    """Evaluate `classifier` with one unshuffled pass over (features, labels)."""
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": features},
        y=labels,
        num_epochs=1,
        shuffle=False)
    return classifier.evaluate(input_fn=input_fn)


def main(unused_argv):
    """Train or evaluate the CNN for every (learning rate, momentum) in LR x MM.

    The task comes from the ``--task`` default (``TASK_TYPE``):
    ``train_cnn``, ``test_cnn``, ``eval_cnn`` or ``test_cnn_with_train``.

    For ``train_cnn`` the training set is cut into slices of ``FOLD`` samples.
    Each slice is trained for ``TRAINING_STEPS // num_fold`` steps, after which
    the model is evaluated on the whole training set. Training for the current
    hyper-parameter pair stops as soon as the loss changes by less than
    ``CRITERIA`` between two consecutive evaluations. The collected results
    are written as CSV files under ``experiment_result/``.

    Args:
        unused_argv: Ignored; supplied by ``tf.app.run``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--description", default="COMP5212 Programming Project 2_CNN Model",
                        help="increase output verbosity")
    parser.add_argument('--task', default=TASK_TYPE, type=str,
                        help='Select the task, train_cnn, test_cnn')
    parser.add_argument('--datapath', default="../dataset", type=str, required=False,
                        help='Select the path to the data directory')
    # An empty argument list is parsed, so the defaults above always apply.
    args = parser.parse_args(args=[])
    print(args.description)
    datapath = args.datapath

    # read data
    file_train = np.load(datapath+"/data_classifier_train.npz")
    x_train = np.asarray(file_train["x_train"], dtype=np.float32)
    y_train = np.asarray(file_train["y_train"], dtype=np.int32)
    file_test = np.load(datapath+"/data_classifier_test.npz")
    x_test = np.asarray(file_test["x_test"], dtype=np.float32)
    y_test = np.asarray(file_test["y_test"], dtype=np.int32)

    # TODO: make validation data
    x_train, x_eval, y_train, y_eval = cross_validation.train_test_split(
        x_train, y_train, test_size=0, random_state=0)  # default: test_size=1/K

    for L in LR:
        for M in MM:
            # set cnn model param
            t_begin = time.time()
            params = {"learning_rate": L, "momentum": M}
            print(params)

            CNN_MODEL_PATH_LR_MM = CNN_MODEL_PATH + "LR_"+str(L)+"_MM_"+str(M)
            print("[INFO] Saving model to %s" % CNN_MODEL_PATH_LR_MM)

            # create the estimator
            main.emnist_classifier = tf.estimator.Estimator(
                model_fn=cnn_model_fn, model_dir=CNN_MODEL_PATH_LR_MM,
                params=params)

            # NOTE(review): this hook is built but not passed to train();
            # verify the tensor names against the model graph before enabling it.
            tensors_to_log = {"probabilities": "softmax_tensor",
                              "prediction_accuracy_training_data": "training_accuracy",
                              "prediction_accuracy_evaluation_data": "evaluation_accuracy",
                              "prediction_accuracy": "train_accuracy"}
            logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                      every_n_iter=50)
            tf.logging.set_verbosity(tf.logging.ERROR)

            if args.task == "train_cnn":

                # clear the past trained model
                if os.path.isdir(CNN_MODEL_PATH_LR_MM):
                    shutil.rmtree(CNN_MODEL_PATH_LR_MM, True)

                print("*****************[PARAM] FOLD:%d BS:%d LR:%f MM:%f" % (FOLD, BATCH_SIZE, L, M))
                num_fold = x_train.shape[0] // FOLD  # number of slices
                # Integer division: train() expects a whole number of steps.
                steps_per_fold = TRAINING_STEPS // num_fold
                print("Number of folds %d and Step size %d" % (num_fold, steps_per_fold))

                # Both histories start with the hyper-parameters of this run.
                train_history = [{"learning_rate": L, "momentum": M}]
                eval_history = [{"learning_rate": L, "momentum": M}]
                previous_loss = None
                converged = False

                for e in range(EPOH_NUM):
                    for i in range(num_fold):
                        print("%d th fold of training dataset" % i)
                        fold = slice(i * FOLD, (i + 1) * FOLD)

                        train_input_fn = tf.estimator.inputs.numpy_input_fn(
                            x={"x": x_train[fold]},
                            y=y_train[fold],
                            batch_size=BATCH_SIZE,
                            num_epochs=None,
                            shuffle=True)

                        main.emnist_classifier.train(
                            input_fn=train_input_fn,
                            steps=steps_per_fold)

                        # using training dataset to test the accuracy
                        train_results = _evaluate_once(
                            main.emnist_classifier, x_train, y_train)
                        train_results["e"] = e
                        train_results["i"] = i
                        print("[INFO] training performance over times on training data")
                        print(train_results)
                        train_history.append(train_results)

                        # Convergence test against the previous evaluation.
                        current_loss = train_results["loss"]
                        if (previous_loss is not None
                                and abs(previous_loss - current_loss) < CRITERIA):
                            converged = True
                            break
                        previous_loss = current_loss

                    # Leave the epoch loop too, so training really stops
                    # once the loss has converged.
                    if converged:
                        break

                # Per-fold evaluation on the validation split is currently
                # disabled, so eval_history only carries the hyper-parameters
                # and the training time.
                time_end = time.time() - t_begin
                print("Training time %f s" % time_end)
                training_time = {"training_time(s)": time_end}
                train_history.append(training_time)
                eval_history.append(training_time)

                str_performance_over_time_train = "performance_over_time_train"+"LR_"+str(L)+"_MM_"+str(M)+".csv"
                str_performance_over_time_eval = "performance_over_time_test"+"LR_"+str(L)+"_MM_"+str(M)+".csv"

                # Create the output directory up front so the results of a
                # long training run are not lost to a missing folder.
                if not os.path.isdir("experiment_result"):
                    os.makedirs("experiment_result")
                save_dict(np.array(train_history, dtype=object),
                          "experiment_result/"+str_performance_over_time_train)
                save_dict(np.array(eval_history, dtype=object),
                          "experiment_result/"+str_performance_over_time_eval)

            elif args.task == "test_cnn":
                test_results = _evaluate_once(main.emnist_classifier, x_test, y_test)
                print("Loss %f and Accuracy %f with %d on testing dateset"
                      % (test_results["loss"], test_results["accuracy"], test_results["global_step"]))

            elif args.task == "eval_cnn":
                eval_results = _evaluate_once(main.emnist_classifier, x_eval, y_eval)
                print("Loss %f and Accuracy %f with %d  on evaluation dateset"
                      % (eval_results["loss"], eval_results["accuracy"], eval_results["global_step"]))

            elif args.task == "test_cnn_with_train":
                test_results = _evaluate_once(main.emnist_classifier, x_train, y_train)
                print("Loss %f and Accuracy %f with %d on training dateset"
                      % (test_results["loss"], test_results["accuracy"], test_results["global_step"]))
        
if __name__ == "__main__":
    tf.app.run()

  from ._conv import register_converters as _register_converters


COMP5212 Programming Project 2_CNN Model
{'momentum': 0.021, 'learning_rate': 0.01}
[INFO] Saving model to ./cnn_model/LR_0.01_MM_0.021
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_num_worker_replicas': 1, '_tf_random_seed': None, '_model_dir': './cnn_model/LR_0.01_MM_0.021', '_global_id_in_cluster': 0, '_session_config': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fca36fcae48>, '_is_chief': True, '_task_type': 'worker', '_save_checkpoints_steps': None, '_num_ps_replicas': 0, '_save_summary_steps': 100, '_service': None, '_task_id': 0, '_evaluation_master': '', '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_master': '', '_keep_checkpoint_every_n_hours': 10000}




*****************[PARAM] FOLD:200 BS:100 LR:0.010000 MM:0.021000
Number of folds 200 and Step size 150
0 th fold of training dataset
[INFO] training performance over times on training data
{'accuracy': 0.31125, 'global_step': 150, 'i': 0, 'loss': 3.9312284, 'e': 0}
1 th fold of training dataset
[INFO] training performance over times on training data
{'accuracy': 0.38985, 'global_step': 300, 'i': 1, 'loss': 3.3717484, 'e': 0}
2 th fold of training dataset


KeyboardInterrupt: 