In [65]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import os
from datetime import datetime
from functools import partial

In [66]:
layers = [100, 100, 100, 100, 100, 5]
learning_rate = 0.01
n_epochs = 10001
BATCH_SIZE = 128
n_inputs = 28*28

In [67]:
tf.reset_default_graph()

train_data = pd.read_csv("train.csv")

train_labels = train_data.iloc[:, 0].values
boolean_indices = train_labels < 5 
train_labels = train_labels[boolean_indices]

train_data = train_data.iloc[:, 1:].values
train_data = train_data[boolean_indices]

print(train_data.shape)

train_data, test_data, train_labels, test_labels = train_test_split(train_data, train_labels, test_size=0.1, stratify=train_labels)

(21416, 784)


In [68]:
batch_size = tf.placeholder(tf.int64, name="batch_size")
training = tf.placeholder_with_default(False, shape=[], name="training")
X = tf.placeholder(tf.float64, shape=[None, n_inputs], name="X")
y = tf.placeholder(tf.int64, shape=[None], name="y")

def map_fn(data, labels):
    data = tf.math.divide(data, 255)
    return data, labels

dataset = tf.data.Dataset.from_tensor_slices((X, y))

if training is not None:
    dataset = dataset.repeat().shuffle(len(train_data))
    
dataset = dataset.batch(batch_size)
dataset = dataset.map(map_fn, num_parallel_calls = 6)
dataset = dataset.prefetch(1)

iteration = dataset.make_initializable_iterator()
data, labels = iteration.get_next()

In [69]:
with tf.name_scope("dnn"):
#     input_x = keras.Input(data.get_shape().as_list())
#     X = layers.Dense(layers[0], activation="relu")(input_x)
#     X = layers.Dense(layers[1], activation="relu")(X)
#     X = layers.Dense(layers[2], activation="relu")(X)
#     X = layers.Dense(layers[3], activation="relu")(X)
#     logits = layers.Dense(layers[4], activation="softmax")(X)
    my_model = partial(tf.layers.dense, activation=tf.nn.elu)

    hidden1 = my_model(data, layers[0], name="hidden1")
    hidden2 = my_model(hidden1, layers[1], name="hidden2")
    hidden3 = my_model(hidden2, layers[2], name="hidden3")
    hidden4 = my_model(hidden3, layers[3], name="hidden4")
    hidden5 = my_model(hidden4, layers[4], name="hidden5")
    logits = tf.layers.dense(hidden5, layers[5], name="outputs")
    logits = tf.cast(logits, tf.float32)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar("log_loss", loss)

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float64), name="accuracy")
    accuracy_summary = tf.summary.scalar("log_accuracy", accuracy)

with tf.name_scope("init"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [70]:
def logdir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_dir = "tf_logs/"
    return root_dir + prefix + "run-" + now

In [71]:
checkpoint_path = "/tmp/deep_mnist_dnn.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
checkpoint_final_model = "./tmp/deep_mnist_dnn.ckpt"

file_writer = tf.summary.FileWriter(logdir("MNIST"), tf.get_default_graph())
best_loss = np.infty
counter_early_stopping=0
counter_max_value = 400


with tf.Session() as sess:
    sess.run(iteration.initializer, feed_dict={X: train_data, y: train_labels, training: True, batch_size: BATCH_SIZE})
    
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Foi interrompido! Parou no {} epoch".format(start_epoch))
        saver.restore(sess, checkpoint_path)
        
    else:
        start_epoch = 0
        sess.run(init)
    
    for epoch in range(start_epoch, n_epochs):
        _, _loss_summary, _accuracy_summary, loss_value = sess.run([training_op, loss_summary, accuracy_summary, loss])
        
        if loss_value < best_loss:
            best_loss = loss_value
            counter_early_stopping=0
        else:
            counter_early_stopping+=1
            if counter_early_stopping > counter_max_value:
                print("Early Stopping!")
                break
        
        if epoch % 10 == 0:
            file_writer.add_summary(_loss_summary, epoch)
            file_writer.add_summary(_accuracy_summary, epoch)
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
                
        if epoch % 200 == 0:
            accuracy_value = sess.run(accuracy)
            print("Best Loss: {:.6f}\t\tTrain Score: {:.4f}".format(best_loss, accuracy_value))
    
    accuracy_train = sess.run(accuracy)
    saver.save(sess, checkpoint_final_model)
    
    sess.run(iteration.initializer, feed_dict={X: test_data, y: test_labels, batch_size: BATCH_SIZE})
    accuracy_val = sess.run(accuracy)
    print("Train Accuracy Score: {}\nValidation Accuracy Score: {}".format(accuracy_train, accuracy_val))
    
    os.remove(checkpoint_epoch_path)

Best Loss: 1.678217		Train Score:0.4062
Best Loss: 0.002999		Train Score:0.9766
Best Loss: 0.002957		Train Score:0.9688
Best Loss: 0.001623		Train Score:0.9922
Best Loss: 0.000670		Train Score:1.0000
Best Loss: 0.000265		Train Score:0.9922
Best Loss: 0.000070		Train Score:0.9688
Best Loss: 0.000070		Train Score:0.9922
Early Stopping!
Train Accuracy Score: 0.9921875
Validation Accuracy Score: 0.9765625


In [72]:
# test_data = pd.read_csv("test.csv")
# test_data = test_data.values

# with tf.Session() as sess:
#     saver.restore(sess, checkpoint_path)
#     sess.run(iter.initializer, feed_dict={X_train: test_data, y_train: test_data[0], batch_size: len(test_data)})
#     logits = sess.run(logits)
#     y_pred = tf.argmax(logits, axis=1)