In [361]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import os
from datetime import datetime
from functools import partial
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Separating data in train data and validation data

Here, we used stratified train test split sklearn function to separate our data. It's defined only 10% of all data to validation data. Here we have experimented to predict 0-5 handwritten digits.

In [362]:
tf.reset_default_graph()

train_data = pd.read_csv("train.csv")

train_labels = train_data.iloc[:, 0].values
boolean_indices = train_labels < 5 
train_labels = train_labels[boolean_indices]

train_data = train_data.iloc[:, 1:].values
train_data = train_data[boolean_indices]
train_data = train_data.astype(np.float64)

print(train_data.shape)

train_data, test_data, train_labels, test_labels = train_test_split(train_data, train_labels,
                                                                    test_size=0.1, stratify=train_labels)

print(test_labels.shape)

(21416, 784)
(2142,)


In [363]:
class MNISTClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, layers=[100, 100, 100, 100, 100], batch_size=128, optimizer_class = tf.train.AdamOptimizer,
                 learning_rate=0.01, activation = tf.nn.elu, dropout_rate=None, momentum_batch=None, random_state=None):
        self.layers = layers
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.momentum_batch = momentum_batch
        self.batch_size = batch_size
        self._session = None
        self._training = None
        
        if random_state is not None:
            tf.set_random_seed(random_state)
            np.random.seed(random_state)
        
    def _dnn(self, X):
        '''Here we build our dnn structure'''
        for index, n_layer in enumerate(self.layers):
            if self.dropout_rate:
                X = tf.layers.dropout(X, self.dropout_rate, training=self._training)

            X = tf.layers.dense(X, n_layer)
            if self.momentum_batch:
                X = tf.layers.batch_normalization(X, training=self._training, momentum=self.momentum_batch)
            X = self.activation(X, name="hidden%d" % (index))

        return X

    def _prepare_dataset(self, X_shape = None, y = None):
        
        '''Prepare our tensorflow dataset'''
        self._batch_size = tf.placeholder(tf.int64, shape=(), name="batch_size")
        self._X = tf.placeholder(tf.float64, shape=[None, X_shape[1]], name="X")
        self._y = tf.placeholder(tf.int64, shape=[None], name="y")

        def map_fn(data, labels):
            data = tf.math.divide(data, 255)
            return data, labels
        
        train_dataset = tf.data.Dataset.from_tensor_slices((self._X, self._y))
        val_dataset = tf.data.Dataset.from_tensor_slices((self._X, self._y))
        
        train_dataset = train_dataset.repeat().shuffle(X_shape[0])
        
        train_dataset = train_dataset.batch(self._batch_size)
        train_dataset = train_dataset.map(map_fn, num_parallel_calls = tf.data.experimental.AUTOTUNE)
        train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
        
        val_dataset = val_dataset.batch(self._batch_size)
        val_dataset = val_dataset.map(map_fn, num_parallel_calls = tf.data.experimental.AUTOTUNE)
        val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)
        
        self._handle = tf.placeholder(tf.string, shape = [])
        iterator = tf.data.Iterator.from_string_handle(self._handle, train_dataset.output_types, train_dataset.output_shapes)
        
        self._data, self._labels = iterator.get_next()
        
        self._train_val_iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
        self._train_iterator = self._train_val_iterator.make_initializer(train_dataset)
        self._val_iterator = val_dataset.make_initializable_iterator()
        

    def _build_graph(self, n_outputs):
        '''Building tensorflow graph'''
        
        self._training = tf.placeholder_with_default(True, shape=[], name="training")

        with tf.name_scope("build"):

            last_hidden_layer = self._dnn(self._data)

            logits = tf.layers.dense(last_hidden_layer, n_outputs, name="outputs")
            logits = tf.cast(logits, tf.float32)
            y_proba = tf.nn.softmax(logits, name="y_proba")

        with tf.name_scope("loss"):
            xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self._labels, logits=logits)
            loss = tf.reduce_mean(xentropy, name="loss")
            loss_summary = tf.summary.scalar("log_loss", loss)

        with tf.name_scope("train"):
            optimizer = self.optimizer_class(self.learning_rate)
            training_op = optimizer.minimize(loss)

        with tf.name_scope("eval"):
            correct = tf.nn.in_top_k(logits, self._labels, 1)
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float64), name="accuracy")
            accuracy_summary = tf.summary.scalar("log_accuracy", accuracy)

        with tf.name_scope("init"):
            init = tf.global_variables_initializer()
            saver = tf.train.Saver()

        self._saver = saver
        self._accuracy, self._accuracy_summary = accuracy, accuracy_summary
        self._loss, self._loss_summary = loss, loss_summary
        self._training_op = training_op
        self._y_proba = y_proba
        self._init = init

    
    def _get_model_params(self):
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}
    
    def _restore_model_params(self, model_params):
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict = feed_dict)
        
        
    def fit(self, X, y, n_epochs=10001, summary=False):

        def logdir(prefix=""):
            now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
            root_dir = "tf_logs/"
            return root_dir + prefix + "run-" + now
        
        
        n_inputs = X.shape
        self._classes = np.unique(y)
        n_outputs = len(self._classes)

        self._graph = tf.Graph()
        with self._graph.as_default():
            self._prepare_dataset(n_inputs)
            self._build_graph(n_outputs)
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            
            if summary:
                file_writer = tf.summary.FileWriter(logdir("MNIST"), tf.get_default_graph())

                
        best_loss = np.infty
        counter_early_stopping=0
        counter_max_value = 200

            
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:                                        
            sess.run(self._train_iterator, feed_dict={self._X: X, self._y: y,
                                                    self._training: True, self._batch_size : self.batch_size})

#                 if os.path.isfile(checkpoint_epoch_path):
#                     with open(checkpoint_epoch_path, "rb") as f:
#                         start_epoch = int(f.read())
#                     print("Foi interrompido! Parou no {} epoch".format(start_epoch))
#                     saver.restore(sess, checkpoint_path)

#                 else:
            start_epoch = 0
            sess.run(self._init)
            train_val_string = sess.run(self._train_val_iterator.string_handle())

            feed_dict = {self._handle: train_val_string}
            for epoch in range(start_epoch, n_epochs):
                _, _loss_summary, _accuracy_summary, loss_value, accuracy_value = sess.run([self._training_op,
                                                                                            self._loss_summary,
                                                                                            self._accuracy_summary,
                                                                                            self._loss, self._accuracy],
                                                                                            feed_dict=feed_dict)
                
                if extra_update_ops:
                    sess.run(extra_update_ops, feed_dict=feed_dict)
                
                if loss_value < best_loss:
                    best_params = self._get_model_params()
                    best_loss = loss_value
                    counter_early_stopping = 0
                    
                else:
                    counter_early_stopping += 1
                    if counter_early_stopping > counter_max_value:
                        print("Early Stopping!")
                        break

                if epoch % 10 == 0:
                    if summary:
                        file_writer.add_summary(_loss_summary, epoch)
                        file_writer.add_summary(_accuracy_summary, epoch)

                if epoch % 50 == 0:
                    accuracy_value = sess.run(self._accuracy, feed_dict=feed_dict)
                    print("Best Loss: {:.6f}\t\tTrain Score: {:.4f}".format(best_loss, accuracy_value))

            
            if best_params:
                self._best_params = best_params
                self._restore_model_params(self._best_params)
                
        return self
    
#fix this part   
    def score(self, X, y, method = accuracy_score):
        with self._session.as_default() as sess:
            
            train_val_string = sess.run(self._train_val_iterator.string_handle())
            
            sess.run(self._train_iterator, feed_dict={self._X: X, self._y: y,
                                                    self._training: False, self._batch_size: len(X)})
            
            _, y_proba, labels_val = sess.run([self._loss, self._y_proba, self._labels],
                                              feed_dict = {self._training: False, self._handle: train_val_string})
            
            y_pred = np.argmax(y_proba, axis = 1)
            
            return method(labels_val, y_pred)
####

    def predict(self, X):
         with self._session.as_default() as sess:
            
            test_string = sess.run(self._val_iterator.string_handle())
            
            sess.run(self._val_iterator.initializer, feed_dict={self._X: X, self._y: np.zeros((len(X))),
                                                                self._training: False,
                                                                self._batch_size: len(X),
                                                                self._handle: test_string})
    
            y_proba_val = self._y_proba.eval(feed_dict = {self._handle: test_string})
        
            return np.argmax(y_proba_val, axis=1)
        
    def save(self, path):
        self._saver.save(self._session, path)

In [364]:
# mnist = MNISTClassifier(random_state=42)

# mnist.fit(train_data, train_labels)
# accuracy = mnist.score(test_data, test_labels)
# # precision = mnist.score(test_data, test_labels, method = precision_score)
# print("Test accuracy score: {}".format(accuracy))
# # print("Test precision score: {}".format(precision))

In [365]:
# param_distribs = {
#     "layers": [[100, 100, 100, 100, 100], [512, 256, 128, 64, 32], [50, 50, 50, 50, 50], [80, 80, 80, 80, 80]],
#     "batch_size": [16, 32, 64, 128, 256, 512],
#     "learning_rate": [0.01, 0.02, 0.05, 0.1, 0.2],
#     "activation": [tf.nn.relu, tf.nn.elu],
#     "momentum_batch": [None, 0.9, 0.99, 0.999],
#     "dropout_rate": [None, 0.1, 0.2, 0.3, 0.4, 0.5],
#     "optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)]
# }

# clf = RandomizedSearchCV(MNISTClassifier(random_state=42), param_distribs, cv=3, n_iter=350, random_state=42)
# clf.fit(train_data, train_labels)

In [366]:
mnist = MNISTClassifier(momentum_batch=0.9, learning_rate=0.01, layers=[512, 256, 128, 64, 32], dropout_rate=0.1, batch_size=512, random_state=42, activation=tf.nn.relu)
mnist.fit(train_data, train_labels, summary=True)

Best Loss: 1.980831		Train Score: 0.7422
Best Loss: 0.013515		Train Score: 0.9805
Best Loss: 0.013515		Train Score: 0.9902
Best Loss: 0.006960		Train Score: 0.9902
Best Loss: 0.006960		Train Score: 0.9922
Best Loss: 0.004739		Train Score: 0.9980
Best Loss: 0.002659		Train Score: 0.9941
Best Loss: 0.002659		Train Score: 0.9980
Best Loss: 0.001070		Train Score: 0.9980
Best Loss: 0.001070		Train Score: 0.9961
Best Loss: 0.001070		Train Score: 1.0000
Best Loss: 0.001070		Train Score: 0.9922
Best Loss: 0.000607		Train Score: 0.9922
Best Loss: 0.000456		Train Score: 1.0000
Best Loss: 0.000456		Train Score: 0.9980
Best Loss: 0.000399		Train Score: 0.9980
Best Loss: 0.000399		Train Score: 0.9961
Best Loss: 0.000399		Train Score: 0.9922
Best Loss: 0.000399		Train Score: 0.9980
Early Stopping!


MNISTClassifier(activation=<function relu at 0x00000232819B9400>,
        batch_size=512, dropout_rate=0.1, layers=[512, 256, 128, 64, 32],
        learning_rate=0.01, momentum_batch=0.9,
        optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
        random_state=None)

In [369]:
accuracy = mnist.score(test_data, test_labels)
print(accuracy)

0.9869281045751634


In [370]:
indices = mnist.predict(train_data[0:60])
print(train_labels[0:60])
print(indices)

[3 2 0 2 1 1 2 0 2 4 4 3 4 1 4 1 4 4 4 2 1 3 2 1 3 0 4 3 3 0 1 2 2 1 2 0 2
 1 2 3 3 2 0 4 3 0 4 1 4 4 1 1 2 4 3 2 0 2 3 2]
[3 2 0 2 1 1 2 0 2 4 4 3 4 1 4 1 4 4 4 2 1 3 2 1 3 0 4 3 3 0 1 2 2 1 2 0 2
 1 2 3 3 2 0 4 3 0 4 1 4 4 1 1 2 4 3 2 0 2 3 2]
