##### Importing Necessary modules

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed=42):
    '''Function to reset the tensorflow graph to default'''
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
class Multi_RNN():
    
    '''Class that constructs Multiple Recurrent Neural Network'''
    
    strt_ind = 0; end_ind = 0 #Index values for batch processing
    
    def __init__(self, n_steps, n_neurons, n_inputs, n_out, lr, n_layers):
        
        #Setting the hyper parameters from the user
        self.n_steps = n_steps
        self.n_neurons = n_neurons
        self.n_inputs = n_inputs
        self.n_out = n_out
        self.lr = lr
        self.n_layers = n_layers
        
        #Initializing default train and test variables
        self.X_train, self.X_test, self.y_train,self.y_test = None,None,None,None
        
        
        #Reading the data into a data frame
        self.df = pd.read_csv('C:\\Users\\amith\\Downloads\\data.csv')
        
        
        self.df = self.df.dropna(axis=1)
        
        
        #Encoding the target columns which is in string format to a binary format
        self.df['diagnosis'] = self.df['diagnosis'].astype('category')
        self.df['diag_cat'] = self.df['diagnosis'].cat.codes
        
        
        #Initializing standard scaler to normalize the input data
        self.s = StandardScaler()        
        
        
        #Fit and transform the input data by normal scaling
        x_vals = self.s.fit_transform(self.df.drop(['id', 'diagnosis', 'diag_cat'], axis=1))
        
        
        #Create a new data frame for independent variables
        X_Vals = pd.DataFrame(x_vals, columns=[ self.df.columns[2:32] ])
        y_Vals = self.df['diag_cat']
        
        
        #Choose training size to split the data
        self.training_size = float(input('Choose a training size in decimal '))
                
        
        #Split the data in to train and test variables
        self.X_train,self.X_test, self.y_train,self.y_test = train_test_split(X_Vals, y_Vals, train_size = self.training_size)
     
    def next_batch(self,iteration,batch_size):
        
        '''Returns batch of feature and target variables based on index values :
        
        inputs:
        -> Iteration: index of i-th iteration of training
        
        -> batch_size: the batch size specified
        
        -> return type: data frame'''
        
        global strt_ind, end_ind
         
        
        if iteration==0:
            
            strt_pos = 0 
            end_pos = batch_size
            
            
            X_batch = self.X_train[strt_pos:end_pos]
            y_batch = self.y_train[strt_pos:end_pos]
            
        else:
            
            strt_pos = iteration * batch_size
            end_pos = strt_pos + batch_size
            
            
            
            X_batch = self.X_train[strt_pos:end_pos]
            y_batch = self.y_train[strt_pos:end_pos]
    
    
        
        return X_batch, y_batch
    
    def Construct_RNN(self):
        
        '''This function constructs a Recurrent neural network  with the hyper parameters provided and internally calls 
        the train method to train the data.
        
        return : None'''
        
        #Reset the graph if already exists
        reset_graph()
        
        print('Constructing a Recurrent neural network with {} cells and {} neurons in each cell'.format(self.n_layers,self.n_neurons), '\n')
        
        # Place holders for X and y
        self.X = tf.placeholder(tf.float32, shape=[None, self.n_steps, self.n_inputs])
        
        self.y = tf.placeholder(tf.int32, shape=[None])
        
        
        #Set a training layer
        self.training = tf.placeholder_with_default(False, shape=(), name='training')

        self.dropout_rate = float(input('Please choose a drop out rate \n'))
        
        with tf.name_scope("RecNN"): 
            
            #Probability of dropping a neuron in each cell
            self.X_drop = tf.layers.dropout(self.X, self.dropout_rate,training=self.training)

            #Creating multiple Recurrent Neural Network cells
            cells = [tf.contrib.rnn.BasicRNNCell(num_units=self.n_neurons, activation=tf.nn.tanh)
                    for layer in range(self.n_layers)]

            #Construction of a Multiple Recurrent Neural Network Cell
            multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells,state_is_tuple=False)

            #Copy the output and states of an unrolled cell
            self.outputs,self.states = tf.nn.dynamic_rnn(multi_layer_cell,self.X_drop, dtype=tf.float32)

            
            #Heinitializer
            he_init = tf.contrib.layers.variance_scaling_initializer()
            
            #Create a densely connected recurrent neural network layer
            self.logits = tf.layers.dense(self.states,self.n_out, kernel_initializer=he_init)
            
        with tf.name_scope("loss"):    
            xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.y, logits = self.logits)

            #Specify the loss function
            self.loss = tf.reduce_mean(xentropy)
        
        with tf.name_scope("train"):
            
            #Choosing an Adam Optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
            
            #Minimize the training loss
            self.train_op = optimizer.minimize(self.loss)
        
        with tf.name_scope("eval"):
            
            #Choosing the top most prediction from the probability of values
            correct = tf.nn.in_top_k(self.logits, self.y,1)
            
            self.acc = tf.reduce_mean(tf.cast(correct, tf.float32))

        
        #Initializing the global variables
        self.init = tf.global_variables_initializer()
        
        #Save the model to disk
        self.saver = tf.train.Saver()
        
        #Train the constructed model
        self.train_it()
    
    def train_it(self):
        
        '''This function trains the model which was constructed before and internally calls the test method
        
        return type: None'''
        
        # Taking the no.of rows and columns of the data frame
        r,c = self.df.shape
        
        
        ep = int(input('Choose no. of epochs '))
        b_s = int(input('Choose batch size '))
        
        print('Training a Recurrent neural network for {} epochs '.format(ep), '\n')
        
        #Calculating the total training size
        total_training_size = int(r * self.training_size)
        
        with tf.Session() as sess:
            self.init.run()
            
            for epoch in range(ep):
                
                for it in range(total_training_size // b_s):
                    
                    #Fetching next batch of inputs
                    X_b, y_b = self.next_batch(it,b_s)
                    
                    #Reshaping the batch of inputs to a 3D tensor of shape [batch_size, no. of steps, no. of inputs]
                    X_batch = np.array(X_b).reshape((-1, self.n_steps,self.n_inputs))
                    
                    
                    #Running the training session
                    sess.run([self.train_op, self.loss], 
                                        feed_dict={self.training: True,self.X: X_batch, self.y: y_b})
                    
                #Predicting the accuracy of the training session
                acc_t = self.acc.eval(feed_dict={self.X: X_batch, self.y: y_b})
                
                
                if epoch % 100 == 0:
                    print(epoch, 'Train accuracy: ', acc_t)
                    
                          
    
            #Saving the model 
            save_path = self.saver.save(sess, "./cancer_tf_test.ckpt")
        
        #Testing the model
        self.test_it()
        
    def test_it(self):
        
        '''Tests the model on testing set'''
        
        with tf.Session() as sess:
            
            #invoking the saved model
            self.saver.restore(sess, "./cancer_tf_test.ckpt")
            
            #Testing the model on test set 
            Z = self.logits.eval(feed_dict={self.X: np.array(self.X_test).reshape((-1,self.n_steps,self.n_inputs))  })
            
            #Fetching the top predictions from evaluations
            y_pred = np.argmax(Z, axis=1)
            
            
            #Various accuracy metrics
            print('Accuracy of the predictions \n', accuracy_score(y_true= self.y_test, y_pred=y_pred), '\n')
            
            print('First 10 Predictions ', y_pred[0:10], '\n', 'First 10 Actual values ', np.array(self.y_test[0:10]), '\n')
            
            print('Classification report \n' , classification_report(y_true= self.y_test, y_pred= y_pred))   
            
    def main(self):
        
        '''Main Function'''
        
        self.Construct_RNN()
        
if __name__ == '__main__':
    Multi_RNN(30,100,1,2,0.001, 3).main()

Choose a training size in decimal 0.8




Constructing a Recurrent neural network with 3 cells and 100 neurons in each cell 

Please choose a drop out rate 
0.15
Choose no. of epochs 800
Choose batch size 20
Training a Recurrent neural network for 800 epochs  

0 Train accuracy:  0.95
100 Train accuracy:  1.0
200 Train accuracy:  1.0
300 Train accuracy:  1.0
400 Train accuracy:  1.0
500 Train accuracy:  1.0
600 Train accuracy:  1.0
700 Train accuracy:  1.0
INFO:tensorflow:Restoring parameters from ./cancer_tf_test.ckpt
Accuracy of the predictions 
 0.9649122807017544 

First 10 Predictions  [1 0 0 1 0 0 0 0 0 1] 
 First 10 Actual values  [1 0 0 1 0 0 0 0 0 1] 

Classification report 
              precision    recall  f1-score   support

          0       0.96      0.99      0.97        75
          1       0.97      0.92      0.95        39

avg / total       0.97      0.96      0.96       114



###### Precision: Of all the values which were predicted as 0/Benign; 96% of them were correctly predicted as Benign's and 4% of them were wrongly predicted as Benign's. Similarly, for 1's or Malign's

###### Recall: Of the all the values which were actually 0/Benign; the model was able to CATCH 99% of them correctly. It wasn't able to properly identify 1% of values as Benign. Similarly for 1/Malign.