# Welcome to Import Data Tensorflow Snippets¶

### import statements

import pandas as pd
import numpy as np
import tensorflow as tf
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.preprocessing import sequence
import tempfile

### Print Versions

In [6]:
print('tensorflow version ',tf.__version__)
print('pandas version ',pd.__version__)
print('numpy version ', np.__version__)

tensorflow version  1.12.0
pandas version  0.23.4
numpy version  1.15.4


### IMDB Data Gathering Class

In [9]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.preprocessing import sequence
import tempfile

class movieReviewData():

#vocab_size = 5000
#embedding_size = 50
#sentence_size=200
#start_id = 1
#oov_id = 2
#index_offset = 2

    def __init__(self):
        
        self.vocab_size = 5000
        self.start_id = 1
        self.oov_id = 2
        self.index_offset = 2
        self.sentence_size = 200
        
        model_dir = tempfile.mkdtemp()  
        
        print("Loading data...")
        (self.x_train_variable, self.y_train), (self.x_test_variable, self.y_test) = imdb.load_data(
            num_words=self.vocab_size, start_char=self.start_id, oov_char=self.oov_id,
            index_from=self.index_offset)
        
        self.x_train = 0
        self.x_test = 0
        
        print(len(self.y_train), "train sequences")
        print(len(self.y_test), "test sequences")
        
        
        
    def preProcessing(self):
        '''
            Description: 
                - Load data
                - Convert from text to index
                - 0 post-padding 
            Usage:
        '''
        
#        sentence_size = 200
    #    embedding_size = 50
    
        
        # we assign the first indices in the vocabulary to special tokens that we use
        # for padding, as start token, and for indicating unknown words
        
        pad_id = 0
    
        print("Pad sequences (samples x time)")
        self.x_train = sequence.pad_sequences(self.x_train_variable, 
                                         maxlen=self.sentence_size,
                                         truncating='post',
                                         padding='post',
                                         value=pad_id)
        self.x_test = sequence.pad_sequences(self.x_test_variable, 
                                        maxlen=self.sentence_size,
                                        truncating='post',
                                        padding='post', 
                                        value=pad_id)
        
        print("x_train shape:", self.x_train.shape)
        print("x_test shape:", self.x_test.shape)
        

    
    def convert2Text(self,pad_id,oov_id,start_id,index_offset):
        '''
            Description: covert index to text
            Usage:
        '''
        word_index = imdb.get_word_index()
        word_inverted_index = {v + index_offset: k for k, v in word_index.items()}
        
        # The first indexes in the map are reserved to represent things other than tokens
        word_inverted_index[pad_id] = '<PAD>'
        word_inverted_index[start_id] = '<START>'
        word_inverted_index[oov_id] = '<OOV>'
        
        for i in range(0, 10):
          print(i, word_inverted_index[i])
          
        def index_to_text(indexes):
            return ' '.join([word_inverted_index[i] for i in indexes])
        
        print(index_to_text(self.x_train_variable[0]))

### Obtain data from above class

In [14]:
# Load data
data = movieReviewData()

# Preprocessing data
data.preProcessing()

vocab_size = data.vocab_size
embedding_size = 50
sentence_size = data.sentence_size

#     Prepare data
x_train = data.x_train
x_test = data.x_test

y_train = data.y_train
y_test  = data.y_test

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 200)
x_test shape: (25000, 200)


### Custom Model function, holding RNN + LSTM Cells which is fed into Tensorflow Estimator

In [13]:
def LSTM_model_fn(features, labels, mode):
    inputs = tf.contrib.layers.embed_sequence(
            features['x'],vocab_size,embed_dim=embedding_size,
            initializer=tf.random_uniform_initializer(-1.0,-1.0))
    
    # create an LSTM cell of size 100
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(100)
    
    sequence_length = tf.count_nonzero(features['x'], 1)
    
    _, final_states = tf.nn.dynamic_rnn(
        lstm_cell, inputs, sequence_length = sequence_length, dtype=tf.float32)
    
    outputs = final_states.h   
    logits = tf.layers.dense(inputs=outputs, units=1)
    
    if labels is not None: # vertical array
        labels = tf.reshape(labels, [-1, 1])

    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "next": tf.round(tf.sigmoid(logits)),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
      }

    # Prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions["next"])
    
    loss = tf.losses.sigmoid_cross_entropy(labels,logits)
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(
                    labels=labels, predictions=predictions["next"])}
    return tf.estimator.EstimatorSpec( mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [15]:
def parser(x, y):
    '''
        Description: 
        Usage:
    '''
    
    features = {"x": x}
#    y_ = {"next":y}
    return features, y

In [16]:
def train_input_fn(x_train,y_train,batch_size):
    '''
        Description: 
        Usage:
    '''
    dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train))
    dataset = dataset.shuffle(1000).batch(batch_size).map(parser).repeat()
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()

In [17]:
def eval_input_fn(x_train,y_train,batch_size):
    '''
        Description: 
        Usage:
    '''
    dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train))
    dataset = dataset.batch(batch_size).map(parser)
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()  

In [18]:
def serving_input_receiver_fn():
    """
        Description: This is used to define inputs to serve the model.
        Usage:
        return: ServingInputReciever
        Ref: https://www.tensorflow.org/versions/r1.7/api_docs/python/tf/estimator/export/ServingInputReceiver
    """

    reciever_tensors = {
        # The size of input sentence is flexible.
        "sentence":tf.placeholder(tf.int32, [None, 1])
    }
    
  
    features = {
        # Resize given images.
        "x": tf.reshape(reciever_tensors["sentence"], [200, 1])
    }

    return tf.estimator.export.ServingInputReceiver(receiver_tensors=reciever_tensors,
                                                    features=features)

In [19]:
def main(unused_argv):
    # Create the Estimator
    RNN_classifier = tf.estimator.Estimator(model_fn=LSTM_model_fn, model_dir= FLAGS.model_dir)
    
    tensors_to_log = {"probabilities": "sigmoid_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
    
    STEP_SIZE = int(FLAGS.step_size)
    
    RNN_classifier.train(
        input_fn= lambda: train_input_fn(x_train,y_train,batch_size=100),
        steps=STEP_SIZE,
        hooks=[logging_hook])  

    eval_results = RNN_classifier.evaluate(
       input_fn = lambda: eval_input_fn(x_test,y_test,batch_size=100))
    
    print(eval_results)
    
    # Save the model
    RNN_classifier.export_savedmodel(FLAGS.saved_dir, serving_input_receiver_fn=serving_input_receiver_fn)

if __name__ == "__main__":
    tf.app.run()

NameError: global name 'FLAGS' is not defined