[View in Colaboratory](https://colab.research.google.com/github/hamil168/Chatbots/blob/master/Seq2Seq.ipynb)

### TODO:
* Early stopping
* Aggregate CV loss/acc into arrays for early stopping
* refactor data processing (padding, bucketing)
* create config file

### Completed:
*  Train test split, fixed random seed
*  Validation scores to training loop
*  Determine CV loss/acc calculations 

#### distant todo:
* beamsearch decoder

In [None]:
# For a fresh Colab instance, clone fresh:
#!pip install -q xlrd
#!git clone https://github.com/hamil168/Chatbots

In [None]:
# Change to Colab directory:
#cd Chatbots/

In [None]:
# For an existing Colab instance, pull from master, uncomment this:

#!git pull https://github.com/hamil168/Chatbots master

In [None]:
# Files as they appear in the repo clone
#ls


In [None]:
#!pip install tqdm   ### use later when in .py files

In [None]:
# Installs needed for the local virtual environment
#!pip install pandas
#!pip install time
#!pip install re
#!pip install sklearn
#!pip install scipy

In [1]:
import numpy as np
import tensorflow as tf
import time
import re

from tensorflow.python.layers.core import Dense
#from tqdm import tqdm    ### use later when in .py files

from sklearn.cross_validation import train_test_split

global graph1, model

graph1 = tf.get_default_graph()



In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5918008111785651249
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3177234432
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6471722183456670414
physical_device_desc: "device: 0, name: GeForce GTX 970, pci bus id: 0000:01:00.0, compute capability: 5.2"
]


In [3]:
#my preproc.py
from preproc import *

In [4]:
lines = open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
conversations = open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')

In [5]:
id2l, cid, questions, answers, clean_questions, clean_answers, word2count, sorted_clean_questions, sorted_clean_answers = preproc_steps(lines,conversations)

questionswords2int, answerswords2int, tokens = map_questions_and_answers_to_integers(word2count)

In [6]:
# PREPROC LENGTH LIMITER CHECK:          ######### NOTE TO SELF: Do we go to length 25 what? <EOS consideration>
# this should be snipped at len() = MAX_LENGTH (25 for testing); it is 550+ otherwise
"""max_sca = max([len(s) for s in sorted_clean_answers])
print(max_sca)
max_idx = [i for i in range(len(sorted_clean_answers)) if len(sorted_clean_answers[i]) == max_sca]
print(sorted_clean_answers[max_idx[0]]) 
print(len(answers), len(sorted_clean_answers))"""

'max_sca = max([len(s) for s in sorted_clean_answers])\nprint(max_sca)\nmax_idx = [i for i in range(len(sorted_clean_answers)) if len(sorted_clean_answers[i]) == max_sca]\nprint(sorted_clean_answers[max_idx[0]]) \nprint(len(answers), len(sorted_clean_answers))'

In [6]:
# Create placeholder for inputs and the targets
# in TF, all variables are tensors
# need to go from NP --> TF tensors
# need placeholders for every TF variables inputs and targets

def model_inputs():
  #inputs and targets are 2D matrices
  inputs = tf.placeholder(tf.int32, [None, None], name = 'inputs') 
  targets = tf.placeholder(tf.int32, [None, None], name = 'targets')
  keep_prob = tf.placeholder(tf.float32, name = 'dropout_rate') #dropout
  
  #lr = tf.placeholder(tf.float32, name = 'learning_rate')
  
  encoder_sequence_length = tf.placeholder(tf.int32, (None, ), name='encoder_sequence_length')
  decoder_sequence_length = tf.placeholder(tf.int32, (None, ), name='decoder_sequence_length')
  max_sequence_length = tf.reduce_max(decoder_sequence_length, name='max_sequence_length')
  
  return inputs, targets, keep_prob, encoder_sequence_length, decoder_sequence_length, max_sequence_length

In [7]:
# Create encoder RNN layer
def encoder_rnn(rnn_inputs, rnn_size, num_layers, 
                encoder_sequence_length, keep_prob, encoder_embedding_size, encoder_word_count):
  
  
  # LSTM cell class
  # rnn_size: number of input tensors
  # sequence_length: length of each question in the atch
  
  
  def cell(units, rate):
    layer = tf.contrib.rnn.BasicLSTMCell(units)
    return tf.contrib.rnn.DropoutWrapper(layer, rate)

  encoder_cell_fw = tf.contrib.rnn.MultiRNNCell([cell(rnn_size, keep_prob) for _ in range(num_layers)])
  #encoder_cell_bw = tf.contrib.rnn.MultiRNNCell([cell(rnn_size, keep_prob) for _ in range(num_layers)])

  encoder_embeddings = tf.contrib.layers.embed_sequence(rnn_inputs, encoder_word_count, encoder_embedding_size)
  
  # bidirection rnn function (creates dynamic bidirectional network)
  # builds independent forward and backward rnn
  # need ot make sure the ends match
  # (first element is encoder_output)
  #encoder_outputs, encoder_states = tf.nn.bidirectional_dynamic_rnn(encoder_cell_fw,
  encoder_outputs, encoder_states = tf.nn.dynamic_rnn(encoder_cell_fw,                                                                    
                                                   #cell_bw = encoder_cell_bw,
                                                   inputs = encoder_embeddings,
                                                   sequence_length = encoder_sequence_length,
                                                   dtype = tf.float32)
  
  return encoder_outputs, encoder_states


In [8]:
# preprocessing the targets
# need batches, 
# need each to start with <SOS> token

def preprocess_decoder_inputs(targets, word2int_dict, batch_size):
  """

  Prepares the decoder inputs (i.e. the 'targets') for use
     
     Inputs: 
        targets: the input for the decoder for training.
        word2int_dict: one of the dictionaries used to map a word to its integer
        batch_size: size of each batch for model training
        
     Outputs:
        preprocessed_targets: the processed version of the decoder inputs
  
  """
  
  # Using <SOS> for "start of string", create a tensor with one per batch element
  left_side = tf.fill([batch_size, 1], word2int_dict['<SOS>'])
    
  # Take the targets and remove the last member of each sample (it is blank)
  right_side = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1])
  
  # Add the <SOS> to the left side of every target phrase
  return tf.concat([left_side, right_side], 1)

### Attention
- (warning for later, when I add Beam Search) **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in
`AttentionWrapper`
- will also need to return here with DeviceWrapper for multiple GPUs

In [9]:
def decoder(decoder_inputs, encoder_state, decoder_cell, decoder_embedding_size,
            vocabulary_size, decoder_sequence_length, max_sequence_length,
            word2id_dict, batch_size):
  
  
  embedding_layer = tf.Variable(tf.random_uniform([vocabulary_size, decoder_embedding_size]))
  embeddings = tf.nn.embedding_lookup(embedding_layer, decoder_inputs)
  
  output_layer = Dense(vocabulary_size, kernel_initializer=tf.truncated_normal_initializer(0.0, 0.1))
    
  with tf.variable_scope('decoder'):
  
    train_helper = tf.contrib.seq2seq.TrainingHelper(embeddings, sequence_length = decoder_sequence_length)

    train_decoder = tf.contrib.seq2seq.BasicDecoder(cell = decoder_cell,
                                             helper = train_helper,
                                             initial_state = encoder_state, 
                                                    output_layer = output_layer)


    # returns (final_outputs, final_state, final_sequence_lengths)
    train_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder = train_decoder,
                                                             impute_finished = True,
                                                             maximum_iterations = max_sequence_length)
    ###########################
    #decoder_output_dropout is handled in a attention wrapper function outside of this functinon                                                                
                                                   
         
  with tf.variable_scope('decoder', reuse=True):
  
    starting_id_vector = tf.tile(tf.constant([word2id_dict['<SOS>']], dtype=tf.int32), [batch_size], name = 'starting_id_vector')                                               
                                                   
    infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding_layer, 
                                                            starting_id_vector,
                                                           word2id_dict['<EOS>'])                                                   

    infer_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell,
                                                    helper = infer_helper,
                                                    initial_state = encoder_state,
                                                    output_layer=output_layer)


    # returns (final_outputs, final_state, final_sequence_lengths)
    infer_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(infer_decoder,
                                                             impute_finished = True,
                                                             maximum_iterations = max_sequence_length)
    
                                                   
  return train_decoder_output, infer_decoder_output
                                                   
                                                   
                                              
  

In [10]:

def attention_mechanism(rnn_size, keep_prob, encoder_outputs, encoder_states, encoder_sequence_length, batch_size):
  
  
  def cell(units, probs):
    layer = tf.contrib.rnn.BasicLSTMCell(units)
    return tf.contrib.rnn.DropoutWrapper(layer, probs)
  
  decoder_cell = cell(rnn_size, keep_prob)
  
  attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(rnn_size, 
                                                            encoder_outputs,
                                                            encoder_sequence_length)
  
  decoder_cell_wrapped = tf.contrib.seq2seq.AttentionWrapper(decoder_cell,
                                                            attention_mechanism,
                                                            rnn_size / 2)
  
  attention_ought = decoder_cell_wrapped.zero_state(batch_size = batch_size, dtype = tf.float32)
  
  encoder_state_new = attention_ought.clone(cell_state = encoder_states[-1])
  
  return decoder_cell_wrapped, encoder_state_new
  


In [11]:
def optimizer_loss(outputs, targets, decoder_sequence_length, max_sequence_length, learning_rate, clip_rate):
    '''
	
		Function used to define optimizer and loss function
		Inputs:
			outputs - outputs got from decoder part of the network
			targets - expected outputs/ labels
			dec_seq_len -
			max_seq_len - 
			learning_rate - small nubmer used to decrease value of gradients used to update our network
			clip_rate - tolerance boundries for clipping gradients
		Outputs:
			loss -
			trained_opt - optimizer with clipped gradients
    '''
    logits = tf.identity(outputs.rnn_output)
    
    mask_weights = tf.sequence_mask(decoder_sequence_length, max_sequence_length, dtype=tf.float32)
    
    with tf.variable_scope('opt_loss'):
        #using sequence_loss to optimize the seq2seq model
        loss = tf.contrib.seq2seq.sequence_loss(logits, 
                                                targets, 
                                                mask_weights)
        
        #Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)

        #Next 3 lines used to clip gradients {Prevent gradient explosion problem}
        gradients = tf.gradients(loss, tf.trainable_variables())
        clipped_grads, _ = tf.clip_by_global_norm(gradients, clip_rate)
        trained_opt = optimizer.apply_gradients(zip(clipped_grads, tf.trainable_variables()))
        
    return loss, trained_opt


In [12]:
class Seq2Seq_Model(object):
    
    def __init__(self, learning_rate, batch_size, encoder_embedded_size, decoder_embedded_size, rnn_size, 
                 number_of_layers, vocab_size, word2id_dict, clip_rate):
        
        #tf.reset_default_graph()
        
        self.inputs, self.targets, self.keep_prob, self.encoder_sequence_length, self.decoder_sequence_length, max_sequence_length = model_inputs()
        
        
        enc_outputs, enc_states = encoder_rnn(self.inputs, 
                                          rnn_size,
                                          number_of_layers, 
                                          self.encoder_sequence_length, 
                                          self.keep_prob, 
                                          encoder_embedded_size, 
                                          vocab_size)
        
        dec_inputs = preprocess_decoder_inputs(self.targets, 
                                                  word2id_dict, 
                                                  batch_size)
        
        
        decoder_cell, encoder_states_new = attention_mechanism(rnn_size, 
                                                          self.keep_prob, 
                                                          enc_outputs, 
                                                          enc_states, 
                                                          self.encoder_sequence_length, 
                                                          batch_size)
        
        train_outputs, inference_output = decoder(dec_inputs, 
                                                  encoder_states_new, 
                                                  decoder_cell,
                                                  decoder_embedded_size, 
                                                  vocab_size, 
                                                  self.decoder_sequence_length, 
                                                  max_sequence_length, 
                                                  word2id_dict, 
                                                  batch_size)
        
        self.predictions  = tf.identity(inference_output.sample_id, name='preds')
        
        self.loss, self.opt = optimizer_loss(train_outputs, 
                                       self.targets, 
                                       self.decoder_sequence_length, 
                                       max_sequence_length, 
                                       learning_rate, 
                                       clip_rate)

In [13]:
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))

In [14]:
# Fxn to split data itno batches for batch gradient descent                                            
def split_into_batches(questions, answers, batch_size):
  padded_questions_in_batch = []
  padded_answers_in_batch = []
  final_question_batches = []
  final_answer_batches = []
  
  for batch_index in range(0,len(questions) // batch_size):
      start_index = batch_index * batch_size
                                            
      questions_in_batch = questions[start_index : start_index + batch_size]
      answers_in_batch = answers[start_index : start_index + batch_size]
      
                         
      final_question_batches.append(questions_in_batch)
      final_answer_batches.append(answers_in_batch)
       
  return final_question_batches, final_answer_batches
   

In [15]:
# Next up: hyper parameters
epochs = 10 #100
batch_size = 64  #64 make bigger to make faster
rnn_size = 64 # 512
num_layers = 3  #3
encoding_embedding_size = 64 #512  # 512 col in embedding matrix
decoding_embedding_size = 64 #512
learning_rate = 0.05 # 0.01
learning_rate_decay = 0.9
min_learning_rate = 0.003 #0.0001
keep_prob = 0.5
keep_probability = 0.5  # based on hinton paper '14'
clip= 5

In [16]:
# At this point, these MUST be equal
print(len(sorted_clean_questions), len(sorted_clean_answers))

203602 203602


In [None]:
#max([len(a) for b in padded_train_A_batches for a in b])

In [None]:
#np.array([sequence + questionswords2int['<PAD>']] * (25 - len(sequence)) for sequence in sorted_clean_answers])

In [None]:
#print(np.asarray(padded_train_A_batches)[0].shape)
#print(np.asarray(padded_train_A_batches)[0][1].shape)

In [None]:
# Onesuch error is (299, 20, 69xxx) ... 20 batches of 299 "words" of 69xxx possible words
print(np.asarray(padded_val_Q_batches).shape)
print(np.asarray(padded_val_A_batches).shape)

In [None]:
# If needed during testing
session.close()

In [None]:
# instantiate the Seq2Seq model using graph1
# starts with resetting graph1 for debugging purposes

tf.reset_default_graph()
graph1 = tf.get_default_graph()

with graph1.as_default():
  
  model = Seq2Seq_Model(learning_rate, batch_size, encoding_embedding_size, 
                        decoding_embedding_size,
                        rnn_size, num_layers, len(word2count), 
                        questionswords2int, clip)
  

In [None]:
padded_train_Q_batches = padded_train_Q_batches[0:999]
padded_train_A_batches = padded_train_Q_batches[0:999]

In [None]:
# establish session using graph1

session = tf.Session(graph = graph1)

In [None]:
run_opts = tf.RunOptions(report_tensor_allocations_upon_oom = True)
saver = tf.train.Saver()

In [None]:
# training loop

# initialize global variables
session.run(tf.global_variables_initializer())
PRINT_ERROR = True

# empty lists
epoch_accuracy = []
epoch_loss = []

# Start training loop
for i in range(epochs):
  
  # empty lists to collect loss and acc for bucketd training sets
  batch_accuracy = []
  batch_loss = []
  
  # for bucket index
  # tqdm is a progress bar that does not look good in notebooks
  # but looks good in command line
  #for b_idx in tqdm(len(padded_train_Q_batches)): ### commented out for debugging
  
  for b_idx in range(len(padded_train_A_batches)): #<-- for debugging
  
    # convert the python arrays to numpy arrays
    X_batch = np.asarray(padded_train_Q_batches[b_idx])
    y_batch = np.asarray(padded_train_A_batches[b_idx])
    ### print(X_batch.shape, y_batch.shape)

    # create the feed_dict for the model creation steps
    feed_dict = {model.inputs:X_batch, 
         model.targets:y_batch, 
         model.keep_prob:keep_prob, 
         model.decoder_sequence_length:[len(y_batch[0])]*batch_size,
         model.encoder_sequence_length:[len(X_batch[0])]*batch_size}

    
    
    # a single step of batch gradient descent
    cost, _, preds = session.run([model.loss, model.opt, model.predictions], feed_dict=feed_dict, options = run_opts)

    # collect loss/acc for each batch
    batch_loss.append(cost)
    batch_accuracy.append(get_accuracy(y_batch, preds))

       
    #if(PRINT_ERROR == True and b_idx%100 == 0): 
    if(PRINT_ERROR == True and b_idx%5 == 0):
      print(" Bucket {}:".format(b_idx), 
          " | Loss: {}".format(np.mean(batch_loss)), 
          " | Accuracy: {}".format(np.mean(batch_accuracy)))

  epoch_loss.append(np.mean(batch_loss))
  epoch_accuracy.append(np.mean(batch_accuracy))
  
  # Print epoch and CV loss/accuracy:
  #if(PRINT_ERROR == True and i%100 == 0):
  if(PRINT_ERROR == True and i%1 == 0):
      val_losses = []
      val_acc = []
      
      for v_idx in range(2):#len(padded_val_Q_batches)):
        X_val = np.asarray(padded_val_Q_batches[v_idx])
        
        y_val = np.asarray(padded_val_A_batches[v_idx])
        ### print(X_val.shape, y_val.shape)
        
        # validation feed_dict
        val_feed_dict = {model.inputs:X_val, 
                         model.targets:y_val, 
                         model.keep_prob:1, 
                         model.decoder_sequence_length:[len(y_val[0])]*batch_size,
                         model.encoder_sequence_length:[len(X_val[0])]*batch_size}

        
        # run model loss and predictions, but not optimization -- scoring, not training!
        val_loss, val_preds = session.run([model.loss, model.predictions], feed_dict = val_feed_dict)
                
        val_losses.append(val_loss)
        val_acc.append(get_accuracy(y_val, val_preds))
        #val_acc.append(tf.metrics.accuracy(y_val, val_preds)) ##<-- causes attribute error in np.mean()
           
      print("EPOCH[{}]: {}/{}".format(i, i+1, epochs), 
          "\n --->| loss: {} val: {}".format(np.mean(epoch_loss), np.mean(val_losses)), 
          "\n --->| acc: {} val: {}".format(np.mean(epoch_accuracy), np.mean(val_acc)))

  saver.save(session, "checkpoint/chatbot_{}.ckpt".format(i))
    
session.close()

    

In [None]:
### Debugging Parameters
# Q's | BS | bmod | epochs
# OK: 50 | 10 | %2 | 5 
# NOT OK: 100 | 20 | %2 | 5
# error: OOM [299, 20, 69k] <--- an answer. Why is that 299? 2GB
#
# after fixed bug, should not have OOM error now
# 1000 | 64 | %5 | 10
# WORKS GREAT! (stopped early by force)
#EPOCH[5]: 6/10 
#--->| loss: 1.7298284769058228 val: 5.566536903381348 
#--->| acc: 0.693757924591258 val: 0.49968749999999995

## USER ASKS, BOT ANSWERS
#### TODO: 
- Prep model for single input prediction
- Hacked
- Train a model more deeply

In [22]:
def get_user_input():

    # be wary of relationship of array dimensions between this single string and the
    # batch of strings being used to train the model, mismatch will need to be
    # compensated for
    user_input = input()

    cleaned_user_input = clean_text(user_input)
    cleaned_user_input += ' <EOS>'
    clean_len = len(cleaned_user_input.split())
    
    MAX_SENTENCE_LENGTH = 25
    for i in range(0,MAX_SENTENCE_LENGTH):
        if i >= clean_len:
            #print(i, MAX_SENTENCE_LENGTH)
            cleaned_user_input += ' <PAD>'

    cleaned_user_input_list = cleaned_user_input.split()
    user_input_ints = word_into_int(cleaned_user_input_list, questionswords2int)

    # hack b/c only using 1 line.
    user_ints = [i[0] for i in user_input_ints]
    return user_ints

In [23]:
# here if needed during debugging
# session.close()

In [None]:
tf.reset_default_graph()
graph1 = tf.get_default_graph()

with graph1.as_default():

    model = Seq2Seq_Model(learning_rate, batch_size, encoding_embedding_size, 
                        decoding_embedding_size,
                        rnn_size, num_layers, len(word2count), 
                        questionswords2int, clip)

    saver = tf.train.Saver()

In [24]:
# Very rough putting together of the above pieces

def get_user_output(batch_size, answerswords2int):
    
    user_ints = get_user_input()
    #print(user_ints)
    
    while user_ints != 'quit':

        response = ''



        with tf.Session() as session:
            saver.restore(session, save_path='/PYTHON_VIRTUAL_ENVIRONMENTS/Chatbots/checkpoint/chatbot_5.ckpt')

            # not sure how to deliver a single entry for predictions
            # keep getting dim error, so I am putting batch size repetition of the user input
            # very hacky
            tiled_input = np.asarray([user_ints for i in range(batch_size)])

            feed_dict = {model.inputs:tiled_input,
                         model.targets:np.zeros(tiled_input.shape), 
                         model.keep_prob:1, 
                         model.decoder_sequence_length:[len(user_ints)]*batch_size,
                         model.encoder_sequence_length:[len(user_ints)]*batch_size}

            pred = session.run([model.predictions], feed_dict)
            #print(type(pred))

            invert_dict = inv_map = {v: k for k, v in answerswords2int.items()}

            text_pred = [invert_dict[p] for p in pred[0][0]]

            #print(text_pred)
            #print('len: {}'.format(len(text_pred)))

            text_pred = [w for w in text_pred if not w == '<PAD>']
            response = ' '.join(text_pred)
            #construct sentence
            print(response)
            
        user_ints = get_user_input()


    return response


In [None]:
# Really slow. Need to implement the model loading properly.
get_user_output(batch_size, answerswords2int)

Chad, I am testing you. What do you think about that?
INFO:tensorflow:Restoring parameters from /PYTHON_VIRTUAL_ENVIRONMENTS/Chatbots/checkpoint/chatbot_5.ckpt
<OUT> i am you to you are you to you think that


In [None]:
"""# instantiate the Seq2Seq model using graph1
# starts with resetting graph1 for debugging purposes

tf.reset_default_graph()
graph1 = tf.get_default_graph()

with graph1.as_default():
  
  model = Seq2Seq_Model(learning_rate, batch_size, encoding_embedding_size, 
                        decoding_embedding_size,
                        rnn_size, num_layers, len(word2count), 
                        questionswords2int, clip)
  
saver = tf.train.Saver()"""

In [None]:
"""tiled_input = np.asarray([user_ints for i in range(batch_size)])
print(tiled_input)
print(tiled_input.shape)"""

In [18]:
"""with tf.Session() as session:
    saver.restore(session, save_path='/PYTHON_VIRTUAL_ENVIRONMENTS/Chatbots/checkpoint/chatbot_5.ckpt')
    
    # not sure how to deliver a single entry for predictions
    # keep getting dim error, so I am putting batch size repetition of the user input
    # very hacky
    tiled_input = np.asarray([user_ints for i in range(batch_size)])
    
    feed_dict = {model.inputs:tiled_input,
                 model.targets:np.zeros(tiled_input.shape), 
                 model.keep_prob:1, 
                 model.decoder_sequence_length:[len(user_ints)]*batch_size,
                 model.encoder_sequence_length:[len(user_ints)]*batch_size}
    
    pred = session.run([model.predictions], feed_dict)
    #print(type(pred))
    
    text_pred = [map_invert_answers_to_ints[i] for i in pred[0][0]]
    
    print(text_pred)
    print('len: {}'.format(len(text_pred)))
    
    text_pred = [w for w in text_pred if not w == '<PAD>']
    response = ' '.join(text_pred)
    #construct sentence
    print(response)
    
    
    
    

# instantiate the Seq2Seq model using graph1
# starts with resetting graph1 for debugging purposes
    
    
    """

"with tf.Session() as session:\n    saver.restore(session, save_path='/PYTHON_VIRTUAL_ENVIRONMENTS/Chatbots/checkpoint/chatbot_5.ckpt')\n    \n    # not sure how to deliver a single entry for predictions\n    # keep getting dim error, so I am putting batch size repetition of the user input\n    # very hacky\n    tiled_input = np.asarray([user_ints for i in range(batch_size)])\n    \n    feed_dict = {model.inputs:tiled_input,\n                 model.targets:np.zeros(tiled_input.shape), \n                 model.keep_prob:1, \n                 model.decoder_sequence_length:[len(user_ints)]*batch_size,\n                 model.encoder_sequence_length:[len(user_ints)]*batch_size}\n    \n    pred = session.run([model.predictions], feed_dict)\n    #print(type(pred))\n    \n    text_pred = [map_invert_answers_to_ints[i] for i in pred[0][0]]\n    \n    print(text_pred)\n    print('len: {}'.format(len(text_pred)))\n    \n    text_pred = [w for w in text_pred if not w == '<PAD>']\n    response = '