## 0. ENV setup

Setup libraries and dependent data files 

#### Code

In [1]:
# Importing the required notebooks
#import import_ipynb
import data_utils

In [2]:
# Importing the required libraires
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from sklearn.model_selection import train_test_split

## 1. Variable setup

Setting up the variable values for the entire program

In [3]:
# Setting up the variable for preparing the model
only_start = False
max_vocab_length = 15000
max_length = 20
test_split = 0.2
random_seed = 42
data_subset = -1
embedding_output_dimension = 128
lstm_units = 400
stacked_lstm_units = 256
dropout_rate = 0.2
epoch = 50
sparse_loss_fun = 'sparse_categorical_crossentropy'
one_hot_loss_fuc = 'categorical_crossentropy'

## 2. Data preprocessing

preparing the datasets for model creation

In [4]:
# Load the data 
# Load the movie details meta data
movie_titles = data_utils.load_data(file_name='movie_titles_metadata.txt')

# Load the conversation meta data
movie_conversations = data_utils.load_data(file_name='movie_conversations.txt')

# Load the conversation lines
movie_lines = data_utils.load_data(file_name='movie_lines.txt')

Data read from ./data/movie_titles_metadata.txt and converted into 618 lines
Data read from ./data/movie_conversations.txt and converted into 83098 lines
Data read from ./data/movie_lines.txt and converted into 304714 lines


#### Variable check

In [5]:
len(movie_titles), len(movie_conversations), len(movie_lines)

(618, 83098, 304714)

#### Code

In [6]:
# Prepare dictionary for all data
movie_title_list, movie_conversation_list, movie_lines_list = data_utils.prepare_data(movie_titles=movie_titles, movie_conversations=movie_conversations, movie_lines=movie_lines)

#### Variable check

In [7]:
len(movie_title_list), len(movie_conversation_list), len(movie_lines_list)

(617, 83097, 304713)

In [8]:
#movie_title_list

In [9]:
#movie_conversation_list

In [10]:
#movie_lines_list

#### Code

In [11]:
# Prepare dataframe from  the dictionary
movie_title_df = data_utils.dataframe_from_dict(data_dict_list=movie_title_list)
movie_conversation_df = data_utils.dataframe_from_dict(data_dict_list=movie_conversation_list)
movie_lines_df = data_utils.dataframe_from_dict(data_dict_list=movie_lines_list)

#### Variable check

In [12]:
movie_title_df.head()

Unnamed: 0,movie_id,name,year,rating,genre
0,m0,10 things i hate about you,1999,6.9,"[comedy, romance]"
1,m1,1492: conquest of paradise,1992,6.2,"[adventure, biography, drama, history]"
2,m2,15 minutes,2001,6.1,"[action, crime, drama, thriller]"
3,m3,2001: a space odyssey,1968,8.4,"[adventure, mystery, sci-fi]"
4,m4,48 hrs.,1982,6.9,"[action, comedy, crime, drama, thriller]"


In [13]:
movie_conversation_df.head()

Unnamed: 0,speaker1,speaker2,movie_id,line_ids
0,u0,u2,m0,"[L194, L195, L196, L197]"
1,u0,u2,m0,"[L198, L199]"
2,u0,u2,m0,"[L200, L201, L202, L203]"
3,u0,u2,m0,"[L204, L205, L206]"
4,u0,u2,m0,"[L207, L208]"


In [14]:
movie_lines_df.head()

Unnamed: 0,line_id,speaker,movie_id,character,dialogue
0,L1045,u0,m0,BIANCA,They do not!
1,L1044,u2,m0,CAMERON,They do to!
2,L985,u0,m0,BIANCA,I hope so.
3,L984,u2,m0,CAMERON,She okay?
4,L925,u0,m0,BIANCA,Let's go.


#### Code

In [15]:
# Prepare genre dictionary
genre_dict = data_utils.get_genre_dict(movie_title_df=movie_title_df)

Genre dictionary prepared


#### Variable check

In [16]:
#genre_dict

#### Code

In [17]:
# Make dialogue dict for final dataset
dialogue_dict, conversation_data_df = data_utils.prepare_conversations(movie_lines_df=movie_lines_df, movie_conversation_df=movie_conversation_df, only_start=only_start)

Conversations prepared


#### Variable check

In [18]:
#dialogue_dict

In [19]:
conversation_data_df.head()

Unnamed: 0,movie_id,input,target
0,m0,Can we make this quick? Roxanne Korrine and A...,"Well, I thought we'd start with pronunciation,..."
1,m0,"Well, I thought we'd start with pronunciation,...",Not the hacking and gagging and spitting part....
2,m0,Not the hacking and gagging and spitting part....,Okay... then how 'bout we try out some French ...
3,m0,You're asking me out. That's so cute. What's ...,Forget it.
4,m0,"No, no, it's my fault -- we didn't have a prop...",Cameron.


#### Code

In [20]:
# Do cleaning of the text data
conversation_data_df['input'] = conversation_data_df['input'].apply(data_utils.clean_text)
conversation_data_df['target'] = conversation_data_df['target'].apply(data_utils.clean_text, add_tags=True)

#### Variable check

In [21]:
conversation_data_df.head()

Unnamed: 0,movie_id,input,target
0,m0,can we make this quick roxanne korrine and and...,START_ well i thought we would start with pron...
1,m0,well i thought we would start with pronunciati...,START_ not the hacking and gagging and spittin...
2,m0,not the hacking and gagging and spitting part ...,START_ okay then how bout we try out some fren...
3,m0,you are asking me out that is so cute what is ...,START_ forget it _END
4,m0,no no its my fault we did not have a proper in...,START_ cameron _END


In [22]:
conversation_data_df.describe()

Unnamed: 0,movie_id,input,target
count,221616,221616,221616
unique,617,187664,187033
top,m289,what,START_ what _END
freq,1192,1732,1601


#### Code

In [23]:
# Filtering data which are not in appropriate length
filtered_conversation_df = data_utils.filter_short_long(conversation_data_df=conversation_data_df, 
                                                        min_q_length=2, max_q_length=20, 
                                                        min_a_length=2, max_a_length=20)

33% filtered from original data


#### Variable check

In [24]:
filtered_conversation_df.head()

Unnamed: 0,movie_id,input,target
0,m0,well i thought we would start with pronunciati...,START_ not the hacking and gagging and spittin...
1,m0,not the hacking and gagging and spitting part ...,START_ okay then how bout we try out some fren...
2,m0,you are asking me out that is so cute what is ...,START_ forget it _END
3,m0,no no its my fault we did not have a proper in...,START_ cameron _END
4,m0,gosh if only we could find kat a boyfriend,START_ let me see what i can do _END


In [25]:
filtered_conversation_df.describe()

Unnamed: 0,movie_id,input,target
count,148207,148207,148207
unique,617,132259,121712
top,m299,i do not know,START_ what _END
freq,789,250,1311


## 3. Prepare Vectorizer

#### Code

In [26]:
# Prepare text vectorizer object
Vectorizer = TextVectorization(max_tokens=max_vocab_length,
                                output_mode="int",
                                output_sequence_length=max_length,
                                standardize=None)

In [27]:
# Adapt the text vectorizer for the dataset
Vectorizer.adapt(filtered_conversation_df['target'].to_numpy())

In [28]:
vocab_list = Vectorizer.get_vocabulary()

#### Variable check

In [29]:
#Vectorizer.get_vocabulary()

## 4. Prepare traing and test datasets from subset of data

#### Code

In [30]:
# Filter only the comedy movies data
comedy_movies_list = genre_dict['comedy']

comedy_movie_line_df = filtered_conversation_df[filtered_conversation_df['movie_id'].isin(comedy_movies_list)][:data_subset]

#### Variable check

In [31]:
comedy_movie_line_df.head()

Unnamed: 0,movie_id,input,target
0,m0,well i thought we would start with pronunciati...,START_ not the hacking and gagging and spittin...
1,m0,not the hacking and gagging and spitting part ...,START_ okay then how bout we try out some fren...
2,m0,you are asking me out that is so cute what is ...,START_ forget it _END
3,m0,no no its my fault we did not have a proper in...,START_ cameron _END
4,m0,gosh if only we could find kat a boyfriend,START_ let me see what i can do _END


In [32]:
comedy_movie_line_df.describe()

Unnamed: 0,movie_id,input,target
count,46217,46217,46217
unique,159,42805,39557
top,m299,i do not know,START_ what _END
freq,789,76,416


#### Code

In [33]:
# Prepare data for datasets also remove the sentences with most unknown tokens
training_data, testing_data = data_utils.split_vectorize_filter_unk(conversation_data_df=comedy_movie_line_df, Vectorizer=Vectorizer, test_split=test_split, seed=random_seed)

Training data points: 36973
Test data points: 9244
5% filtered from training data points
After unknown token filters training data points: 35071


#### Variable check

In [34]:
training_data.keys(), testing_data.keys()

(dict_keys(['input', 'target', 'input_vectors', 'target_vectors']),
 dict_keys(['input', 'target', 'input_vectors', 'target_vectors']))

In [35]:
for x in training_data.keys():
    print('training', x, np.array(training_data[x]).shape, type(training_data[x]))
    print('testing', x, np.array(testing_data[x]).shape, type(testing_data[x]))

training input (35071,) <class 'list'>
testing input (9244,) <class 'numpy.ndarray'>
training target (35071,) <class 'list'>
testing target (9244,) <class 'numpy.ndarray'>
training input_vectors (35071, 20) <class 'list'>
testing input_vectors (9244, 20) <class 'tensorflow.python.framework.ops.EagerTensor'>
training target_vectors (35071, 20) <class 'list'>
testing target_vectors (9244, 20) <class 'tensorflow.python.framework.ops.EagerTensor'>


In [36]:
sample_index = 0

In [37]:
Vectorizer.get_vocabulary()[188]

'those'

In [38]:
training_data['input'][sample_index], testing_data['target'][sample_index]

('robthanks for everything this is really happening i never thought',
 'START_ junior year _END')

In [39]:
training_data['input_vectors'][sample_index], testing_data['target_vectors'][sample_index]

(<tf.Tensor: shape=(20,), dtype=int64, numpy=
 array([  1,  28, 214,  29,  10,  99, 819,   5,  96, 132,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0], dtype=int64)>,
 <tf.Tensor: shape=(20,), dtype=int64, numpy=
 array([1370,  459,    2,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0], dtype=int64)>)

In [40]:
comedy_movie_line_df['target'].to_numpy()[0]

'START_ not the hacking and gagging and spitting part please _END'

In [41]:
len(training_data['target_vectors'])
    

35071

In [42]:
training_data.keys()

dict_keys(['input', 'target', 'input_vectors', 'target_vectors'])

In [43]:
tf.keras.utils.to_categorical(tf.random.uniform(shape=(2,3)), 6).shape

(2, 3, 6)

In [44]:
#tf.expand_dims(tf.constant(np.array(training_data['target_vectors'])), axis = -1)

#### Code

In [45]:
# Preparing datasets
train_inputs = np.array(training_data['input'])
train_targets = np.array(training_data['target'])
#train_vector_targets = tf.keras.utils.to_categorical(np.array(training_data['target_vectors']), max_vocab_length)
train_vector_targets = tf.expand_dims(tf.constant(np.array(training_data['target_vectors'])), axis=-1)

test_inputs = np.array(testing_data['input'])
test_targets = np.array(testing_data['target'])
#test_vector_targets = tf.keras.utils.to_categorical(np.array(testing_data['target_vectors']), max_vocab_length)
test_vector_targets = tf.expand_dims(tf.constant(np.array(testing_data['target_vectors'])), axis=-1)

#### Variable check

In [46]:
print(tf.__version__)

2.7.0


In [47]:
train_inputs.shape, train_targets.shape, train_vector_targets.shape

((35071,), (35071,), TensorShape([35071, 20, 1]))

In [48]:
test_inputs.shape, test_targets.shape, test_vector_targets.shape

((9244,), (9244,), TensorShape([9244, 20, 1]))

#### Code

In [49]:
# Preparing dataset for training and validation
train_data_dataset = tf.data.Dataset.from_tensor_slices((train_inputs, train_targets))
train_lables_dataset = tf.data.Dataset.from_tensor_slices(train_vector_targets)
train_dataset = tf.data.Dataset.zip((train_data_dataset, train_lables_dataset))
train_dataset = train_dataset.batch(128).prefetch(tf.data.AUTOTUNE)

test_data_dataset = tf.data.Dataset.from_tensor_slices((test_inputs, test_targets))
test_lables_dataset = tf.data.Dataset.from_tensor_slices(test_vector_targets)
test_dataset = tf.data.Dataset.zip((test_data_dataset, test_lables_dataset))
test_dataset = test_dataset.batch(128).prefetch(tf.data.AUTOTUNE)

## 5. Prepare traing and test datasets from all available data

In [50]:
# Prepare data for datasets also remove the sentences with most unknown tokens
training_data_all, testing_data_all = data_utils.split_vectorize_filter_unk(conversation_data_df=filtered_conversation_df, Vectorizer=Vectorizer, test_split=test_split, seed=random_seed)

Training data points: 118565
Test data points: 29642
4% filtered from training data points
After unknown token filters training data points: 112655


In [51]:
# Preparing datasets
train_inputs_all = np.array(training_data_all['input'])
train_targets_all = np.array(training_data_all['target'])
#train_vector_targets_all = tf.keras.utils.to_categorical(np.array(training_data_all['target_vectors']), max_vocab_length)
train_vector_targets_all = tf.expand_dims(tf.constant(np.array(training_data_all['target_vectors'])), axis=-1)

test_inputs_all = np.array(testing_data_all['input'])
test_targets_all = np.array(testing_data_all['target'])
#test_vector_targets_all = tf.keras.utils.to_categorical(np.array(testing_data_all['target_vectors']), max_vocab_length)
test_vector_targets_all = tf.expand_dims(tf.constant(np.array(testing_data_all['target_vectors'])), axis=-1)

In [52]:
# Preparing dataset for training and validation
train_data_dataset_all = tf.data.Dataset.from_tensor_slices((train_inputs_all, train_targets_all))
train_lables_dataset_all = tf.data.Dataset.from_tensor_slices(train_vector_targets_all)
train_dataset_all = tf.data.Dataset.zip((train_data_dataset_all, train_lables_dataset_all))
train_dataset_all = train_dataset_all.batch(128).prefetch(tf.data.AUTOTUNE)

test_data_dataset_all = tf.data.Dataset.from_tensor_slices((test_inputs_all, test_targets_all))
test_lables_dataset_all = tf.data.Dataset.from_tensor_slices(test_vector_targets_all)
test_dataset_all = tf.data.Dataset.zip((test_data_dataset_all, test_lables_dataset_all))
test_dataset_all = test_dataset_all.batch(128).prefetch(tf.data.AUTOTUNE)

## 6. Model creation

### Model 4 with dual layer lstm (all data)

* Input for this model is string (question, answer first word(START_))
* Output will be the probability of the next word

In [53]:
# Creatimg embedding objects for encoder and decoder models
EncoderEmbeddingLayerM4 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='encoder_embedding_layer_model4')

DecoderEmbeddingLayerM4 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='decoder_embedding_layer_model4')

In [54]:
# create encoder & decoder initial layers
EncoderInputM4 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
encoder_vectorM4 = Vectorizer(EncoderInputM4)

DecoderInputM4 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
decoder_vectorM4 = Vectorizer(DecoderInputM4)

In [55]:
# create encoder
encoder_embeddings_M4 = EncoderEmbeddingLayerM4(encoder_vectorM4)

EncoderLstmLayer1M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, return_sequences=True, name='Encoder_LSTM_layer1_model4')
encoder_outputs_layer1_M4, state_h_layer1_M4, state_c_layer1_M4 = EncoderLstmLayer1M4(encoder_embeddings_M4)
encoder_states_layer1_M4 = [state_h_layer1_M4, state_c_layer1_M4]

EncoderLstmLayer2M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, name='Encoder_LSTM2_layer2_model4')
encoder_outputs_layer2_M4, state_h_layer2_M4, state_c_layer2_M4 = EncoderLstmLayer2M4(encoder_outputs_layer1_M4)
encoder_states_layer2_M4 = [state_h_layer2_M4, state_c_layer2_M4]

In [56]:
# create decoder
decoder_embeddings_M4 = DecoderEmbeddingLayerM4(decoder_vectorM4)

DecoderLstmLayer1M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer1_model4')
decoder_outputs_layer1_M4, _, _ = DecoderLstmLayer1M4(decoder_embeddings_M4, initial_state=encoder_states_layer1_M4)

DecoderLstmLayer2M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer2_model4')
decoder_outputs_layer2_M4, _, _ = DecoderLstmLayer2M4(decoder_outputs_layer1_M4, initial_state=encoder_states_layer2_M4)

DecoderDenseLayerM4 = tf.keras.layers.Dense(max_vocab_length, activation='softmax', name='Decoder_Dense_layer_model4')
decoder_dense_outputs_M4 = DecoderDenseLayerM4(decoder_outputs_layer2_M4)

EncDecModel4 = tf.keras.Model([EncoderInputM4, DecoderInputM4], decoder_dense_outputs_M4)

In [57]:
# Compile model
EncDecModel4.compile(loss=sparse_loss_fun,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=['accuracy'])

In [58]:
# decoder at test time
EncModel4 = tf.keras.Model(EncoderInputM4, [encoder_states_layer1_M4, encoder_states_layer2_M4])

DecoderStateInputHLayer1M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer1M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer1_M4 = [DecoderStateInputHLayer1M4, DecoderStateInputCLayer1M4]

DecoderStateInputHLayer2M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer2M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer2_M4 = [DecoderStateInputHLayer2M4, DecoderStateInputCLayer2M4]

decoder_vector_test_M4 = Vectorizer(DecoderInputM4)
dec_embedding_test_M4 = DecoderEmbeddingLayerM4(decoder_vector_test_M4)

decoder_lstm_outputs_test_layer1_M4, state_h_test_layer1_M4, state_c_test_layer1_M4 = DecoderLstmLayer1M4(dec_embedding_test_M4, initial_state=decoder_states_inputs_layer1_M4)
decoder_states_test_layer1_M4 = [state_h_test_layer1_M4, state_c_test_layer1_M4]

decoder_lstm_outputs_test_layer2_M4, state_h2_test_layer2, state_c2_test_layer2 = DecoderLstmLayer2M4(decoder_lstm_outputs_test_layer1_M4, initial_state=decoder_states_inputs_layer2_M4)
decoder_states_test_layer2_M4 = [state_h2_test_layer2, state_c2_test_layer2]

decoder_dense_outputs_test_M4 = DecoderDenseLayerM4(decoder_lstm_outputs_test_layer2_M4)

DecModel4 = tf.keras.Model(
    inputs = [DecoderInputM4, [decoder_states_inputs_layer1_M4, decoder_states_inputs_layer2_M4]],
    outputs = [decoder_dense_outputs_test_M4] + [decoder_states_test_layer1_M4, decoder_states_test_layer2_M4])

#### Variable check

In [59]:
EncDecModel4.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 text_vectorization (TextVector  (None, 20)          0           ['input_1[0][0]',                
 ization)                                                         'input_2[0][0]']                
                                                                                                  
 encoder_embedding_layer_model4  (None, 20, 128)     1920000     ['text_vectorization[0][0]'] 

#### Code

In [61]:
EncDecModel4.load_weights('model_weights/2LayerLstmAllData50epochs/EncDecModel4Weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x19eb7116490>

In [62]:
#model_history4 = EncDecModel4.fit(train_dataset_all,
#                epochs=epoch,
#                validation_data=test_dataset_all)

#### Variable check

In [63]:
train_inputs_all[:10]

array(['i was sent from the post in place of our social editor',
       'move it', 'but darling you ca not do that',
       'i am just yknow passing the time best i can till i can see you',
       'she is still a transvestite',
       'yeah well it was really all kind of a joke and a ripoff so uhso i dropped out',
       'yes master', 'what what is it all about',
       'i dunno but they look nice i rather like em', 'where is karen'],
      dtype='<U149')

In [64]:
train_targets_all[:10]

array(['START_ yes of course miss ramsey telephoned me well what would you like to have _END',
       'START_ where _END', 'START_ comrades good people of france _END',
       'START_ god i ca not believe i ever hated you _END',
       'START_ mm _END', 'START_ oh that is too bad _END',
       'START_ live ones where only the dead should be _END',
       'START_ huh do you mind if i sit down i am carrying quite a load here _END',
       'START_ top of the list of priorities how nice they look _END',
       'START_ in the can that the money _END'], dtype='<U135')

#### Code

In [65]:
def decoder_model4_test(input_word: str, states: list, vocab_list: list):
    decoder_vector_test_M4 = Vectorizer([input_word])
    dec_embedding_test_M4 = DecoderEmbeddingLayerM4(decoder_vector_test_M4)
    decoder_lstm_outputs_test_layer1_M4, state_h_l1_M4, state_c_l1_M4 = DecoderLstmLayer1M4(dec_embedding_test_M4, initial_state=states[0])
    decoder_lstm_outputs_test_layer2_M4, state_h_l2_M4, state_c_l2_M4 = DecoderLstmLayer2M4(decoder_lstm_outputs_test_layer1_M4, initial_state=states[1])
    decoder_dense_outputs_test_M4 = DecoderDenseLayerM4(decoder_lstm_outputs_test_layer2_M4)
    word_idx = tf.argmax(decoder_dense_outputs_test_M4[0, 0, :]).numpy()
    next_word = vocab_list[word_idx]
    states[0] = [tf.constant(state_h_l1_M4), tf.constant(state_c_l1_M4)]
    states[1] = [tf.constant(state_h_l2_M4), tf.constant(state_c_l2_M4)]
    return next_word, states

In [69]:
end_convo = True
while end_convo:
  human = input("Human: ")
  print('Human: ', human)
  if human == 'END CONVO':
    end_convo = False
  bot_response = data_utils.make_prediction(vocab_list=vocab_list, decoder_model_function=decoder_model4_test, encoder_model=EncModel4, input_text=human, clean_text=data_utils.clean_text, multi_layer=True)
  print("KATTA:", bot_response)

Human:  Hi
KATTA: hello 
Human:  How are you
KATTA: all right 
Human:  what are you doing
KATTA: i am thinking 
Human:  good
KATTA: well i got something to do with it 
Human:  do you need any help
KATTA: i do not think i am 
Human:  are you a robot
KATTA: no 
Human:  are you a human
KATTA: yes 
Human:  do you know me
KATTA: yes 
Human:  where are you
KATTA: i am here 
Human:  who are you
KATTA: gondorff asked me to meet you 
Human:  do you want to say anything
KATTA: no 
Human:  okay great
KATTA: you are not 
Human:  do you like me
KATTA: it was not me 
Human:  good night
KATTA: good night 
Human:  what's your name
KATTA: [UNK] morton 
Human:  okay good bye
KATTA: hey 
Human:  what
KATTA: do not let me go 
Human:  goodbye
KATTA: alright 
Human:  END CONVO
KATTA: threatening me 


In [None]:
"""
Human:  Hi
KATTA: hello 
Human:  How are you
KATTA: all right 
Human:  what are you doing
KATTA: i am thinking 
Human:  good
KATTA: well i got something to do with it 
Human:  do you need any help
KATTA: i do not think i am 
Human:  are you a robot
KATTA: no 
Human:  are you a human
KATTA: yes 
Human:  do you know me
KATTA: yes 
Human:  where are you
KATTA: i am here 
Human:  who are you
KATTA: gondorff asked me to meet you 
Human:  do you want to say anything
KATTA: no 
Human:  okay great
KATTA: you are not 
Human:  do you like me
KATTA: it was not me 
Human:  good night
KATTA: good night 
Human:  what's your name
KATTA: [UNK] morton 
Human:  okay good bye
KATTA: hey 
Human:  what
KATTA: do not let me go 
Human:  goodbye
KATTA: alright 
Human:  END CONVO
KATTA: threatening me 
"""

In [None]:
#EncDecModel.save_weights('model_weights/2LayerLstmAllData50epochs/EncDecModel4Weights')