# KATTA bot local

This notebook replicates the bot code from the local version.

## ENV setup

Set up the libraries and the data files the bot depends on.

In [None]:
# Mount Google Drive in the Colab runtime; the data files and model weights used below are read from /content/drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Importing the required libraries for data preparation
import pandas as pd
import numpy as np
import re
import string
import nltk
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

## Setting up data utilities

Creating the required functions for preparing the data

In [None]:
def load_data(file_name: str, data_location: str = './data/'):
    """
    This function reads a text file and returns its content as a list of lines.
    The file is decoded as UTF-8 and bytes that cannot be decoded are dropped. The content is split on the
    newline character only, so a file that ends with a newline yields a trailing empty string.
    input: file_name -> str name of the file to read,
            data_location -> str = ./data/ by default, directory holding the file; the trailing '/' is optional
            and an empty string means the current working directory
    output: lines -> list data lines list from the input file
    """
    # Add the separator only when it is missing; an empty location stays empty instead of raising IndexError
    if data_location and not data_location.endswith('/'):
        data_location = data_location + '/'

    data_file = data_location + file_name
    with open(data_file, 'r', encoding='utf-8', errors='ignore') as dfile:
        lines = dfile.read().split('\n')

    print(f'Data read from {data_file} and converted into {len(lines)} lines')

    return lines

In [None]:
def prepare_data(movie_titles: list, movie_conversations: list, movie_lines: list):
    """
    This function parses the raw lines of the three data files into lists of dictionaries.
    Every line is expected to hold fields separated by ' +++$+++ '. Empty and whitespace-only lines are ignored.
    A line with too few fields raises IndexError.
    inputs: movie_titles -> list, movie_conversations -> list, movie_lines -> list
    outputs: movie_title_list -> list(dict) with keys movie_id, name, year, rating, genre (list),
             movie_conversation_list -> list(dict) with keys speaker1, speaker2, movie_id, line_ids (list),
             movie_lines_list -> list(dict) with keys line_id, speaker, movie_id, character, dialogue
    """
    separator = ' +++$+++ '

    def split_fields(line: str):
        # Strip every field first so that trailing whitespace (for example '\r') never reaches the slicing below
        return [field.strip() for field in line.split(separator)]

    def parse_quoted_list(field: str):
        # "['comedy', 'romance']" -> ['comedy', 'romance']; "[]" -> [''] (callers skip empty entries)
        return field[2:-2].strip().split("', '")

    # Prepare dictionary for movie meta data
    movie_title_list = []
    for line in movie_titles:
        if not line.strip():
            continue  # for identifying and ignoring empty lines
        movie_info = split_fields(line)
        movie_title_list.append({
            'movie_id': movie_info[0],
            'name': movie_info[1],
            'year': movie_info[2],
            'rating': movie_info[3],
            'genre': parse_quoted_list(movie_info[-1]),
        })

    # Prepare dictionary for movie convo meta data
    movie_conversation_list = []
    for line in movie_conversations:
        if not line.strip():
            continue  # for identifying and ignoring empty lines
        conversation_info = split_fields(line)
        movie_conversation_list.append({
            'speaker1': conversation_info[0],
            'speaker2': conversation_info[1],
            'movie_id': conversation_info[2],
            'line_ids': parse_quoted_list(conversation_info[-1]),
        })

    # Prepare dictionary for movie dialogues
    movie_lines_list = []
    for line in movie_lines:
        if not line.strip():
            continue  # for identifying and ignoring empty lines
        line_info = split_fields(line)
        movie_lines_list.append({
            'line_id': line_info[0],
            'speaker': line_info[1],
            'movie_id': line_info[2],
            'character': line_info[3],
            'dialogue': line_info[-1],
        })

    return movie_title_list, movie_conversation_list, movie_lines_list

In [None]:
def dataframe_from_dict(data_dict_list: list):
    """
    Build a pandas dataframe out of a list of dictionaries, one row per dictionary
    input: data_dict_list -> list(dict)
    output: frame -> pandas.DataFrame built by pandas.DataFrame.from_dict
    """
    frame = pd.DataFrame.from_dict(data_dict_list)
    return frame

In [None]:
def get_genre_dict(movie_title_df: pd.DataFrame):
    """
    This function takes the movie titles pandas dataframe and groups the movie ids by genre
    input: movie_title_df -> pandas.DataFrame with a 'movie_id' column and a 'genre' column holding a list of genre names per row
    output: genre_dict -> dict the output will have the dictionary with keys as genre and values as list of movies from that genre
    """
    # Single pass over the rows; keys appear in the order in which the genres are first met
    genre_dict = {}
    for movie, genre_list in zip(movie_title_df['movie_id'], movie_title_df['genre']):
        for genre in genre_list:
            if genre:  # movies without genre carry an empty string, which is not a genre
                genre_dict.setdefault(genre, []).append(movie)

    print('Genre dictionary prepared')

    return genre_dict

In [None]:
def prepare_conversations(movie_lines_df: pd.DataFrame, movie_conversation_df: pd.DataFrame, only_start: bool = False):
    """
    This function pairs every dialogue line of a conversation with the line that follows it
    input: movie_lines_df -> pandas.DataFrame with 'line_id' and 'dialogue' columns,
           movie_conversation_df -> pandas.DataFrame with 'movie_id' and 'line_ids' (list of line ids) columns,
           only_start -> bool when True only the first pair of every conversation is kept, False by default
    output: dialogue_dict -> dict dictionary with line_id as key and respective line as value,
            conversation_data_df -> pandas.DataFrame with 'movie_id', 'input' and 'target' columns, one row per pair
    A line id that is missing from movie_lines_df raises KeyError.
    """
    # Lookup table from line id to the dialogue text
    dialogue_dict = dict(zip(movie_lines_df['line_id'].to_numpy(), movie_lines_df['dialogue'].to_numpy()))

    movie_ids, questions, answers = [], [], []
    for movie_id, line_ids in movie_conversation_df[['movie_id', 'line_ids']].to_numpy():
        # Consecutive lines form (input, target) pairs
        exchanges = list(zip(line_ids, line_ids[1:]))
        if only_start:
            exchanges = exchanges[:1]
        for question_id, answer_id in exchanges:
            movie_ids.append(movie_id)
            questions.append(dialogue_dict[question_id])
            answers.append(dialogue_dict[answer_id])

    # Prepare dataframe from the collected columns for better access
    conversation_data_df = pd.DataFrame.from_dict({'movie_id': movie_ids, 'input': questions, 'target': answers})
    print('Conversations prepared')

    return dialogue_dict, conversation_data_df

In [None]:
# create a function for data cleaning
def clean_text(input_text: str, add_tags: bool = False, start_tag: str = 'START_ ', end_tag: str = ' _END',
                remove_punc: bool = True, remove_symbols: str = '[^0-9a-z #+_]', ignore_words: list = None,
                remove_numbers: bool = True, replace_word_from: list = None, replace_word_to: list = None):
    """
    Input: input_text (string), add_tags (optional - bool), start_tag (optional - string), end_tag (optional - string),
            remove_punc (optional - bool), remove_symbols (optional - string, regular expression of the characters to blank out),
            ignore_words (optional - list), remove_numbers (optional - bool),
            replace_word_from (optional - list), replace_word_to (optional - list)
    Output: cleaned text (string)
    description:
        This function cleans the input text. The steps run in this order: lower-casing, expansion of common
        contractions, custom replacements, removal of the ignore words, digits, punctuation and bad symbols,
        tagging, and collapsing of repeated spaces.
        if the add_tags value is True (it's False by default) it will add the start tag and end tags at the start and end of the text
        we can also define the start_tag and end_tag values
        The custom replacements are applied only when replace_word_from and replace_word_to have the same length.
        ignore_words are removed as plain substrings, not as whole words.
    """
    # None stands for "no words"; lists are never used as default values so that no state is shared between calls
    ignore_words = [] if ignore_words is None else ignore_words
    replace_word_from = [] if replace_word_from is None else replace_word_from
    replace_word_to = [] if replace_word_to is None else replace_word_to

    # Order matters: the specific contractions are expanded before the generic suffixes
    contractions = (
        ("i'm", "i am"),
        ("he's", "he is"),
        ("she's", "she is"),
        ("that's", "that is"),
        ("what's", "what is"),
        ("where's", "where is"),
        ("'ll", " will"),
        ("'ve", " have"),
        ("'re", " are"),
        ("'d", " would"),
        ("n't", " not"),
    )

    text = input_text.lower()
    for short_form, long_form in contractions:
        text = text.replace(short_form, long_form)

    if replace_word_from and len(replace_word_from) == len(replace_word_to):
        for from_word, to_word in zip(replace_word_from, replace_word_to):
            text = text.replace(str(from_word).lower(), str(to_word).lower())

    for word in ignore_words:
        text = text.replace(word, " ")

    if remove_numbers:
        text = text.translate(str.maketrans('', '', string.digits))
    if remove_punc:
        text = text.translate(str.maketrans('', '', string.punctuation))
    if remove_symbols:
        text = re.sub(remove_symbols, ' ', text)
    if add_tags:
        # Tags are added after the clean up so that their case and underscore survive
        text = start_tag + text + end_tag

    text = re.sub(' +', ' ', text)

    return text.strip()

In [None]:
def filter_short_long(conversation_data_df: pd.DataFrame, min_q_length: int = 2, max_q_length: int = 25, min_a_length: int = 2, max_a_length: int = 25):
    """
    This function keeps only the conversation pairs whose input and target lengths are within the given limits.
    The length is the number of pieces obtained by splitting the text on single spaces; limits are inclusive.
    input: conversation_data_df -> pandas.DataFrame with 'movie_id', 'input' and 'target' columns,
           min_q_length / max_q_length -> int limits for the input, min_a_length / max_a_length -> int limits for the target
    output: filtered_conversation_df -> pandas.DataFrame with the same three columns
    """
    rows = zip(conversation_data_df['movie_id'].to_numpy(),
               conversation_data_df['input'].to_numpy(),
               conversation_data_df['target'].to_numpy())
    raw_data_len = len(conversation_data_df)

    conversation_data_dict = {'movie_id': [], 'input': [], 'target': []}
    for movie_id, question, answer in rows:
        qlen, alen = len(question.split(' ')), len(answer.split(' '))
        if min_q_length <= qlen <= max_q_length and min_a_length <= alen <= max_a_length:
            conversation_data_dict['movie_id'].append(movie_id)
            conversation_data_dict['input'].append(question)
            conversation_data_dict['target'].append(answer)

    filt_data_len = len(conversation_data_dict['movie_id'])
    # An empty dataframe has nothing to filter; report 0% instead of dividing by zero
    filtered = int((raw_data_len - filt_data_len)*100/raw_data_len) if raw_data_len else 0
    print(f'{filtered}% filtered from original data')

    return pd.DataFrame.from_dict(conversation_data_dict)

In [None]:
def split_vectorize_filter_unk(conversation_data_df: pd.DataFrame, Vectorizer: TextVectorization, unk: str = '[UNK]', test_split: float = 0.2, seed: int = 42):
    """
    This function splits the conversations into training and test sets, vectorizes both and drops the training pairs with too many unknown tokens.
    A training pair is dropped when its target holds more than one unknown token, or when more than 20% of the
    non-padding tokens of its input are unknown. The test set is vectorized but not filtered.
    The first token of every target is removed before the target is vectorized; the text stored under 'target' keeps it.
    input: conversation_data_df -> pandas.DataFrame with 'movie_id', 'input' and 'target' columns, Vectorizer -> adapted vectorizer object,
           unk -> str unknown token as it appears in the vocabulary, test_split -> float share of the test set, seed -> int random state of the split
    output: training_data -> dict data needed for training, testing_data -> dict data needed for testing,
            both with the keys 'input', 'target', 'input_vectors' and 'target_vectors'
    """
    _, questions, answers = (conversation_data_df[column].to_numpy() for column in ('movie_id', 'input', 'target'))
    unk_index = Vectorizer.get_vocabulary().index(unk)

    train_inputs, test_inputs, train_targets, test_targets = train_test_split(questions, answers, test_size=test_split, random_state=seed)

    def strip_first_token(sentence: str):
        return ' '.join(sentence.split()[1:])

    shifted_train_targets = [strip_first_token(sentence) for sentence in train_targets]
    shifted_test_targets = [strip_first_token(sentence) for sentence in test_targets]

    train_vectorized_inputs = Vectorizer(train_inputs)
    train_vectorized_targets = Vectorizer(shifted_train_targets)
    test_vectorized_inputs = Vectorizer(test_inputs)
    test_vectorized_targets = Vectorizer(shifted_test_targets)

    kept_inputs, kept_targets, kept_input_vectors, kept_target_vectors = [], [], [], []
    for idx, (input_tensor, target_tensor) in enumerate(zip(train_vectorized_inputs, train_vectorized_targets)):
        input_ids = list(input_tensor.numpy())
        target_ids = list(target_tensor.numpy())
        if target_ids.count(unk_index) > 1:
            continue
        unknown_in_input = input_ids.count(unk_index)
        if unknown_in_input > 0:
            # Zeros are padding and do not count as tokens of the sentence
            real_tokens = [token for token in input_ids if token != 0]
            if unknown_in_input/len(real_tokens) > 0.2:
                continue
        kept_inputs.append(train_inputs[idx])
        kept_targets.append(train_targets[idx])
        kept_input_vectors.append(input_tensor)
        kept_target_vectors.append(target_tensor)

    training_data = {
        'input': kept_inputs,
        'target': kept_targets,
        'input_vectors': kept_input_vectors,
        'target_vectors': kept_target_vectors,
    }
    testing_data = {
        'input': test_inputs,
        'target': test_targets,
        'input_vectors': test_vectorized_inputs,
        'target_vectors': test_vectorized_targets,
    }

    print(f'Training data points: {len(train_inputs)}')
    print(f'Test data points: {len(test_inputs)}')
    filt_data_len = len(kept_inputs)
    filtered = int((len(train_inputs) - filt_data_len)*100/len(train_inputs))
    print(f'{filtered}% filtered from training data points')
    print(f'After unknown token filters training data points: {filt_data_len}')

    return training_data, testing_data

In [None]:
def make_prediction(vocab_list, decoder_model_function, encoder_model: Model, input_text: str = 'hi', next_word: str = 'START_', clean_text = clean_text, max_length: int = 19, multi_layer: bool = True):
    """
    This function takes inputs as follows and returns the model response.
    input: vocab_list -> this is the list of vocabulary used in the model,
            decoder_model_function -> this is a reference to the function which runs one decoder step; it is called as
                                      decoder_model_function(word, states, vocab_list) and must return (next word, new states),
            encoder_model -> this is the encoder model which needs to be used for input text encoding,
            input_text -> this is the input phrase for which the model creates the response, the default value is 'hi',
            next_word -> this is the trigger or start word for the decoder model, the default value is 'START_',
            clean_text -> this is a reference to the function which needs to be used for cleaning the text, the default is the 'clean_text' function written or imported in this python file,
            max_length -> limit for the bot response, defaults to 19; the response holds at most max_length + 1 words
            multi_layer -> if the model is single layer then this has to be False, by default it is True
    output: bot_response -> this is the predicted response of the bot; every word is followed by one space and '_END' is not included
    """
    cleaned_text = clean_text(input_text)
    encoder_output = encoder_model.predict([cleaned_text])
    if not multi_layer:
        # A single layer encoder returns one pair of states; wrap it to get the one-pair-per-layer layout
        encoder_output = [encoder_output]
    states = [[tf.constant(layer_states[0]), tf.constant(layer_states[1])] for layer_states in encoder_output]

    bot_response = ""
    # The loop is bounded by the number of decoder steps and not by the number of whitespace separated words:
    # an empty token (the padding entry of the vocabulary) adds no word, so a word count could loop forever
    for emitted_words in range(max_length + 2):
        next_word, states = decoder_model_function(next_word, states, vocab_list)
        if next_word == '_END' or emitted_words > max_length:
            break
        bot_response += next_word + ' '
    return bot_response

## Variable setup

Setting up the variable values for the entire program

In [None]:
# Setting up the variable for preparing the model
only_start = False  # passed to prepare_conversations; True keeps only the first pair of every conversation
max_vocab_length = 15000  # vocabulary size: vectorizer max_tokens, embedding input_dim and width of the output layer
max_length = 20  # vectorizer output_sequence_length, embedding input_length and cap used by the chat loops
test_split = 0.2  # share of the data held out as test set
random_seed = 42  # random state of the train/test split
data_subset = -1  # end index of the slice applied to the genre subset
subset = 'comedy'  # key of genre_dict selecting the movies used for the subset datasets
embedding_output_dimension = 128  # output_dim of the embedding layers
lstm_units = 400  # units of the LSTM layers of model 1
stacked_lstm_units = 256  # units of the LSTM layers of model 2
dropout_rate = 0.2  # not used in the cells visible here
epoch = 50  # epochs for the (currently commented out) fit call
sparse_loss_fun = 'sparse_categorical_crossentropy'  # loss used when compiling model 1
one_hot_loss_fuc = 'categorical_crossentropy'  # alternative loss; presumably for one-hot targets, see the commented out to_categorical lines

## Data preprocessing

preparing the datasets for model creation

In [None]:
# Load the three raw data files from the same Drive folder
data_dir = '/content/drive/MyDrive/Chatbot/data/'

# Movie details meta data
movie_titles = load_data(file_name='movie_titles_metadata.txt', data_location=data_dir)

# Conversation meta data (which lines form a conversation)
movie_conversations = load_data(file_name='movie_conversations.txt', data_location=data_dir)

# Conversation lines (the dialogue text)
movie_lines = load_data(file_name='movie_lines.txt', data_location=data_dir)

Data read from /content/drive/MyDrive/Chatbot/data/movie_titles_metadata.txt and converted into 618 lines
Data read from /content/drive/MyDrive/Chatbot/data/movie_conversations.txt and converted into 83098 lines
Data read from /content/drive/MyDrive/Chatbot/data/movie_lines.txt and converted into 304714 lines


In [None]:
# Parse the raw lines of the three files into lists of dictionaries (one dictionary per line)
movie_title_list, movie_conversation_list, movie_lines_list = prepare_data(movie_titles=movie_titles, movie_conversations=movie_conversations, movie_lines=movie_lines)

In [None]:
# Turn each list of dictionaries into its own dataframe
movie_title_df, movie_conversation_df, movie_lines_df = (
    dataframe_from_dict(data_dict_list=records)
    for records in (movie_title_list, movie_conversation_list, movie_lines_list)
)

In [None]:
# Prepare genre dictionary: genre name -> list of movie ids of that genre
genre_dict = get_genre_dict(movie_title_df=movie_title_df)

Genre dictionary prepared


In [None]:
# Build the line id -> dialogue lookup and the dataframe of (input, target) pairs for the final dataset
dialogue_dict, conversation_data_df = prepare_conversations(movie_lines_df=movie_lines_df, movie_conversation_df=movie_conversation_df, only_start=only_start)

Conversations prepared


In [None]:
# Do cleaning of the text data
# Only the targets get the start and end tags; the inputs are cleaned without tags
conversation_data_df['input'] = conversation_data_df['input'].apply(clean_text)
conversation_data_df['target'] = conversation_data_df['target'].apply(clean_text, add_tags=True)

In [None]:
# Filtering data which are not in appropriate length
# The limits are counted in space separated words; the target count includes the start and end tags added above
filtered_conversation_df = filter_short_long(conversation_data_df=conversation_data_df, 
                                                        min_q_length=2, max_q_length=20, 
                                                        min_a_length=2, max_a_length=20)

33% filtered from original data


## Prepare Vectorizer

In [None]:
# Prepare text vectorizer object
# output_mode="int" yields token indices, padded or truncated to max_length positions
# standardize=None switches off the built-in standardization, so clean_text is the only text normalisation
Vectorizer = TextVectorization(max_tokens=max_vocab_length,
                                output_mode="int",
                                output_sequence_length=max_length,
                                standardize=None)

In [None]:
# Adapt the text vectorizer for the dataset
# The vocabulary is learned from the tagged target sentences only
Vectorizer.adapt(filtered_conversation_df['target'].to_numpy())

In [None]:
# Vocabulary learned by the vectorizer; the position of a word in this list is its token index
vocab_list = Vectorizer.get_vocabulary()

In [None]:
# Size of the learned vocabulary (capped by max_vocab_length)
len(vocab_list)

15000

## Prepare training and test datasets from a subset of the data

In [None]:
# Keep only the conversations of the movies that belong to the genre selected by `subset`
subset_movies_list = genre_dict[subset]

# NOTE(review): the slice end is data_subset; with a value of -1 the slice [:-1] leaves out the last row.
# If -1 is meant as "use everything" this drops one sample - confirm the intent before changing it, because
# a different row count changes the train/test split the saved weights were trained on.
subset_movie_line_df = filtered_conversation_df[filtered_conversation_df['movie_id'].isin(subset_movies_list)][:data_subset]

In [None]:
# Split the genre subset into training and test data, vectorize it and
# remove the training sentences with too many unknown tokens
training_data, testing_data = split_vectorize_filter_unk(conversation_data_df=subset_movie_line_df, Vectorizer=Vectorizer, test_split=test_split, seed=random_seed)

Training data points: 36973
Test data points: 9244
5% filtered from training data points
After unknown token filters training data points: 35071


In [None]:
# Preparing datasets
# Text inputs stay as strings (the model vectorizes them itself); the labels are the vectorized targets
train_inputs = np.array(training_data['input'])
train_targets = np.array(training_data['target'])
#train_vector_targets = tf.keras.utils.to_categorical(np.array(training_data['target_vectors']), max_vocab_length)
# A trailing axis of size 1 is added to the label indices
train_vector_targets = tf.expand_dims(tf.constant(np.array(training_data['target_vectors'])), axis=-1)

test_inputs = np.array(testing_data['input'])
test_targets = np.array(testing_data['target'])
#test_vector_targets = tf.keras.utils.to_categorical(np.array(testing_data['target_vectors']), max_vocab_length)
test_vector_targets = tf.expand_dims(tf.constant(np.array(testing_data['target_vectors'])), axis=-1)

In [None]:
# Preparing dataset for training and validation
# Every element is ((input text, target text), label vector); batches hold 128 elements
train_data_dataset = tf.data.Dataset.from_tensor_slices((train_inputs, train_targets))
train_lables_dataset = tf.data.Dataset.from_tensor_slices(train_vector_targets)
train_dataset = tf.data.Dataset.zip((train_data_dataset, train_lables_dataset))
train_dataset = train_dataset.batch(128).prefetch(tf.data.AUTOTUNE)

test_data_dataset = tf.data.Dataset.from_tensor_slices((test_inputs, test_targets))
test_lables_dataset = tf.data.Dataset.from_tensor_slices(test_vector_targets)
test_dataset = tf.data.Dataset.zip((test_data_dataset, test_lables_dataset))
test_dataset = test_dataset.batch(128).prefetch(tf.data.AUTOTUNE)

## Prepare training and test datasets from all available data

In [None]:
# Same preparation as for the subset, but on the conversations of all genres:
# split, vectorize and remove the training sentences with too many unknown tokens
training_data_all, testing_data_all = split_vectorize_filter_unk(conversation_data_df=filtered_conversation_df, Vectorizer=Vectorizer, test_split=test_split, seed=random_seed)

Training data points: 118565
Test data points: 29642
4% filtered from training data points
After unknown token filters training data points: 112655


In [None]:
# Preparing datasets
# Text inputs stay as strings; the labels are the vectorized targets with a trailing axis of size 1
train_inputs_all = np.array(training_data_all['input'])
train_targets_all = np.array(training_data_all['target'])
#train_vector_targets_all = tf.keras.utils.to_categorical(np.array(training_data_all['target_vectors']), max_vocab_length)
train_vector_targets_all = tf.expand_dims(tf.constant(np.array(training_data_all['target_vectors'])), axis=-1)

test_inputs_all = np.array(testing_data_all['input'])
test_targets_all = np.array(testing_data_all['target'])
#test_vector_targets_all = tf.keras.utils.to_categorical(np.array(testing_data_all['target_vectors']), max_vocab_length)
test_vector_targets_all = tf.expand_dims(tf.constant(np.array(testing_data_all['target_vectors'])), axis=-1)

In [None]:
# Preparing dataset for training and validation
# Every element is ((input text, target text), label vector); batches hold 128 elements
train_data_dataset_all = tf.data.Dataset.from_tensor_slices((train_inputs_all, train_targets_all))
train_lables_dataset_all = tf.data.Dataset.from_tensor_slices(train_vector_targets_all)
train_dataset_all = tf.data.Dataset.zip((train_data_dataset_all, train_lables_dataset_all))
train_dataset_all = train_dataset_all.batch(128).prefetch(tf.data.AUTOTUNE)

test_data_dataset_all = tf.data.Dataset.from_tensor_slices((test_inputs_all, test_targets_all))
test_lables_dataset_all = tf.data.Dataset.from_tensor_slices(test_vector_targets_all)
test_dataset_all = tf.data.Dataset.zip((test_data_dataset_all, test_lables_dataset_all))
test_dataset_all = test_dataset_all.batch(128).prefetch(tf.data.AUTOTUNE)

### Stop execution

In [None]:
# Undefined name on purpose: the NameError stops the execution of the notebook at this cell
fail_here

NameError: ignored

## Model creation

### Model 1 with single layer LSTM

* The input of this model is a pair of strings: the question and the answer, which starts with the word `START_`
* Output will be the probability of the next word
* This model has single layer of LSTM Units

In [None]:
# Creating embedding object for encoder and decoder models
# mask_zero=True: index 0 is treated as padding and masked out for the following layers
EncoderEmbeddingLayer = layers.Embedding(input_dim=max_vocab_length,
                                output_dim=embedding_output_dimension, # 128
                                input_length=max_length,
                                mask_zero=True,
                                name='encoder_embedding_layer')

DecoderEmbeddingLayer = layers.Embedding(input_dim=max_vocab_length,
                                output_dim=embedding_output_dimension, # 128
                                input_length=max_length,
                                mask_zero=True,
                                name='decoder_embedding_layer')

In [None]:
# create encoder & decoder initial layers
# Both inputs take one raw string per sample; the shared vectorizer turns it into token indices inside the model
EncoderInput = layers.Input(shape=(1,), dtype=tf.string)
encoder_vector = Vectorizer(EncoderInput)

DecoderInput = layers.Input(shape=(1,), dtype=tf.string)
decoder_vector = Vectorizer(DecoderInput)

In [None]:
# Create encoder
# Only the final hidden and cell states of the encoder LSTM are used; they initialise the decoder
encoder_embeddings = EncoderEmbeddingLayer(encoder_vector)
EncoderLstmLayer = layers.LSTM(lstm_units, return_state=True, name='Encoder_LSTM')
encoder_lstm_outputs, state_h, state_c = EncoderLstmLayer(encoder_embeddings)
encoder_states = [state_h, state_c]

In [None]:
# Create Decoder
# The decoder LSTM starts from the encoder states and returns one output per position;
# the dense layer turns every output into a probability distribution over the vocabulary
decoder_embeddings = DecoderEmbeddingLayer(decoder_vector)
DecoderLstmLayer = layers.LSTM(lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM')
decoder_lstm_outputs, _, _ = DecoderLstmLayer(decoder_embeddings, initial_state=encoder_states)
DecoderDenseLayer = layers.Dense(max_vocab_length, activation='softmax', name='Decoder_dense')
decoder_dense_outputs = DecoderDenseLayer(decoder_lstm_outputs)

# Training model: (question text, answer text) -> next word probabilities for every position
EncDecModel = Model([EncoderInput, DecoderInput], decoder_dense_outputs)

In [None]:
# Compile model
# The sparse loss is used because the labels are token indices and not one-hot vectors
EncDecModel.compile(loss=sparse_loss_fun,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=['accuracy'])

In [None]:
# decoder at test time
# Encoder model for inference: question text -> final encoder states
EncModel = tf.keras.Model(EncoderInput, encoder_states)

# At inference the decoder states are fed in explicitly instead of coming from the encoder graph
DecoderStateInputH = layers.Input(shape=(lstm_units,))
DecoderStateInputC = layers.Input(shape=(lstm_units,))
decoder_states_inputs = [DecoderStateInputH, DecoderStateInputC]

# The layers of the training model are reused, so the inference decoder shares their weights
decoder_vector_test = Vectorizer(DecoderInput)
dec_embedding_test = DecoderEmbeddingLayer(decoder_vector_test)

decoder_lstm_outputs_test, state_h_test, state_c_test = DecoderLstmLayer(dec_embedding_test, initial_state=decoder_states_inputs)
decoder_states_test = [state_h_test, state_c_test]
decoder_dense_outputs_test = DecoderDenseLayer(decoder_lstm_outputs_test)

# Decoder model for inference: (word, states) -> (next word probabilities, new states)
DecModel = Model(
    inputs = [DecoderInput, decoder_states_inputs],
    outputs = [decoder_dense_outputs_test] + decoder_states_test)

In [None]:
# Loading the model weights
# The weights are read from Drive instead of training the model again (the fit call below is commented out)
EncDecModel.load_weights('/content/drive/MyDrive/Chatbot/model_weights/1LayerLstmComedy50epochs/EncDecModel1Weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f654b2576d0>

In [None]:
#model_history = EncDecModel.fit(train_dataset,
#                epochs=epoch,
#                validation_data=test_dataset)

In [None]:
#EncDecModel.evaluate(test_dataset)



[2.5522470474243164, 0.28226396441459656]

In [None]:
def decoder_model_test(input_word: str, states: list, vocab_list: list):
    """
    Run one greedy decoding step with the layers of model 1.
    input: input_word -> str the previously produced word (or the start word),
           states -> list whose first element is the [hidden state, cell state] pair of the decoder LSTM,
           vocab_list -> list vocabulary used to turn the predicted index into a word
    output: next_word -> str the most probable next word, states -> the same list object, its first element replaced by the new states
    """
    token_ids = Vectorizer([input_word])
    embedded_word = DecoderEmbeddingLayer(token_ids)
    lstm_outputs, new_state_h, new_state_c = DecoderLstmLayer(embedded_word, initial_state=states[0])
    word_probabilities = DecoderDenseLayer(lstm_outputs)
    # Greedy choice: most probable word at the first position of the first (only) sample
    best_index = tf.argmax(word_probabilities[0, 0, :]).numpy()
    next_word = vocab_list[best_index]
    # The list given by the caller is updated in place
    states[0] = [tf.constant(new_state_h), tf.constant(new_state_c)]
    return next_word, states

In [None]:
# Single response demo: encode the sentence, then decode word by word
human = 'hello'
states = [EncModel.predict([human])]
next_word = 'START_'
stop_condition = True
bot_response = ""
# decoder_model_test expects a list holding one [hidden state, cell state] pair
states[0] = [tf.constant(states[0][0]), tf.constant(states[0][1])]
while stop_condition:
    next_word, states = decoder_model_test(next_word, states, vocab_list)
    bot_response += next_word + ' '
    # Stop on the end tag or when the response is longer than max_length words (the end tag is part of the response here)
    if next_word == '_END' or len(bot_response.split()) > max_length:
        stop_condition = False
print(bot_response)

hello why are you _END 


In [None]:
# Interactive chat with the decoder steps written inline; type END CONVO to leave
end_convo = False
while not end_convo:
  # Getting the input from user
  human = input("Human: ")
  if human == 'END CONVO':
    # Leave right away: the exit command is not a sentence the bot should answer
    end_convo = True
    break
  # Encoding the input; it is cleaned like the training inputs because the vectorizer does no standardization
  stat = EncModel.predict([clean_text(human)])
  next_word = 'START_'
  bot_response = ""
  stat = [tf.constant(stat[0]), tf.constant(stat[1])]
  # Bounded by the number of decoder steps (at most max_length + 1 words) so that empty tokens cannot loop forever
  for _ in range(max_length + 1):
      # Decoder model operations starts here
      decoder_vector_test = Vectorizer([next_word])
      dec_embedding_test = DecoderEmbeddingLayer(decoder_vector_test)
      decoder_lstm_outputs_test, state_h_test, state_c_test = DecoderLstmLayer(dec_embedding_test, initial_state=stat)
      decoder_dense_output_test = DecoderDenseLayer(decoder_lstm_outputs_test)
      # Decoder model operations end here
      word_idx = tf.argmax(decoder_dense_output_test[0, 0, :]).numpy()
      next_word = vocab_list[word_idx]
      bot_response += next_word + ' '
      if next_word == '_END':
          break
      stat = [state_h_test, state_c_test]
  print("KATTA:", bot_response)

Human: hi
KATTA: hi _END 
Human: what
KATTA: one while you are _END 
Human: who are you
KATTA: i am not lefthanded _END 
Human: are you a robot
KATTA: oh no no no no no no no no no no no just watching the historical records yes _END 
Human: are you a human
KATTA: if you are not sure i would rather not _END 
Human: why are you here
KATTA: i do not know _END 
Human: i do not know i am sorry
KATTA: do not puss out _END 
Human: i like you
KATTA: i am not _END 
Human: END CONVO
KATTA: [UNK] you are a great gal you would do it you would not understand that _END 


In [None]:
# Interactive chat based on make_prediction; type END CONVO to leave
end_convo = True
while end_convo:
  human = input("Human: ")
  if human == 'END CONVO':
    # Leave right away: the exit command is not a sentence the bot should answer
    end_convo = False
    break
  bot_response = make_prediction(vocab_list=vocab_list, decoder_model_function=decoder_model_test, encoder_model=EncModel, input_text=human, clean_text=clean_text, multi_layer=False)
  print("KATTA:", bot_response)

Human: hi
KATTA: hi 
Human: how are you buddy
KATTA: how would you know 
Human: are you a robot
KATTA: oh no no no no no no no no no no no just watching the historical records yes 
Human: END CONVO
KATTA: that is the [UNK] that is right 


In [None]:
#EncDecModel.save_weights(filepath='/content/drive/MyDrive/Chatbot/model_weights/1LayerLstmComedy50epochs/EncDecModel1Weights')

In [None]:
#EncDecModel.load_weights('/content/drive/MyDrive/Chatbot/model_weights_GPU/EncDecModelWeights')

### Stop execution

In [None]:
# Intentional stop marker (see the "Stop execution" heading): `fail_here` is
# not defined anywhere in the visible notebook, so evaluating it presumably
# raises NameError and halts a "run all" before the next model section.
fail_here

### Model 2 with stacked lstm
* Input for this model is a pair of strings: the question and the first word of the answer (START_)
* Output will be the probability distribution over the next word
* This model has two stacked layers of LSTM units

In [None]:
# Creating embedding objects for encoder and decoder models
# The encoder and the decoder get separate Embedding layers with identical
# settings. mask_zero=True makes index 0 a padding value that is masked out
# for the layers that consume the embeddings.
EncoderEmbeddingLayerM2 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='encoder_embedding_layer_model2')

DecoderEmbeddingLayerM2 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='decoder_embedding_layer_model2')

In [None]:
# create encoder & decoder initial layers
# Each input carries one raw string per example (shape=(1,), dtype=tf.string).
# Both inputs go through the same Vectorizer object
# (presumably the TextVectorization layer built earlier - confirm).
EncoderInputM2 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
encoder_vectorM2 = Vectorizer(EncoderInputM2)

DecoderInputM2 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
decoder_vectorM2 = Vectorizer(DecoderInputM2)

In [None]:
# create encoder
# Two stacked LSTM layers. Layer 1 returns its full output sequence so that
# layer 2 can consume it; both layers also return their final [h, c] states,
# which are kept per layer to seed the matching decoder layers.
encoder_embeddings_M2 = EncoderEmbeddingLayerM2(encoder_vectorM2)

EncoderLstmLayer1M2 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, return_sequences=True, name='Encoder_LSTM_layer1_model2')
encoder_outputs_layer1_M2, state_h_layer1_M2, state_c_layer1_M2 = EncoderLstmLayer1M2(encoder_embeddings_M2)
encoder_states_layer1_M2 = [state_h_layer1_M2, state_c_layer1_M2]

EncoderLstmLayer2M2 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, name='Encoder_LSTM2_layer2_model2')
encoder_outputs_layer2_M2, state_h_layer2_M2, state_c_layer2_M2 = EncoderLstmLayer2M2(encoder_outputs_layer1_M2)
encoder_states_layer2_M2 = [state_h_layer2_M2, state_c_layer2_M2]

In [None]:
# create decoder
# Two stacked decoder LSTM layers; each one starts from the final states of
# the encoder layer with the same index. The decoder states returned here are
# discarded (`_`) because only the output sequence is needed for training.
decoder_embeddings_M2 = DecoderEmbeddingLayerM2(decoder_vectorM2)

DecoderLstmLayer1M2 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer1_model2')
decoder_outputs_layer1_M2, _, _ = DecoderLstmLayer1M2(decoder_embeddings_M2, initial_state=encoder_states_layer1_M2)

DecoderLstmLayer2M2 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer2_model2')
decoder_outputs_layer2_M2, _, _ = DecoderLstmLayer2M2(decoder_outputs_layer1_M2, initial_state=encoder_states_layer2_M2)

# Softmax over the vocabulary for every decoder time step.
DecoderDenseLayerM2 = tf.keras.layers.Dense(max_vocab_length, activation='softmax', name='Decoder_Dense_layer_model2')
decoder_dense_outputs_M2 = DecoderDenseLayerM2(decoder_outputs_layer2_M2)

# Training model: (encoder text, decoder text) -> per-step word probabilities.
EncDecModel2 = tf.keras.Model([EncoderInputM2, DecoderInputM2], decoder_dense_outputs_M2)

In [None]:
# Compile model
# sparse_loss_fun is defined earlier in the notebook (not visible in this
# section); accuracy is tracked so evaluate() reports [loss, accuracy].
EncDecModel2.compile(loss=sparse_loss_fun,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=['accuracy'])

In [None]:
# decoder at test time
# Inference-time models that reuse the trained layers:
#   EncModel2: encoder text -> [[h1, c1], [h2, c2]] (final states of both layers)
#   DecModel2: decoder text + per-layer states -> word probabilities + new states
EncModel2 = tf.keras.Model(EncoderInputM2, [encoder_states_layer1_M2, encoder_states_layer2_M2])

# Placeholders for the layer-1 decoder state that the caller feeds back in.
DecoderStateInputHLayer1M2 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer1M2 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer1_M2 = [DecoderStateInputHLayer1M2, DecoderStateInputCLayer1M2]

# Placeholders for the layer-2 decoder state.
DecoderStateInputHLayer2M2 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer2M2 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer2_M2 = [DecoderStateInputHLayer2M2, DecoderStateInputCLayer2M2]

decoder_vector_test_M2 = Vectorizer(DecoderInputM2)
dec_embedding_test_M2 = DecoderEmbeddingLayerM2(decoder_vector_test_M2)

# NOTE(review): `state_h_test_layer1_M1` ends in _M1 although it belongs to
# Model 2; it is only used on the next line, so this looks like a naming slip.
decoder_lstm_outputs_test_layer1_M2, state_h_test_layer1_M1, state_c_test_layer1_M2 = DecoderLstmLayer1M2(dec_embedding_test_M2, initial_state=decoder_states_inputs_layer1_M2)
decoder_states_test_layer1_M2 = [state_h_test_layer1_M1, state_c_test_layer1_M2]

decoder_lstm_outputs_test_layer2_M2, state_h2_test_layer2, state_c2_test_layer2 = DecoderLstmLayer2M2(decoder_lstm_outputs_test_layer1_M2, initial_state=decoder_states_inputs_layer2_M2)
decoder_states_test_layer2_M2 = [state_h2_test_layer2, state_c2_test_layer2]

decoder_dense_outputs_test_M2 = DecoderDenseLayerM2(decoder_lstm_outputs_test_layer2_M2)

# inputs is nested: [text, [[h1, c1], [h2, c2]]];
# outputs is [probabilities, [h1, c1], [h2, c2]].
DecModel2 = tf.keras.Model(
    inputs = [DecoderInputM2, [decoder_states_inputs_layer1_M2, decoder_states_inputs_layer2_M2]],
    outputs = [decoder_dense_outputs_test_M2] + [decoder_states_test_layer1_M2, decoder_states_test_layer2_M2])

In [None]:
# Restore previously saved weights into the training model. EncModel2 and
# DecModel2 are built from the same layer objects, so they use these weights too.
EncDecModel2.load_weights('/content/drive/MyDrive/Chatbot/model_weights/2LayerLstmComedy50epochs/EncDecModel2Weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f6522156ed0>

In [None]:
#model_history2 = EncDecModel2.fit(train_dataset,
#                epochs=epoch,
#                validation_data=test_dataset)

In [None]:
# Evaluate the stacked-LSTM model built in this section; returns [loss, accuracy].
# NOTE: this line previously evaluated EncDecModel (a different model object),
# so the 28.23% accuracy noted here earlier was not measured on EncDecModel2.
# Re-run the cell to record the real figure.
EncDecModel2.evaluate(test_dataset)



[2.5522470474243164, 0.28226396441459656]

In [None]:
def decoder_model2_test(input_word: str, states: list, vocab_list: list):
    """
    This function runs one greedy decoding step through the two stacked decoder LSTM layers of Model 2
    input: input_word -> str word fed to the decoder for this step,
           states -> list holding the [h, c] state of layer 1 at index 0 and of layer 2 at index 1,
           vocab_list -> list used to turn the predicted index back into a word
    output: next_word -> str word with the highest probability,
            states -> the same list object, overwritten in place with the new [h, c] states of both layers
    """
    token_ids = Vectorizer([input_word])
    embedded = DecoderEmbeddingLayerM2(token_ids)
    layer1_seq, layer1_h, layer1_c = DecoderLstmLayer1M2(embedded, initial_state=states[0])
    layer2_seq, layer2_h, layer2_c = DecoderLstmLayer2M2(layer1_seq, initial_state=states[1])
    word_probs = DecoderDenseLayerM2(layer2_seq)

    # Greedy pick: most probable entry at the first time step of the first example.
    best_idx = tf.argmax(word_probs[0, 0, :]).numpy()
    next_word = vocab_list[best_idx]

    # Write the fresh states back into the caller's list, one layer at a time.
    fresh_states = ((layer1_h, layer1_c), (layer2_h, layer2_c))
    for layer_no, (hidden, cell) in enumerate(fresh_states):
        states[layer_no] = [tf.constant(hidden), tf.constant(cell)]
    return next_word, states

In [None]:
# Interactive chat with Model 2 using a hand-written greedy decoding loop.
# Typing 'END CONVO' ends the session.
end_convo = True
while end_convo:
  human = input("Human: ")
  if human == 'END CONVO':
    # The sentinel is a control command, not a message for the bot: stop
    # before it gets encoded, decoded and printed as a reply.
    end_convo = False
    break
  # Encode the question; the encoder returns the final states of both layers.
  states = EncModel2.predict([human])
  next_word = 'START_'
  stop_condition = True
  bot_response = ""
  # Wrap the predicted states as tensors, one [h, c] pair per layer.
  states[0] = [tf.constant(states[0][0]), tf.constant(states[0][1])]
  states[1] = [tf.constant(states[1][0]), tf.constant(states[1][1])]
  # Feed each predicted word back in until '_END' or the length cap is hit.
  while stop_condition:
      next_word, states = decoder_model2_test(next_word, states, vocab_list)
      bot_response += next_word + ' '
      if next_word == '_END' or len(bot_response.split()) > max_length:
          stop_condition = False
  print("KATTA:", bot_response)

Human: hi
KATTA: hi _END 
Human: how are you
KATTA: insane i am going to be a jets fan _END 
Human: END CONVO
KATTA: no no no no i am not _END 


In [None]:
# Interactive chat with Model 2 through make_prediction (multi_layer=True).
# Typing 'END CONVO' ends the session.
end_convo = True
while end_convo:
  human = input("Human: ")
  if human == 'END CONVO':
    # The sentinel is a control command, not a message for the bot: stop
    # before it gets encoded, decoded and printed as a reply.
    end_convo = False
    break
  bot_response = make_prediction(vocab_list=vocab_list, decoder_model_function=decoder_model2_test, encoder_model=EncModel2, input_text=human, clean_text=clean_text, multi_layer=True)
  print("KATTA:", bot_response)

Human: hi
KATTA: hi 
Human: how are you
KATTA: insane i am going to be a jets fan 
Human: END CONVO
KATTA: you know i have got a great age of enlightenment 


In [None]:
#EncDecModel2.save_weights(filepath='/content/drive/MyDrive/Chatbot/model_weights/2LayerLstmComedy50epochs/EncDecModel2Weights')

### Stop execution

In [None]:
# Intentional stop marker (see the "Stop execution" heading): `fail_here` is
# not defined anywhere in the visible notebook, so evaluating it presumably
# raises NameError and halts a "run all" before the next model section.
fail_here

### Model 3 with single layer lstm (all data)

* This model is the same as Model 1 but trained with all available data
* This model has a single layer of LSTM units

In [None]:
# Creating embedding object for encoder and decoder models
# NOTE: these unsuffixed names (e.g. DecoderEmbeddingLayer) are also used by
# earlier cells; running this cell rebinds them to fresh, untrained layers.
# mask_zero=True makes index 0 a padding value that is masked downstream.
EncoderEmbeddingLayer = layers.Embedding(input_dim=max_vocab_length,
                                output_dim=embedding_output_dimension, # 128
                                input_length=max_length,
                                mask_zero=True,
                                name='encoder_embedding_layer')

DecoderEmbeddingLayer = layers.Embedding(input_dim=max_vocab_length,
                                output_dim=embedding_output_dimension, # 128
                                input_length=max_length,
                                mask_zero=True,
                                name='decoder_embedding_layer')

In [None]:
# create encoder & decoder initial layers
# Each input carries one raw string per example (shape=(1,), dtype=tf.string).
# Both inputs go through the same Vectorizer object
# (presumably the TextVectorization layer built earlier - confirm).
EncoderInput = layers.Input(shape=(1,), dtype=tf.string)
encoder_vector = Vectorizer(EncoderInput)

DecoderInput = layers.Input(shape=(1,), dtype=tf.string)
decoder_vector = Vectorizer(DecoderInput)

In [None]:
# Create encoder
# Single LSTM layer; only its final [h, c] states are kept (the output
# sequence is unused) and later passed to the decoder as its initial state.
encoder_embeddings = EncoderEmbeddingLayer(encoder_vector)
EncoderLstmLayer = layers.LSTM(lstm_units, return_state=True, name='Encoder_LSTM')
encoder_lstm_outputs, state_h, state_c = EncoderLstmLayer(encoder_embeddings)
encoder_states = [state_h, state_c]

In [None]:
# Create Decoder
# Single decoder LSTM seeded with the encoder's final states; its returned
# states are discarded (`_`) because training only needs the output sequence.
decoder_embeddings = DecoderEmbeddingLayer(decoder_vector)
DecoderLstmLayer = layers.LSTM(lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM')
decoder_lstm_outputs, _, _ = DecoderLstmLayer(decoder_embeddings, initial_state=encoder_states)
# Softmax over the vocabulary for every decoder time step.
DecoderDenseLayer = layers.Dense(max_vocab_length, activation='softmax', name='Decoder_dense')
decoder_dense_outputs = DecoderDenseLayer(decoder_lstm_outputs)

# Training model: (encoder text, decoder text) -> per-step word probabilities.
EncDecModel = Model([EncoderInput, DecoderInput], decoder_dense_outputs)

In [None]:
# Compile model
# sparse_loss_fun is defined earlier in the notebook (not visible in this
# section); accuracy is tracked so evaluate() reports [loss, accuracy].
EncDecModel.compile(loss=sparse_loss_fun,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=['accuracy'])

In [None]:
# decoder at test time
# Inference-time models that reuse the trained layers:
#   EncModel: encoder text -> [h, c] final encoder states
#   DecModel: decoder text + [h, c] -> word probabilities + new [h, c]
EncModel = tf.keras.Model(EncoderInput, encoder_states)

# Placeholders for the decoder state that the caller feeds back in each step.
DecoderStateInputH = layers.Input(shape=(lstm_units,))
DecoderStateInputC = layers.Input(shape=(lstm_units,))
decoder_states_inputs = [DecoderStateInputH, DecoderStateInputC]

decoder_vector_test = Vectorizer(DecoderInput)
dec_embedding_test = DecoderEmbeddingLayer(decoder_vector_test)

decoder_lstm_outputs_test, state_h_test, state_c_test = DecoderLstmLayer(dec_embedding_test, initial_state=decoder_states_inputs)
decoder_states_test = [state_h_test, state_c_test]
decoder_dense_outputs_test = DecoderDenseLayer(decoder_lstm_outputs_test)

# inputs is [text, [h, c]]; outputs is the flat list [probabilities, h, c].
DecModel = Model(
    inputs = [DecoderInput, decoder_states_inputs],
    outputs = [decoder_dense_outputs_test] + decoder_states_test)

In [None]:
# Restore previously saved weights into the training model. EncModel and
# DecModel are built from the same layer objects, so they use these weights too.
EncDecModel.load_weights('/content/drive/MyDrive/Chatbot/model_weights/1LayerLstmAllData50epochs/EncDecModel3Weights')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f64e0372290>

In [None]:
#model_history = EncDecModel.fit(train_dataset_all,
#                epochs=epoch,
#                validation_data=test_dataset_all)

In [None]:
# Evaluate on test_dataset_all; with the compile settings used for this model
# the result is [loss, accuracy].
EncDecModel.evaluate(test_dataset_all) # 28.27%



[2.688870429992676, 0.2827378511428833]

In [None]:
def decoder_model_test(input_word: str, states: list, vocab_list: list):
    """
    This function runs one greedy decoding step through the single-layer decoder LSTM
    input: input_word -> str word fed to the decoder for this step,
           states -> list whose index 0 holds the [h, c] state of the decoder LSTM,
           vocab_list -> list used to turn the predicted index back into a word
    output: next_word -> str word with the highest probability,
            states -> the same list object, with index 0 overwritten in place by the new [h, c] state
    """
    token_ids = Vectorizer([input_word])
    embedded = DecoderEmbeddingLayer(token_ids)
    lstm_seq, new_h, new_c = DecoderLstmLayer(embedded, initial_state=states[0])
    word_probs = DecoderDenseLayer(lstm_seq)

    # Greedy pick: most probable entry at the first time step of the first example.
    best_idx = tf.argmax(word_probs[0, 0, :]).numpy()
    next_word = vocab_list[best_idx]

    # Write the fresh state back into the caller's list.
    hidden_tensor = tf.constant(new_h)
    cell_tensor = tf.constant(new_c)
    states[0] = [hidden_tensor, cell_tensor]
    return next_word, states

In [None]:
# One-off greedy decode of a fixed prompt with the single-layer model.
human = 'hello'
bot_response = ""
next_word = 'START_'

# Encode the prompt, then wrap the encoder states as the single [h, c] entry
# that decoder_model_test expects at states[0].
states = [EncModel.predict([human])]
states[0] = [tf.constant(states[0][0]), tf.constant(states[0][1])]

# Keep feeding the predicted word back in until '_END' appears or the reply
# grows past max_length words.
stop_condition = True
while stop_condition:
    next_word, states = decoder_model_test(next_word, states, vocab_list)
    bot_response = bot_response + next_word + ' '
    hit_end_token = next_word == '_END'
    if hit_end_token or len(bot_response.split()) > max_length:
        stop_condition = False
print(bot_response)

i am sorry _END 


In [None]:
# Interactive chat with the single-layer model using a hand-written greedy
# decoding loop. Typing 'END CONVO' ends the session.
end_convo = False
while not end_convo:
  # Getting the input from user
  human = input("Human: ")
  if human == 'END CONVO':
    # The sentinel is a control command, not a message for the bot: stop
    # before it gets encoded, decoded and printed as a reply.
    end_convo = True
    break
  # Encoding the input
  stat = EncModel.predict([human])
  next_word = 'START_'
  stop_condition = True
  bot_response = ""
  while stop_condition:
      # Make sure the state handed to the LSTM is a pair of tensors.
      stat = [tf.constant(stat[0]), tf.constant(stat[1])]
      # Decoder model operations starts here
      decoder_vector_test = Vectorizer([next_word])
      dec_embedding_test = DecoderEmbeddingLayer(decoder_vector_test)
      decoder_lstm_outputs_test, state_h_test, state_c_test = DecoderLstmLayer(dec_embedding_test, initial_state=stat)
      decoder_dense_output_test = DecoderDenseLayer(decoder_lstm_outputs_test)
      # Decoder model operations end here
      # Greedy pick of the most probable word for this step.
      word_idx = tf.argmax(decoder_dense_output_test[0, 0, :]).numpy()
      next_word = vocab_list[word_idx]
      bot_response += next_word + ' '
      if next_word == '_END' or len(bot_response.split()) > max_length:
          stop_condition = False
      # Carry the new LSTM state into the next step.
      stat = [state_h_test, state_c_test]
  print("KATTA:", bot_response)

Human: hi
KATTA: i do not know if you are hopeless i will see you at all _END 
Human: how are you
KATTA: [UNK] _END 
Human: why are you behaving so stupid
KATTA: he wants to see you _END 
Human: okay bye
KATTA: bye dad _END 
Human: good bye
KATTA: bye _END 
Human: hello
KATTA: i am sorry _END 
Human: END CONVO
KATTA: have you seen him in the canary islands a message on the road _END 


In [None]:
# Interactive chat with the single-layer model through make_prediction
# (multi_layer=False). Typing 'END CONVO' ends the session.
end_convo = True
while end_convo:
  human = input("Human: ")
  if human == 'END CONVO':
    # The sentinel is a control command, not a message for the bot: stop
    # before it gets encoded, decoded and printed as a reply.
    end_convo = False
    break
  bot_response = make_prediction(vocab_list=vocab_list, decoder_model_function=decoder_model_test, encoder_model=EncModel, input_text=human, clean_text=clean_text, multi_layer=False)
  print("KATTA:", bot_response)

Human: hi
KATTA: i do not know if you are hopeless i will see you at all 
Human: hello
KATTA: i am sorry 
Human: END CONVO
KATTA: it you bastard 


In [None]:
#EncDecModel.save_weights(filepath='/content/drive/MyDrive/Chatbot/model_weights/1LayerLstmAllData50epochs/EncDecModel3Weights')

### Stop execution

In [None]:
# Intentional stop marker (see the "Stop execution" heading): `fail_here` is
# not defined anywhere in the visible notebook, so evaluating it presumably
# raises NameError and halts a "run all" before the next model section.
fail_here

### Model 4 with dual layer lstm (all data)

* This model is the same as Model 2 but trained with all available data
* This model has two stacked layers of LSTM units

In [None]:
# Creating embedding objects for encoder and decoder models
# The encoder and the decoder get separate Embedding layers with identical
# settings. mask_zero=True makes index 0 a padding value that is masked out
# for the layers that consume the embeddings.
EncoderEmbeddingLayerM4 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='encoder_embedding_layer_model4')

DecoderEmbeddingLayerM4 = tf.keras.layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=embedding_output_dimension,
                                     input_length=max_length,
                                     mask_zero=True,
                                     name='decoder_embedding_layer_model4')

In [None]:
# create encoder & decoder initial layers
# Each input carries one raw string per example (shape=(1,), dtype=tf.string).
# Both inputs go through the same Vectorizer object
# (presumably the TextVectorization layer built earlier - confirm).
EncoderInputM4 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
encoder_vectorM4 = Vectorizer(EncoderInputM4)

DecoderInputM4 = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
decoder_vectorM4 = Vectorizer(DecoderInputM4)

In [None]:
# create encoder
# Two stacked LSTM layers. Layer 1 returns its full output sequence so that
# layer 2 can consume it; both layers also return their final [h, c] states,
# which are kept per layer to seed the matching decoder layers.
encoder_embeddings_M4 = EncoderEmbeddingLayerM4(encoder_vectorM4)

EncoderLstmLayer1M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, return_sequences=True, name='Encoder_LSTM_layer1_model4')
encoder_outputs_layer1_M4, state_h_layer1_M4, state_c_layer1_M4 = EncoderLstmLayer1M4(encoder_embeddings_M4)
encoder_states_layer1_M4 = [state_h_layer1_M4, state_c_layer1_M4]

EncoderLstmLayer2M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_state=True, name='Encoder_LSTM2_layer2_model4')
encoder_outputs_layer2_M4, state_h_layer2_M4, state_c_layer2_M4 = EncoderLstmLayer2M4(encoder_outputs_layer1_M4)
encoder_states_layer2_M4 = [state_h_layer2_M4, state_c_layer2_M4]

In [None]:
# create decoder
# Two stacked decoder LSTM layers; each one starts from the final states of
# the encoder layer with the same index. The decoder states returned here are
# discarded (`_`) because only the output sequence is needed for training.
decoder_embeddings_M4 = DecoderEmbeddingLayerM4(decoder_vectorM4)

DecoderLstmLayer1M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer1_model4')
decoder_outputs_layer1_M4, _, _ = DecoderLstmLayer1M4(decoder_embeddings_M4, initial_state=encoder_states_layer1_M4)

DecoderLstmLayer2M4 = tf.keras.layers.LSTM(stacked_lstm_units, return_sequences=True, return_state=True, name='Decoder_LSTM2_layer2_model4')
decoder_outputs_layer2_M4, _, _ = DecoderLstmLayer2M4(decoder_outputs_layer1_M4, initial_state=encoder_states_layer2_M4)

# Softmax over the vocabulary for every decoder time step.
DecoderDenseLayerM4 = tf.keras.layers.Dense(max_vocab_length, activation='softmax', name='Decoder_Dense_layer_model4')
decoder_dense_outputs_M4 = DecoderDenseLayerM4(decoder_outputs_layer2_M4)

# Training model: (encoder text, decoder text) -> per-step word probabilities.
EncDecModel4 = tf.keras.Model([EncoderInputM4, DecoderInputM4], decoder_dense_outputs_M4)

In [None]:
# Compile model
# sparse_loss_fun is defined earlier in the notebook (not visible in this
# section); accuracy is tracked so evaluate() reports [loss, accuracy].
EncDecModel4.compile(loss=sparse_loss_fun,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=['accuracy'])

In [None]:
# decoder at test time
# Inference-time models that reuse the trained layers:
#   EncModel4: encoder text -> [[h1, c1], [h2, c2]] (final states of both layers)
#   DecModel4: decoder text + per-layer states -> word probabilities + new states
EncModel4 = tf.keras.Model(EncoderInputM4, [encoder_states_layer1_M4, encoder_states_layer2_M4])

# Placeholders for the layer-1 decoder state that the caller feeds back in.
DecoderStateInputHLayer1M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer1M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer1_M4 = [DecoderStateInputHLayer1M4, DecoderStateInputCLayer1M4]

# Placeholders for the layer-2 decoder state.
DecoderStateInputHLayer2M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
DecoderStateInputCLayer2M4 = tf.keras.layers.Input(shape=(stacked_lstm_units,))
decoder_states_inputs_layer2_M4 = [DecoderStateInputHLayer2M4, DecoderStateInputCLayer2M4]

decoder_vector_test_M4 = Vectorizer(DecoderInputM4)
dec_embedding_test_M4 = DecoderEmbeddingLayerM4(decoder_vector_test_M4)

decoder_lstm_outputs_test_layer1_M4, state_h_test_layer1_M4, state_c_test_layer1_M4 = DecoderLstmLayer1M4(dec_embedding_test_M4, initial_state=decoder_states_inputs_layer1_M4)
decoder_states_test_layer1_M4 = [state_h_test_layer1_M4, state_c_test_layer1_M4]

# NOTE(review): state_h2_test_layer2 / state_c2_test_layer2 carry no model
# suffix and are the same names the Model 2 cell assigns; this cell rebinds them.
decoder_lstm_outputs_test_layer2_M4, state_h2_test_layer2, state_c2_test_layer2 = DecoderLstmLayer2M4(decoder_lstm_outputs_test_layer1_M4, initial_state=decoder_states_inputs_layer2_M4)
decoder_states_test_layer2_M4 = [state_h2_test_layer2, state_c2_test_layer2]

decoder_dense_outputs_test_M4 = DecoderDenseLayerM4(decoder_lstm_outputs_test_layer2_M4)

# inputs is nested: [text, [[h1, c1], [h2, c2]]];
# outputs is [probabilities, [h1, c1], [h2, c2]].
DecModel4 = tf.keras.Model(
    inputs = [DecoderInputM4, [decoder_states_inputs_layer1_M4, decoder_states_inputs_layer2_M4]],
    outputs = [decoder_dense_outputs_test_M4] + [decoder_states_test_layer1_M4, decoder_states_test_layer2_M4])

In [None]:
# Restore previously saved weights into the training model. EncModel4 and
# DecModel4 are built from the same layer objects, so they use these weights too.
EncDecModel4.load_weights('/content/drive/MyDrive/Chatbot/model_weights/2LayerLstmAllData50epochs/EncDecModel4Weights')

In [None]:
#model_history4 = EncDecModel4.fit(train_dataset_all,
#                epochs=epoch,
#                validation_data=test_dataset_all)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Evaluate on test_dataset_all; with the compile settings used for this model
# the result is [loss, accuracy].
EncDecModel4.evaluate(test_dataset_all) # 29.13%



[2.4870564937591553, 0.29128652811050415]

In [None]:
def decoder_model4_test(input_word: str, states: list, vocab_list: list):
    """
    This function runs one greedy decoding step through the two stacked decoder LSTM layers of Model 4
    input: input_word -> str word fed to the decoder for this step,
           states -> list holding the [h, c] state of layer 1 at index 0 and of layer 2 at index 1,
           vocab_list -> list used to turn the predicted index back into a word
    output: next_word -> str word with the highest probability,
            states -> the same list object, overwritten in place with the new [h, c] states of both layers
    """
    token_ids = Vectorizer([input_word])
    embedded = DecoderEmbeddingLayerM4(token_ids)
    layer1_seq, layer1_h, layer1_c = DecoderLstmLayer1M4(embedded, initial_state=states[0])
    layer2_seq, layer2_h, layer2_c = DecoderLstmLayer2M4(layer1_seq, initial_state=states[1])
    word_probs = DecoderDenseLayerM4(layer2_seq)

    # Greedy pick: most probable entry at the first time step of the first example.
    best_idx = tf.argmax(word_probs[0, 0, :]).numpy()
    next_word = vocab_list[best_idx]

    # Write the fresh states back into the caller's list, one layer at a time.
    fresh_states = ((layer1_h, layer1_c), (layer2_h, layer2_c))
    for layer_no, (hidden, cell) in enumerate(fresh_states):
        states[layer_no] = [tf.constant(hidden), tf.constant(cell)]
    return next_word, states

In [None]:
# Interactive chat with Model 4 through make_prediction (multi_layer=True).
# Typing 'END CONVO' ends the session.
end_convo = True
while end_convo:
  human = input("Human: ")
  if human == 'END CONVO':
    # The sentinel is a control command, not a message for the bot: stop
    # before it gets encoded, decoded and printed as a reply.
    end_convo = False
    break
  bot_response = make_prediction(vocab_list=vocab_list, decoder_model_function=decoder_model4_test, encoder_model=EncModel4, input_text=human, clean_text=clean_text, multi_layer=True)
  print("KATTA:", bot_response)

Human: hi
KATTA: hello 
Human: why are you here
KATTA: i do not know i am sorry 
Human: END CONVO
KATTA: threatening me 


In [None]:
# Saved chat transcript kept as a bare string literal; it is not assigned to
# anything and is not used by other code in view.
# NOTE(review): presumably pasted from an earlier run of this model that still
# printed the '_END' token - confirm which run produced it.
"""
Human: hi
KATTA: hello _END 
Human: what
KATTA: do not let me go _END 
Human: aho are you
KATTA: not you _END 
Human: who are you
KATTA: gondorff asked me to meet you _END 
Human: are you a robot
KATTA: no _END 
Human: are you a human
KATTA: yes _END 
Human: why are you here
KATTA: i do not know i am sorry _END 
Human: i like you
KATTA: sure you have got to do is not that bad _END 
Human: do you like me
KATTA: it was not me _END 
"""

In [None]:
#EncDecModel4.save_weights(filepath='/content/drive/MyDrive/Chatbot/model_weights/2LayerLstmAllData50epochs/EncDecModel4Weights')