In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Attention, Concatenate, Bidirectional

In [None]:
# Load the conversation pairs; the CSV must provide 'question' and 'answer' columns.
data = pd.read_csv('/content/Conversation.csv')
# Pull both text columns out as NumPy arrays in one pass (questions first, answers second).
input_texts, response_texts = (data[column].values for column in ('question', 'answer'))

In [None]:
# Wrap every utterance in explicit sequence-boundary markers.
# NOTE: this rebinds input_texts/response_texts, so running the cell twice nests the markers.
input_texts = [f"<start> {sentence} <end>" for sentence in input_texts]
response_texts = [f"<start> {sentence} <end>" for sentence in response_texts]

# Tokenization: a single vocabulary is fitted on questions and answers together.
# filters='' switches off the tokenizer's character filtering.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(np.concatenate((input_texts, response_texts)))
input_sequences, response_sequences = (
    tokenizer.texts_to_sequences(texts)
    for texts in (input_texts, response_texts)
)

# Padding: both sides are right-padded to the longest sequence found on either side.
max_len = max(map(len, input_sequences + response_sequences))
input_sequences, response_sequences = (
    pad_sequences(sequences, maxlen=max_len, padding='post')
    for sequences in (input_sequences, response_sequences)
)

# One extra slot on top of the fitted word indices (id 0 is what pad_sequences fills with).
vocab_size = 1 + len(tokenizer.word_index)

In [None]:
# Encoder: embed the question tokens and read them with a bidirectional LSTM.
encoder_input = Input(shape=(max_len,))
encoder_emb = Embedding(vocab_size, 256)(encoder_input)
encoder_bi_lstm = Bidirectional(LSTM(256, return_sequences=True, return_state=True))
# With return_state=True the wrapper yields the per-step outputs followed by the
# (h, c) state pair of the forward and of the backward LSTM.
encoder_output, forward_h, forward_c, backward_h, backward_c = encoder_bi_lstm(encoder_emb)
# Join both directions so each state is 2 * 256 = 512 wide, the width of the decoder LSTM.
encoder_state_h = Concatenate()([forward_h, backward_h])
encoder_state_c = Concatenate()([forward_c, backward_c])
encoder_states = [encoder_state_h, encoder_state_c]

# Decoder with Attention
# The time axis is left unspecified (None) instead of being pinned to max_len:
# training feeds full-length (batch, max_len) sequences, while step-by-step
# decoding feeds a single token of shape (1, 1) through the very same input
# tensor. A fixed (max_len,) declaration does not match that single-token input.
decoder_input = Input(shape=(None,))
decoder_emb = Embedding(vocab_size, 256)(decoder_input)
decoder_lstm = LSTM(512, return_sequences=True, return_state=True)
# Start the decoder from the encoder's final states; only the per-step outputs are needed here.
decoder_lstm_output, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)


In [None]:
# Apply Attention: decoder outputs are the queries, encoder outputs the values.
attention = Attention()([decoder_lstm_output, encoder_output])
decoder_concat = Concatenate()([decoder_lstm_output, attention])

# Project [decoder output ; attention context] onto a distribution over the vocabulary.
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_output = decoder_dense(decoder_concat)

# Full Model (training graph): question tokens + shifted answer tokens -> next-token probabilities.
model = Model(inputs=[encoder_input, decoder_input], outputs=decoder_output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

# Preparing target data for teacher forcing: the target at step t is the answer
# token at step t + 1, so drop the first column and append a zero column.
decoder_target_data = np.pad(response_sequences[:, 1:], ((0, 0), (0, 1)), mode='constant')

# Training
batch_size = 64
epochs = 50
model.fit(
    x=[input_sequences, response_sequences],
    y=decoder_target_data,
    batch_size=batch_size,
    epochs=epochs,
)

# Encoder Model for Inference: exposes the per-step outputs and the final states.
encoder_model = Model(inputs=encoder_input, outputs=[encoder_output, encoder_states])

# Decoder Model for Inference: the LSTM state is passed in explicitly and
# returned again so decoding can advance one call at a time.
decoder_states_inputs = [Input(shape=(512,)), Input(shape=(512,))]
decoder_state_input_h, decoder_state_input_c = decoder_states_inputs

# The trained decoder_lstm and decoder_dense layers are reused here.
decoder_lstm_output, state_h, state_c = decoder_lstm(
    decoder_emb, initial_state=decoder_states_inputs
)
decoder_attention = Attention()([decoder_lstm_output, encoder_output])
decoder_concat_inf = Concatenate()([decoder_lstm_output, decoder_attention])
decoder_output = decoder_dense(decoder_concat_inf)

decoder_model = Model(
    inputs=[decoder_input, encoder_output, *decoder_states_inputs],
    outputs=[decoder_output, state_h, state_c],
)

# Response Generation with Beam Search
def beam_search_decode(input_seq, beam_width=3, max_steps=None):
    """Generate a reply for one encoded question using beam search.

    Hypotheses are ranked by their cumulative negative log-probability
    (lower is better). A hypothesis that has produced ``<end>`` is kept in
    the beam without being expanded, and the search stops once the
    best-ranked hypothesis is a finished one.

    Args:
        input_seq: Padded token-id array for a single question, as fed to
            ``encoder_model.predict``.
        beam_width: Number of hypotheses kept after every step (>= 1).
        max_steps: Upper bound on generated tokens. Defaults to the
            notebook-level ``max_len`` so decoding always terminates.

    Returns:
        The generated words joined by single spaces, without the
        ``<end>`` marker.

    Raises:
        ValueError: If ``beam_width`` is smaller than 1.
    """
    if beam_width < 1:
        raise ValueError("beam_width must be at least 1")
    if max_steps is None:
        max_steps = max_len

    # verbose=0 keeps the per-call Keras progress bars out of the cell output.
    encoder_out, states_value = encoder_model.predict(input_seq, verbose=0)
    start_token = tokenizer.word_index['<start>']
    end_token = tokenizer.word_index['<end>']

    def is_finished(token_ids):
        return bool(token_ids) and token_ids[-1] == end_token

    def to_text(token_ids):
        # Hypotheses never contain the start token (it is only fed to the
        # decoder), so only a trailing <end> has to be removed. Ids without
        # a word (e.g. padding id 0) are skipped instead of raising KeyError.
        if is_finished(token_ids):
            token_ids = token_ids[:-1]
        words = tokenizer.index_word
        return ' '.join(words[i] for i in token_ids if i in words)

    # Each beam entry: (token ids so far, cumulative -log p, decoder [h, c]).
    beams = [([], 0.0, list(states_value))]
    for _ in range(max_steps):
        candidates = []
        for seq, score, states in beams:
            if is_finished(seq):
                # Finished hypotheses compete on their final score, unexpanded.
                candidates.append((seq, score, states))
                continue

            prev_token = seq[-1] if seq else start_token
            target_seq = np.array([[prev_token]], dtype='float32')
            output_tokens, h, c = decoder_model.predict(
                [target_seq, encoder_out] + list(states), verbose=0)

            # Clip before the log so a zero probability cannot produce inf/NaN.
            step_cost = -np.log(np.clip(output_tokens[0, 0, :], 1e-12, 1.0))
            step_cost[0] = np.inf  # id 0 is padding, never a real word

            # Only the beam_width cheapest tokens of a beam can survive pruning.
            for token in np.argsort(step_cost)[:beam_width]:
                candidates.append(
                    (seq + [int(token)], score + float(step_cost[token]), [h, c]))

        beams = sorted(candidates, key=lambda item: item[1])[:beam_width]

        # Costs only grow as tokens are added, so once the best hypothesis is
        # finished no unfinished one can overtake it.
        if is_finished(beams[0][0]):
            return to_text(beams[0][0])

    # Step budget exhausted: fall back to the best hypothesis found so far.
    return to_text(beams[0][0])

# Testing the chatbot: encode one sample question exactly like the training data
# (boundary markers, shared tokenizer, post-padding to max_len) and decode a reply.
input_text = "<start> Hello, how are you doing? <end>"
input_seq = pad_sequences(
    tokenizer.texts_to_sequences([input_text]),
    maxlen=max_len,
    padding='post',
)
print("Bot:", beam_search_decode(input_seq))


Epoch 1/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 51ms/step - loss: 3.9485
Epoch 2/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 48ms/step - loss: 2.0989
Epoch 3/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 1.9773
Epoch 4/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - loss: 1.8705
Epoch 5/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - loss: 1.8125
Epoch 6/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 1.7442
Epoch 7/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - loss: 1.6827
Epoch 8/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - loss: 1.5952
Epoch 9/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - loss: 1.5054
Epoch 10/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - loss: 1.425