# Module 5 — L3 Apply: LSTM-based Generative Chatbot (Seq2Seq)

**Task (Apply – L3):** Implement a simple **LSTM seq2seq chatbot** that learns to generate responses from example (input → reply) pairs.

Run the cells top–down. You can add your own pairs and re-train quickly.

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# One shared seed for both NumPy and TensorFlow so repeated runs start from the same RNG state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
print('TensorFlow:', tf.__version__)

TensorFlow: 2.18.0


## 1) Toy conversation dataset (you can extend it)

In [11]:
# Each entry is one (user utterance, bot reply) training example.
pairs = [
    ('hi', 'hello'),
    ('hello', 'hi there'),
    ('how are you', 'i am fine how can i help'),
    ('what is your name', 'i am a chatbot'),
    ('who created you', 'i was created for learning'),
    ('bye', 'goodbye see you later'),
]

# Start / end markers wrapped around every reply so the decoder sees explicit boundaries.
SOS, EOS = '<sos>', '<eos>'


def _clean(text):
    """Return `text` lower-cased with surrounding whitespace removed."""
    return text.lower().strip()


inputs_texts = [_clean(question) for question, _answer in pairs]
targets_texts = [' '.join((SOS, _clean(answer), EOS)) for _question, answer in pairs]
print('Sample:', inputs_texts[0], '->', targets_texts[0])

Sample: hi -> <sos> hello <eos>


## 2) Tokenization

In [3]:
def _fit_tokenizer(texts):
    """Create a Tokenizer (5000-word cap, '<unk>' as OOV token) and fit it on `texts`."""
    tokenizer = Tokenizer(num_words=5000, oov_token='<unk>')
    tokenizer.fit_on_texts(texts)
    return tokenizer


# Separate vocabularies for the user side (encoder) and the reply side (decoder).
tok_in = _fit_tokenizer(inputs_texts)
tok_out = _fit_tokenizer(targets_texts)

in_seqs = tok_in.texts_to_sequences(inputs_texts)
out_seqs = tok_out.texts_to_sequences(targets_texts)

# Longest sequence on each side determines the padded width.
max_len_in = max(map(len, in_seqs))
max_len_out = max(map(len, out_seqs))

# Teacher forcing: the decoder input is the reply without its last token and the
# decoder target is the reply without its first token, so both are one token
# shorter than the full reply.
_dec_steps = max_len_out - 1
enc_in = pad_sequences(in_seqs, maxlen=max_len_in, padding='post')
dec_in = pad_sequences([seq[:-1] for seq in out_seqs], maxlen=_dec_steps, padding='post')
dec_out = pad_sequences([seq[1:] for seq in out_seqs], maxlen=_dec_steps, padding='post')

# +1 on top of the number of known words: id 0 is the padding value, not a word.
vocab_in = len(tok_in.word_index) + 1
vocab_out = len(tok_out.word_index) + 1
print('Shapes:', enc_in.shape, dec_in.shape, dec_out.shape)

Shapes: (6, 4) (6, 8) (6, 8)


## 3) Seq2Seq Model

In [4]:
# Hyper-parameters: embedding width and LSTM hidden size.
embed_dim = 64
lstm_units = 128

# ---- Encoder: token ids -> embeddings -> LSTM; only the final (h, c) state is kept.
enc_inputs = layers.Input(shape=(max_len_in,))
enc_embedding = layers.Embedding(vocab_in, embed_dim, mask_zero=True)
x = enc_embedding(enc_inputs)
enc_lstm = layers.LSTM(lstm_units, return_state=True)
_, state_h, state_c = enc_lstm(x)
enc_states = [state_h, state_c]

# ---- Decoder: starts from the encoder state and emits one vocabulary
# distribution per time step.
dec_inputs = layers.Input(shape=(max_len_out-1,))
dec_embedding = layers.Embedding(vocab_out, embed_dim, mask_zero=True)
y = dec_embedding(dec_inputs)
# dec_lstm and dec_dense are kept as named layers so they can be re-applied later.
dec_lstm = layers.LSTM(lstm_units, return_sequences=True, return_state=True)
dec_hidden_seq, _, _ = dec_lstm(y, initial_state=enc_states)
dec_dense = layers.Dense(vocab_out, activation='softmax')
dec_outputs = dec_dense(dec_hidden_seq)

# ---- Training model: (encoder tokens, decoder tokens) -> next-token probabilities.
model = models.Model([enc_inputs, dec_inputs], dec_outputs)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

## 4) Train

In [5]:
# Targets get a trailing axis of size 1: integer class ids, one per decoder time step.
decoder_targets = np.expand_dims(dec_out, axis=-1)
history = model.fit(
    [enc_in, dec_in],
    decoder_targets,
    epochs=200,
    batch_size=8,
    verbose=0,
)
final_accuracy = history.history['accuracy'][-1]
print('Final accuracy:', final_accuracy)

Final accuracy: 0.6041666865348816


## 5) Inference Setup

In [6]:
# Encoder used at inference time: padded input token ids -> [state_h, state_c].
encoder_model = models.Model(enc_inputs, enc_states)

# Single-step decoder: takes one token id plus the previous LSTM state and
# returns the next-token distribution together with the updated state.
state_h_in = layers.Input(shape=(lstm_units,))
state_c_in = layers.Input(shape=(lstm_units,))
dec_state_in = [state_h_in, state_c_in]
dec_single_in = layers.Input(shape=(1,))

# Re-use the *trained* decoder embedding. Looking it up only by position
# (model.layers[3]) depends on Keras' internal layer ordering, so first try to
# identify it by its vocabulary size; fall back to the positional lookup only
# when that is ambiguous (e.g. both vocabularies happen to have the same size).
_dec_emb_candidates = [
    layer for layer in model.layers
    if isinstance(layer, layers.Embedding) and layer.input_dim == vocab_out
]
if len(_dec_emb_candidates) == 1:
    dec_embedding = _dec_emb_candidates[0]
else:
    dec_embedding = model.layers[3]
if not isinstance(dec_embedding, layers.Embedding):
    raise TypeError(
        'Could not locate the decoder Embedding layer in `model`; '
        f'got {type(dec_embedding).__name__} instead.'
    )

dec_emb = dec_embedding(dec_single_in)
# dec_lstm / dec_dense are the same layer objects used in training, so the
# inference decoder shares their learned weights.
dec_outs, h, c = dec_lstm(dec_emb, initial_state=dec_state_in)
dec_outs = dec_dense(dec_outs)
decoder_model = models.Model([dec_single_in]+dec_state_in, [dec_outs,h,c])

In [12]:


def reply(sentence, max_steps=10):
    """Generate a reply to `sentence` by greedy decoding.

    The sentence is tokenized with `tok_in`, padded to `max_len_in`, and encoded
    by `encoder_model`. `decoder_model` is then run one token at a time,
    starting from the start-of-sequence token, always taking the most probable
    next token, until the end-of-sequence token is produced or `max_steps`
    tokens have been generated.

    Args:
        sentence: User utterance as a plain string.
        max_steps: Upper bound on the number of decoding steps.

    Returns:
        The generated words joined by single spaces (possibly an empty string).
    """
    # Resolve the special-token ids and the id -> word table from the fitted
    # output tokenizer on every call, so the function does not depend on
    # variables defined in some other (possibly never executed) cell and stays
    # in sync after re-fitting. Going through texts_to_sequences applies the
    # tokenizer's own normalisation to the markers (it may strip the angle
    # brackets), so the ids match what the decoder was trained on.
    sos_id = tok_out.texts_to_sequences([SOS])[0][0]
    eos_id = tok_out.texts_to_sequences([EOS])[0][0]
    itos_out = {idx: word for word, idx in tok_out.word_index.items()}

    seq = tok_in.texts_to_sequences([sentence.lower()])
    seq = pad_sequences(seq, maxlen=max_len_in, padding='post')
    states = encoder_model.predict(seq, verbose=0)

    # Decoding starts from the start-of-sequence token and the encoder state.
    token = np.array([[sos_id]])
    h, c = states
    result = []
    for _ in range(max_steps):
        out, h, c = decoder_model.predict([token, h, c], verbose=0)
        # Greedy choice: most probable token at the single decoded time step.
        next_id = int(np.argmax(out[0, 0]))
        if next_id == eos_id:
            break
        word = itos_out.get(next_id, '?')
        # Never surface the boundary markers, in either bracketed or stripped form.
        if word not in {'<sos>', '<eos>', 'sos', 'eos'}:
            result.append(word)
        # Feed the prediction back in as the next decoder input.
        token = np.array([[next_id]])
    return ' '.join(result)


## 6) Demo

In [13]:
tests = ['hi','how are you','what is your name','bye']
for t in tests:
    print('User:', t)
    print('Bot :', reply(t))
    print('-'*30)

User: hi
Bot : hello
------------------------------
User: how are you
Bot : i am fine how can i help
------------------------------
User: what is your name
Bot : i am a chatbot
------------------------------
User: bye
Bot : goodbye see you later
------------------------------
