In [69]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd

In [70]:
# Assuming the dialogs.txt is uploaded in your Colab environment
file_path = "./dialogs.txt"

# Load the file
with open(file_path, 'r') as file:
    lines = file.readlines()

In [71]:
dialogue_pairs = []
for i in range(0, len(lines) - 1, 2):
    input_text = lines[i].strip().lower()
    response_text = lines[i + 1].strip().lower()
    dialogue_pairs.append((input_text, response_text))

In [72]:
dialogues_df = pd.DataFrame(dialogue_pairs, columns=['input', 'response'])

In [73]:
tokenizer = Tokenizer()
all_text = dialogues_df['input'].tolist() + dialogues_df['response'].tolist()
tokenizer.fit_on_texts(all_text)

In [74]:
input_sequences = tokenizer.texts_to_sequences(dialogues_df['input'])
response_sequences = tokenizer.texts_to_sequences(dialogues_df['response'])

In [75]:
vocab_size = len(tokenizer.word_index) + 1

In [76]:
# Padding the sequences
max_sequence_len = max(max([len(seq) for seq in input_sequences]), max([len(seq) for seq in response_sequences]))
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')
response_sequences = pad_sequences(response_sequences, maxlen=max_sequence_len, padding='post')

In [77]:


# Split data into training and validation sets
split = int(0.8 * len(input_sequences))
X_train, X_val = input_sequences[:split], input_sequences[split:]
y_train, y_val = response_sequences[:split], response_sequences[split:]

## Build Model

In [78]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding

In [79]:
embedding_dim = 128
lstm_units = 256

In [80]:
encoder_inputs = Input(shape=(max_sequence_len,))
encoder_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_len)(encoder_inputs)
encoder_lstm = LSTM(lstm_units, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]



In [81]:
decoder_inputs = Input(shape=(max_sequence_len,))
decoder_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_len)(decoder_inputs)
decoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)



In [82]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [83]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

In [84]:
model.summary()


## Train the Model

In [86]:
# Train the model
history = model.fit(
    [X_train, y_train[:, :-1]], 
    np.expand_dims(y_train[:, 1:], -1),  # Ensure y_train is shaped correctly
    epochs=50,
    batch_size=64,
    validation_data=([X_val, y_val[:, :-1]], np.expand_dims(y_val[:, 1:], -1))
)


Epoch 1/50


ValueError: Input 1 of layer "functional_4" is incompatible with the layer: expected shape=(None, 32), found shape=(None, 31)

In [None]:
# Function to generate responses
def generate_response(input_text):
    # Preprocess the input text
    input_sequence = tokenizer.texts_to_sequences([input_text.lower()])
    input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_len, padding='post')

    # Predict the response sequence
    states_value = encoder_model.predict(input_sequence)
    target_sequence = np.zeros((1, 1))
    target_sequence[0, 0] = tokenizer.word_index['start_token']

    stop_condition = False
    generated_sequence = []
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_sequence] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = tokenizer.index_word[sampled_token_index]
        generated_sequence.append(sampled_word)

        if sampled_word == 'end_token' or len(generated_sequence) > max_sequence_len:
            stop_condition = True

        target_sequence = np.zeros((1, 1))
        target_sequence[0, 0] = sampled_token_index
        states_value = [h, c]

    return ' '.join(generated_sequence)

# Example of generating a response
user_input = "how's it going?"
print("Bot:", generate_response(user_input))

In [None]:
# Save the model
model.save("chatbot_model.h5")

In [None]:
# Load the model
from tensorflow.keras.models import load_model
loaded_model = load_model("chatbot_model.h5")