### Basics of Seq2Seq Models for Dialogue Systems

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense


# Sample data: input and target sequences
input_texts = ['hello', 'how are you', 'good morning']
target_texts = ['hi', 'i am fine', 'good morning']

# Wrap every target in 'start' ... 'end' markers. Decoding is seeded with the
# 'start' token and stops on 'end', and training shifts the target by one step
# (decoder input = target[:-1], expected output = target[1:]). Without the
# markers that shift discards the first word of every reply and the decoder
# never learns what follows 'start' or when to emit 'end'.
target_texts = ['start ' + text + ' end' for text in target_texts]

# Tokenize the data. One shared vocabulary covers inputs and targets; the
# markers are plain lowercase words, so the default filters keep them.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(input_texts + target_texts)

input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)
max_input_length = max(len(seq) for seq in input_sequences)
max_target_length = max(len(seq) for seq in target_sequences)  # counts 'start' and 'end'
vocab_size = len(tokenizer.word_index) + 1  # +1: word ids start at 1, id 0 is the padding value
# Padding the sequences (zeros appended on the right)
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=max_input_length, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=max_target_length, padding='post')

# Define the model
# Hyperparameters: width of the token embeddings and of the LSTM state vectors.
embedding_dim = 256
latent_dim = 512

# Encoder
# Variable-length sequence of token ids -> embeddings -> LSTM. Only the final
# hidden/cell states are kept; the per-step encoder output is not used.
encoder_inputs = Input(shape=(None,))
encoder_embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
# Starts from the encoder's final states and emits, for every time step, a
# softmax distribution over the vocabulary.
decoder_inputs = Input(shape=(None,))
decoder_embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the complete model
# The sparse loss takes integer token ids as labels (no one-hot encoding).
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Prepare target data for training
# Teacher forcing: the decoder is fed the target without its last position and
# must predict the target shifted one step to the left.
# NOTE(review): this shift is only meaningful if every target starts with a
# start marker and ends with an end marker - confirm target_sequences is built that way.
target_sequences_input = target_sequences[:, :-1]
target_sequences_output = target_sequences[:, 1:]
# Add a trailing axis of size 1 to the integer labels.
target_sequences_output = target_sequences_output.reshape(target_sequences_output.shape[0], target_sequences_output.shape[1], 1)

# Train the model
model.fit([input_sequences, target_sequences_input], target_sequences_output, epochs=100, batch_size=64)

# Inference models
# Encoder: input sequence -> [state_h, state_c].
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder: (token ids, previous h, previous c) -> (token distribution, new h, new c).
# The trained decoder_lstm and decoder_dense layer objects are called again
# here, so the inference graph shares their weights.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

# Function to decode the sequence
def decode_sequence(input_seq):
    """Greedily decode a reply for one encoded input sequence.

    Args:
        input_seq: Padded token-id array for a single sentence, as produced
            by ``preprocess_text``.

    Returns:
        The decoded words, each preceded by a single space. The 'end' marker
        is not included.
    """
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['start']
    decoded_sentence = ''
    # Bound the loop by the number of decoding steps. The previous check
    # compared len(decoded_sentence), a character count, with
    # max_target_length, a token count, which cut replies off after a few
    # characters.
    for _ in range(max_target_length + 1):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))

        # Index 0 is the padding id and has no entry in index_word.
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')
        if sampled_word == 'end':
            break

        decoded_sentence += ' ' + sampled_word
        # Feed the sampled token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
    return decoded_sentence


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 2.5605
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step - loss: 2.4793
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 558ms/step - loss: 2.3909
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step - loss: 2.2840
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281ms/step - loss: 2.1477
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 1.9709
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - loss: 1.7457
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - loss: 1.4794
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - loss: 1.2183
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 1.0360
Epo

### Test the Model

In [None]:
# Function to preprocess input text
def preprocess_text(text):
    """Turn one raw sentence into a padded batch of token ids.

    The sentence is tokenised with the notebook-level ``tokenizer`` and
    post-padded (or cut) to ``max_input_length``; the result holds one row.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    return tf.keras.preprocessing.sequence.pad_sequences(
        token_ids, maxlen=max_input_length, padding='post'
    )

# Test the model: decode each prompt and print it next to the generated reply.
# NOTE: these prompts are the same three strings the model was trained on, so
# this checks memorisation of the toy data rather than generalisation.
test_sentences = ['hello', 'how are you', 'good morning']
for sentence in test_sentences:
    input_seq = preprocess_text(sentence)
    decoded_sentence = decode_sequence(input_seq)
    print(f'Input: {sentence}')
    print(f'Output: {decoded_sentence}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Input: hello
Output:     
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Input: how are you
Output:  am fine
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Input: good morning
Output:  morning


### Encoder-Decoder Framework for Sequence Generation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data
input_texts = ['hello', 'how are you', 'good morning']
target_texts = ['hi', 'i am fine', 'good morning']

# Add start and end tokens
# The decoder is seeded with 'start' at inference time and stops on 'end'.
target_texts = ['start ' + text + ' end' for text in target_texts]

# Tokenize the data
# filters='' disables the tokenizer's punctuation stripping; one shared
# vocabulary is built over inputs and targets.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(input_texts + target_texts)
input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)
max_input_length = max(len(seq) for seq in input_sequences)
max_target_length = max(len(seq) for seq in target_sequences)  # counts 'start' and 'end'
vocab_size = len(tokenizer.word_index) + 1  # +1: word ids start at 1, id 0 is the padding value

# Padding the sequences (zeros appended on the right)
input_sequences = pad_sequences(input_sequences, maxlen=max_input_length, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_target_length, padding='post')

# Hyperparameters: width of the token embeddings and of the LSTM state vectors.
embedding_dim = 256
latent_dim = 512

# Encoder
# Token ids -> embeddings -> LSTM; only the final hidden/cell states are kept.
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
# Initialised with the encoder states; emits a softmax over the vocabulary at
# every time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the complete model
# The sparse loss takes integer token ids as labels (no one-hot encoding).
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Prepare target data for training
# Teacher forcing: the decoder input drops the last position ('start', w1, ...)
# and the expected output drops the first (w1, ..., 'end').
target_sequences_input = target_sequences[:, :-1]
target_sequences_output = target_sequences[:, 1:]
# Add a trailing axis of size 1 to the integer labels.
target_sequences_output = target_sequences_output.reshape(target_sequences_output.shape[0], target_sequences_output.shape[1], 1)

# Train the model
model.fit([input_sequences, target_sequences_input], target_sequences_output, epochs=100, batch_size=64)

# Encoder model for inference: input sequence -> [state_h, state_c]
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder model for inference
# (token ids, previous h, previous c) -> (token distribution, new h, new c).
# The trained decoder_lstm / decoder_dense layer objects are called again, so
# the inference graph shares their weights.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

def decode_sequence(input_seq):
    """Greedily decode a reply for one encoded input sequence.

    Args:
        input_seq: Padded token-id array for a single sentence, as produced
            by ``preprocess_text``.

    Returns:
        The decoded words, each preceded by a single space. The 'end' marker
        is not included.
    """
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['start']
    decoded_sentence = ''
    # Bound the loop by decoding steps rather than by
    # len(decoded_sentence.split()). When the padding id is sampled the word is
    # '' and the split count never grows, so the old check could loop forever.
    # max_target_length + 1 steps is exactly what the old check allowed when
    # every sampled word is non-empty.
    for _ in range(max_target_length + 1):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')
        if sampled_word == 'end':
            break
        decoded_sentence += ' ' + sampled_word
        # Feed the sampled token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
    return decoded_sentence


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 2.5640
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step - loss: 2.5117
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - loss: 2.4533
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step - loss: 2.3793
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - loss: 2.2790
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 2.1408
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - loss: 1.9585
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - loss: 1.7599
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - loss: 1.6279
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - loss: 1.5703
Epoc

### Test the Model

In [None]:
# Preprocess input text
def preprocess_text(text):
    """Turn one raw sentence into a padded batch of token ids.

    The sentence is tokenised with the notebook-level ``tokenizer`` and
    post-padded (or cut) to ``max_input_length``; the result holds one row.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    return pad_sequences(token_ids, maxlen=max_input_length, padding='post')

# Test the model: decode each prompt and print it next to the generated reply.
# NOTE(review): the prompts look identical to the toy training inputs, so this
# checks memorisation rather than generalisation - confirm against the
# training cell.
test_sentences = ['hello', 'how are you', 'good morning']
for sentence in test_sentences:
    input_seq = preprocess_text(sentence)
    decoded_sentence = decode_sequence(input_seq)
    print(f'Input: {sentence}')
    print(f'Output: {decoded_sentence}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Input: hello
Output:  hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Input: how are you
Output:  i am fine
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Input: good morning
Output:  good morning


### Use Cases of GRUs in Conversational AI

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample conversational data
conversations = [
    "Hello",
    "Hi there!",
    "How are you?",
    "I'm good, thank you. How about you?",
    "I'm doing great!"
]

# Tokenizing the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(conversations)
total_words = len(tokenizer.word_index) + 1  # +1: word ids start at 1, id 0 is the padding value

# Creating input sequences and labels
# Every line contributes all of its prefixes of length >= 2; the last token of
# each prefix becomes the label further down. One-word lines contribute nothing.
input_sequences = []
for line in conversations:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence)

# Padding sequences to make them uniform in length
# Zeros go in front so the label always sits in the last column.
max_sequence_length = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre'))

# Creating predictors (X) and labels (y)
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = np.eye(total_words)[y]  # One-hot encode the labels

# Reshape X for GRU input
# There is no Embedding layer: each time step carries the raw token id as a
# single numeric feature.
X = X.reshape((X.shape[0], X.shape[1], 1))

# Building the GRU model
model = Sequential()
model.add(GRU(150, input_shape=(X.shape[1], 1)))
model.add(Dense(total_words, activation='softmax'))

# Compiling the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.01), metrics=['accuracy'])

# Training the model
model.fit(X, y, epochs=100, verbose=1)

# Function to generate conversational responses
def generate_response(seed_text, next_words):
    """Extend ``seed_text`` by ``next_words`` greedily predicted words.

    Args:
        seed_text: Text to start from.
        next_words: Number of words to append.

    Returns:
        ``seed_text`` followed by the predicted words, space-separated. A
        prediction with no vocabulary entry (e.g. id 0) contributes an empty
        word.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        # Same left-padding and width as the training predictors.
        token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
        # The batch has a single row; take its argmax as a plain int.
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        # index_word is the tokenizer's id -> word mapping, so a direct lookup
        # replaces the linear scan over word_index.
        output_word = tokenizer.index_word.get(predicted, "")
        seed_text += " " + output_word
    return seed_text



  super().__init__(**kwargs)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.0909 - loss: 2.4615
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2727 - loss: 1.9343
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.3636 - loss: 1.7117
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - accuracy: 0.4545 - loss: 1.5396
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.4545 - loss: 1.3336
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - accuracy: 0.5455 - loss: 1.1729
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.6364 - loss: 1.1093
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - accuracy: 0.5455 - loss: 1.0658
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
# Example conversation: extend the seed "Hi" by three predicted words and print it.
seed_text = "Hi"
response = generate_response(seed_text, 3)
print(response)


Hi there thank you


### Text Conversation using Recurrent Neural Networks

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data
input_texts = ['hello', 'how are you', 'good morning']
target_texts = ['hi', 'i am fine', 'good morning']

# Add start and end tokens
# The decoder is seeded with 'start' at inference time and stops on 'end'.
target_texts = ['start ' + text + ' end' for text in target_texts]

# Tokenize the data
# filters='' disables the tokenizer's punctuation stripping; one shared
# vocabulary is built over inputs and targets.
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(input_texts + target_texts)
input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)
max_input_length = max(len(seq) for seq in input_sequences)
max_target_length = max(len(seq) for seq in target_sequences)  # counts 'start' and 'end'
vocab_size = len(tokenizer.word_index) + 1  # +1: word ids start at 1, id 0 is the padding value

# Padding the sequences (zeros appended on the right)
input_sequences = pad_sequences(input_sequences, maxlen=max_input_length, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_target_length, padding='post')

# Hyperparameters: width of the token embeddings and of the RNN state vector.
embedding_dim = 256
latent_dim = 512

# Encoder
# Token ids -> embeddings -> SimpleRNN. A SimpleRNN has a single state tensor,
# so encoder_states holds one element.
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_rnn = SimpleRNN(latent_dim, return_state=True)
encoder_outputs, state_h = encoder_rnn(encoder_embedding)
encoder_states = [state_h]

# Decoder
# Initialised with the encoder state; emits a softmax over the vocabulary at
# every time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(vocab_size, embedding_dim)(decoder_inputs)
decoder_rnn = SimpleRNN(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, state_h = decoder_rnn(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the complete model
# The sparse loss takes integer token ids as labels (no one-hot encoding).
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Prepare target data for training
# Teacher forcing: the decoder input drops the last position ('start', w1, ...)
# and the expected output drops the first (w1, ..., 'end').
target_sequences_input = target_sequences[:, :-1]
target_sequences_output = target_sequences[:, 1:]
# Add a trailing axis of size 1 to the integer labels.
target_sequences_output = target_sequences_output.reshape(target_sequences_output.shape[0], target_sequences_output.shape[1], 1)

# Train the model
model.fit([input_sequences, target_sequences_input], target_sequences_output, epochs=100, batch_size=64)

# Encoder model for inference: input sequence -> [state_h]
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder model for inference
# (token ids, previous state) -> (token distribution, new state). The trained
# decoder_rnn / decoder_dense layer objects are called again, so the inference
# graph shares their weights.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h]
decoder_outputs, state_h = decoder_rnn(decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)




Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 2.5425
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 1.8927
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 1.2380
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.6741
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.3184
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.1250
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.0376
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0096
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0025
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 6.9371e-04
Epoch 1

In [None]:
def decode_sequence(input_seq):
    """Greedily decode a reply for one encoded input sequence.

    Args:
        input_seq: Padded token-id array for a single sentence, as produced
            by ``preprocess_text``.

    Returns:
        The decoded words, each preceded by a single space. The 'end' marker
        is not included.
    """
    # The encoder has one state output. predict() may return it bare or
    # wrapped in a one-element list, so normalise it to (1, latent_dim) once
    # instead of re-indexing and reshaping on every step.
    state = np.asarray(encoder_model.predict(input_seq)).reshape(1, latent_dim)

    # Start with the 'start' token
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['start']

    decoded_sentence = ''

    # Bound the loop by decoding steps rather than by
    # len(decoded_sentence.split()). When the padding id is sampled the word is
    # '' and the split count never grows, so the old check could loop forever.
    # max_target_length + 1 steps is exactly what the old check allowed when
    # every sampled word is non-empty.
    for _ in range(max_target_length + 1):
        # Predict the next token distribution and the updated decoder state
        output_tokens, h = decoder_model.predict([target_seq, state])

        # Greedy choice: token with the highest probability at the last step
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')

        if sampled_word == 'end':
            break

        decoded_sentence += ' ' + sampled_word

        # Feed the sampled token and the new state into the next step
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        state = np.asarray(h).reshape(1, latent_dim)

    return decoded_sentence

# Preprocess input text
def preprocess_text(text):
    """Turn one raw sentence into a padded batch of token ids.

    The sentence is tokenised with the notebook-level ``tokenizer`` and
    post-padded (or cut) to ``max_input_length``; the result holds one row.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    return pad_sequences(token_ids, maxlen=max_input_length, padding='post')

# Test the model: decode each prompt and print it next to the generated reply.
# NOTE(review): the prompts look identical to the toy training inputs, so this
# checks memorisation rather than generalisation - confirm against the
# training cell.
test_sentences = ['hello', 'how are you', 'good morning']
for sentence in test_sentences:
    input_seq = preprocess_text(sentence)
    decoded_sentence = decode_sequence(input_seq)
    print(f'Input: {sentence}')
    print(f'Output: {decoded_sentence}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Input: hello
Output:  hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Input: how are you
Output:  i am fine
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Input: good morning
Output:  good morning


### Automatic Text Generation using LSTM

In [None]:
# Install TensorFlow if necessary
# !pip install tensorflow-gpu==2.0.0-rc0

import tensorflow as tf
import string
import requests
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

# Fetch the data
# The timeout keeps a stalled download from hanging the cell, and
# raise_for_status() stops here on an HTTP error instead of silently training
# on an error page.
response = requests.get(
    "https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt",
    timeout=30,
)
response.raise_for_status()
data = response.text.split("\n")
data = data[253:]  # skip the first 253 lines (presumably the file's preamble - confirm)
data = " ".join(data)

# Clean the text
def clean_text(doc):
  """Split *doc* on whitespace and return its cleaned, lower-cased words.

  ASCII punctuation is removed from every token; tokens that are not purely
  alphabetic afterwards (numbers, mixed tokens, empty strings) are dropped.
  """
  strip_punctuation = str.maketrans('', '', string.punctuation)
  stripped = (raw.translate(strip_punctuation) for raw in doc.split())
  return [word.lower() for word in stripped if word.isalpha()]

tokens = clean_text(data)

# Create sequences
# Sliding windows of 51 consecutive words: 50 words of context plus the word
# to predict (split into X / y below).
length = 50 + 1
lines = []
for i in range(length, len(tokens)):
  seq = tokens[i-length:i]
  line = ' '.join(seq)
  lines.append(line)
  # Cap the data set: the window ending at index 20001 is the last one kept.
  if i > 20000:
    break

# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)
# assumes every line tokenises to the same number of ids, so this is a 2-D array - TODO confirm
sequences = np.array(sequences)
# Last column is the label; everything before it is the model input.
X, y = sequences[:, :-1], sequences[:, -1]
vocab_size = len(tokenizer.word_index) + 1  # +1: word ids start at 1
y = to_categorical(y, num_classes=vocab_size)  # one-hot labels for categorical_crossentropy
seq_length = X.shape[1]

# Build the LSTM model
# Embedding -> two stacked LSTMs -> dense ReLU layer -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=seq_length))
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100))
model.add(Dense(100, activation='relu'))
model.add(Dense(vocab_size, activation="softmax"))

# Compile and train the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])
model.fit(X, y, batch_size=256, epochs=100)




Epoch 1/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - accuracy: 0.0183 - loss: 7.5148
Epoch 2/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0290 - loss: 6.5364
Epoch 3/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0264 - loss: 6.4518
Epoch 4/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0293 - loss: 6.4100
Epoch 5/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.0298 - loss: 6.3658
Epoch 6/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.0348 - loss: 6.2985
Epoch 7/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0388 - loss: 6.1900
Epoch 8/100
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.0381 - loss: 6.1180
Epoch 9/100
[1m78/78[0m [32m━━━━━━━━━

In [None]:
# Generate text
def generate_text_seq(model, tokenizer, text_seq_length, seed_text, n_words):
  """Generate ``n_words`` words that continue ``seed_text``.

  Args:
    model: Trained next-word model; ``predict`` must return one probability
      row per input sequence.
    tokenizer: The fitted tokenizer used to build the training data.
    text_seq_length: Number of context tokens the model expects.
    seed_text: Text to continue.
    n_words: Number of words to generate.

  Returns:
    The generated words joined by single spaces (the seed is not included).
  """
  text = []
  for _ in range(n_words):
    encoded = tokenizer.texts_to_sequences([seed_text])[0]
    # Keep only the most recent text_seq_length tokens as context.
    encoded = pad_sequences([encoded], maxlen=text_seq_length, truncating="pre")
    # Use predict and argmax instead of predict_classes; the batch has a
    # single row, so take its argmax as a plain int.
    y_predict = int(np.argmax(model.predict(encoded, verbose=0), axis=-1)[0])
    # index_word is the tokenizer's id -> word mapping, so a direct lookup
    # replaces the linear scan over word_index.
    predicted_word = tokenizer.index_word.get(y_predict, '')
    seed_text = seed_text + ' ' + predicted_word
    text.append(predicted_word)
  return ' '.join(text)

# Seed generation with one of the training windows and print 20 generated words.
seed_text = lines[1233]
generated_text = generate_text_seq(model, tokenizer, seq_length, seed_text, 20)
print(generated_text)

sweet semblance in his love blesses sit and whether proclaims olives of endless age then do i am not is


### LSTMs in Dialogue Management and Text Generation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.utils import pad_sequences

# Define sample data for training
# Parallel lists: target_texts[i] is the reply to input_texts[i].
input_texts = ["hello", "how are you?", "what is your name?", "goodbye"]
target_texts = ["hi", "I am fine, thank you!", "I am a chatbot.", "see you later"]

# Prepare vocabulary
def create_tokenizer(texts):
    """Build a Keras ``Tokenizer`` fitted on *texts* and return it.

    ``filters=''`` switches off character filtering, so punctuation stays
    attached to the words it follows.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(filters='')  # Retain punctuation
    fitted.fit_on_texts(texts)
    return fitted

# Separate vocabularies for the prompts and for the replies.
input_tokenizer = create_tokenizer(input_texts)
target_tokenizer = create_tokenizer(target_texts)

input_sequences = input_tokenizer.texts_to_sequences(input_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)

# Add start and end tokens for target sequences
# The markers are the two ids just past the target vocabulary; they are not
# entries in target_tokenizer, so index_word has no word for them.
start_token = len(target_tokenizer.word_index) + 1
end_token = start_token + 1

target_sequences = [[start_token] + seq + [end_token] for seq in target_sequences]

# Pad sequences
max_encoder_seq_length = max([len(seq) for seq in input_sequences])
max_decoder_seq_length = max([len(seq) for seq in target_sequences])  # counts both markers

input_sequences = pad_sequences(input_sequences, maxlen=max_encoder_seq_length, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_decoder_seq_length, padding='post')

# Vocabulary sizes
num_encoder_tokens = len(input_tokenizer.word_index) + 1  # +1 for the padding id 0
num_decoder_tokens = len(target_tokenizer.word_index) + 3  # Include start and end tokens

# Prepare data for training
# Teacher forcing: the decoder input drops the last position and the expected
# output drops the first, so the target at step t is the input at step t + 1.
encoder_input_data = input_sequences
decoder_input_data = np.array([seq[:-1] for seq in target_sequences])
decoder_target_data = np.array([seq[1:] for seq in target_sequences])

# One-hot encoding for decoder target data
decoder_target_data_onehot = np.zeros((
    len(decoder_target_data), max_decoder_seq_length - 1, num_decoder_tokens
))
for i, seq in enumerate(decoder_target_data):
    for t, token in enumerate(seq):
        # Padding positions (token 0) keep an all-zero row.
        if token > 0:
            decoder_target_data_onehot[i, t, token] = 1

# Build the Seq2Seq model
# Encoder
# Token ids -> 128-d embeddings -> LSTM(256); only the final states are kept.
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(num_encoder_tokens, 128)(encoder_inputs)
encoder_lstm = LSTM(256, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
# Initialised with the encoder states; emits a softmax over the decoder
# vocabulary at every time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(num_decoder_tokens, 128)(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(
    decoder_embedding, initial_state=encoder_states
)
decoder_dense = Dense(num_decoder_tokens, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
# categorical_crossentropy matches the one-hot decoder targets.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model
# NOTE(review): with only four training pairs, validation_split=0.2 holds one
# pair out of training entirely (Keras takes the split from the end of the
# data) - confirm that is intended for this toy example.
model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data_onehot,
    batch_size=16,
    epochs=50,
    validation_split=0.2,
)

# Inference models
# Encoder: input sequence -> [state_h, state_c].
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder: (token ids, previous h, previous c) -> (token distribution, new h, new c).
decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Reuse the embedding that was trained with the model. Building a fresh
# Embedding layer here would feed randomly initialised vectors into the trained
# LSTM at inference time, so decoding would ignore what the model learned
# about the target tokens. The old name is kept as an alias.
decoder_embedding2 = decoder_embedding
decoder_lstm_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding2, initial_state=decoder_states_inputs
)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_lstm_outputs)

decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
)

# Define inference functions
def decode_sequence(input_seq):
    """Greedily decode a reply for one encoded input sequence.

    Args:
        input_seq: Padded encoder token-id array holding a single sentence
            (one row).

    Returns:
        The decoded words joined by single spaces; the start/end markers and
        padding produce no text.
    """
    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = start_token

    decoded_words = []

    # Bound the loop by decoding steps. The previous check compared
    # len(decoded_sentence), a character count, with max_decoder_seq_length, a
    # token count, which cut replies off after a handful of characters.
    for _ in range(max_decoder_seq_length):
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value
        )

        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        if sampled_token_index == end_token:
            break

        # Padding and the start/end ids have no entry in index_word.
        sampled_word = target_tokenizer.index_word.get(sampled_token_index, "")
        if sampled_word:
            decoded_words.append(sampled_word)

        # Feed the sampled token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

        states_value = [h, c]

    return " ".join(decoded_words)

# Test the model on each input text and print the predicted reply.
for seq_index in range(len(input_texts)):
    # Slice rather than index so the batch axis is kept (one row).
    input_seq = encoder_input_data[seq_index : seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print(f"Input: {input_texts[seq_index]}\nPredicted Response: {decoded_sentence}\n")


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.1111 - loss: 1.9049 - val_accuracy: 0.1667 - val_loss: 1.7606
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.2222 - loss: 1.8917 - val_accuracy: 0.1667 - val_loss: 1.7618
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step - accuracy: 0.2778 - loss: 1.8780 - val_accuracy: 0.1667 - val_loss: 1.7628
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.2778 - loss: 1.8630 - val_accuracy: 0.1667 - val_loss: 1.7636
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.2778 - loss: 1.8460 - val_accuracy: 0.1667 - val_loss: 1.7640
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step - accuracy: 0.2778 - loss: 1.8259 - val_accuracy: 0.1667 - val_loss: 1.7638
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━

### CNNs for Sentence and Document Classification

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Toy corpus and topic labels (swap in your own dataset here)
texts = [
    "I love programming in Python",
    "Python is a great language for machine learning",
    "I enjoy learning new technologies",
    "Deep learning is a subset of machine learning",
    "Natural language processing is fascinating",
]
labels = [0, 0, 1, 1, 2]  # One integer class id per sentence in `texts`

# Every padded sequence fed to the network has exactly this many positions
max_length = 10

# The class count is derived from the distinct label ids present (3 here)
num_classes = len(set(labels))

# Fit the vocabulary on the corpus, keeping at most the 10k most frequent words
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts)

# Words -> integer ids -> fixed-length integer matrix
X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_length)

# One-hot encode the labels to match the categorical cross-entropy loss
y = to_categorical(labels, num_classes=num_classes)

# Hold out 20% of the samples for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Load pre-trained GloVe embeddings (for this example, we'll simulate loading GloVe)
embedding_dim = 100
# One row per token id. 10000 rows matches the tokenizer's `num_words=10000`, so
# every id the tokenizer can emit has an embedding row.
embedding_matrix = np.random.rand(10000, embedding_dim)  # Replace with actual GloVe or Word2Vec embeddings

# Build the CNN model using functional API
input_layer = Input(shape=(max_length,))  # Define the input layer with shape of padded sequences

# Embedding layer initialised from `embedding_matrix` and frozen.
# `trainable` has to be set on the layer object itself: assigning it on the
# tensor returned by calling the layer leaves the weights trainable. The
# matrix is injected through the initializer so the "pre-trained" vectors are
# really used. No `input_length` is passed because the sequence length is
# already fixed by `input_layer`.
embedding = Embedding(
    input_dim=embedding_matrix.shape[0],
    output_dim=embedding_dim,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    trainable=False,  # Freeze the weights (set to True to fine-tune them)
)
embedding_layer = embedding(input_layer)

# Convolutional layers with multiple filter sizes (3, 4, 5): each branch looks
# at n-grams of a different width over the same embedded sequence
conv_3 = Conv1D(filters=128, kernel_size=3, activation='relu')(embedding_layer)
conv_4 = Conv1D(filters=128, kernel_size=4, activation='relu')(embedding_layer)
conv_5 = Conv1D(filters=128, kernel_size=5, activation='relu')(embedding_layer)

# Max pooling layers: keep the strongest activation of each filter over time
pool_3 = GlobalMaxPooling1D()(conv_3)
pool_4 = GlobalMaxPooling1D()(conv_4)
pool_5 = GlobalMaxPooling1D()(conv_5)

# Concatenate pooled features from the three branches into one vector
merged = tf.keras.layers.concatenate([pool_3, pool_4, pool_5], axis=1)

# Fully connected layer (dense layer)
dense = Dense(128, activation='relu')(merged)
dropout = Dropout(0.5)(dense)  # Dropout for regularization

# Output layer: one probability per class
output = Dense(num_classes, activation='softmax')(dropout)

# Create the model
model = Model(inputs=input_layer, outputs=output)

# Compile the model (categorical cross-entropy pairs with the one-hot labels)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print model summary
model.summary()

# The held-out split is used both for per-epoch validation and the final score
validation_split = (X_test, y_test)

# Fit for 10 epochs, reporting validation metrics after each one
history = model.fit(
    X_train,
    y_train,
    validation_data=validation_split,
    batch_size=32,
    epochs=10,
)

# Final loss/accuracy on the held-out samples
test_loss, test_acc = model.evaluate(*validation_split)
print(f"Test accuracy: {test_acc}")




Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.2500 - loss: 1.1123 - val_accuracy: 0.0000e+00 - val_loss: 1.0962
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step - accuracy: 0.7500 - loss: 1.0392 - val_accuracy: 0.0000e+00 - val_loss: 1.1132
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.7500 - loss: 1.0080 - val_accuracy: 0.0000e+00 - val_loss: 1.1365
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - accuracy: 0.7500 - loss: 0.9853 - val_accuracy: 0.0000e+00 - val_loss: 1.1640
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.7500 - loss: 0.9853 - val_accuracy: 0.0000e+00 - val_loss: 1.1850
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 1.0000 - loss: 0.8902 - val_accuracy: 0.0000e+00 - val_loss: 1.1998
Epoch 7/10
[1m1/1[0

In [None]:
# Score unseen sentences with the trained classifier
new_texts = [
    "Machine learning is revolutionizing industries",
    "Natural language processing is fascinating",
]

# Reuse the tokenizer fitted on the training corpus and pad to the model's input length
new_sequences = pad_sequences(tokenizer.texts_to_sequences(new_texts), maxlen=max_length)

# One row of class probabilities per input sentence
predictions = model.predict(new_sequences)

# Report the most probable class for each sentence
predicted_classes = np.argmax(predictions, axis=1)
print("Predictions (class labels):")
print(predicted_classes)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Predictions (class labels):
[1 2]


### Add kaggle json
Refer to Chapter 5, under the Gated Recurrent Units (GRUs) topic, for how to add your `kaggle.json` file.


In [None]:
# Restrict the Kaggle API token file to owner read/write, then download the
# GloVe Twitter embeddings dataset with the Kaggle CLI.
!chmod 600 /content/kaggle.json
!kaggle datasets download -d fullmetal26/glovetwitter27b100dtxt

chmod: cannot access '/content/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/fullmetal26/glovetwitter27b100dtxt
License(s): CC0-1.0
Downloading glovetwitter27b100dtxt.zip to /content
100% 776M/778M [00:07<00:00, 136MB/s]
100% 778M/778M [00:07<00:00, 109MB/s]


In [None]:
# Extract the downloaded embeddings archive into the current working directory.
!unzip /content/glovetwitter27b100dtxt.zip

Archive:  /content/glovetwitter27b100dtxt.zip
  inflating: glove.twitter.27B.200d.txt  


### Policy-Based Methods for Training Conversational Agents

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random

# Policy network: maps a state vector to a probability for each action
class PolicyNetwork(nn.Module):
    """Two-layer MLP policy with a 128-unit ReLU hidden layer.

    Args:
        input_dim: Size of the state feature vector.
        output_dim: Number of discrete actions.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Hidden projection first, then the action head (this order also fixes
        # the order in which parameters are initialised).
        self.fc = nn.Linear(input_dim, 128)
        self.output = nn.Linear(128, output_dim)

    def forward(self, x):
        """Return action probabilities, normalised over the last dimension of `x`."""
        hidden = nn.functional.relu(self.fc(x))
        logits = self.output(hidden)
        return nn.functional.softmax(logits, dim=-1)

# Define the environment simulation
class SimpleChatEnv:
    """Toy dialogue environment with a fixed list of canned responses.

    An action is an index into `responses`. The observation is a
    one-element NumPy array, and `state` always holds the most recent
    observation returned by `reset()` or `step()`.
    """

    def __init__(self):
        self.responses = ["Hello!", "How can I help you?", "Goodbye!", "What is your name?", "Tell me more."]
        self.reset()

    def reset(self):
        """Reset the observation to `[0.0]` and return it."""
        self.state = np.array([0.0])  # A simple state representation (you can use more complex features)
        return self.state

    def step(self, action):
        """Apply `action` and return `(next_state, reward)`.

        The next state is a fresh random value in [0, 1) and does not depend
        on the action; only the reward does.
        """
        reward = self._get_reward(action)
        # Store the new observation so `self.state` does not go stale after
        # the first step and stays consistent with what `reset()` does.
        self.state = np.array([random.random()])  # Just a random next state for simplicity
        return self.state, reward

    def _get_reward(self, action):
        """Return +1 for action 0 ("Hello!"), -1 for action 2 ("Goodbye!"), else 0."""
        if action == 0:
            return 1  # Reward for saying "Hello!"
        if action == 2:
            return -1  # Punish for saying "Goodbye!"
        return 0  # Neutral reward for every other response

# Define the agent using the REINFORCE algorithm
class PolicyAgent:
    """Policy-gradient (REINFORCE) agent over a discrete action set.

    Args:
        env: Environment exposing `reset() -> state` and
            `step(action) -> (next_state, reward)`.
        input_dim: Size of the state vector fed to the policy network.
        output_dim: Number of discrete actions.
        learning_rate: Step size for the Adam optimizer.
    """

    def __init__(self, env, input_dim, output_dim, learning_rate=0.01):
        self.env = env
        self.model = PolicyNetwork(input_dim, output_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.gamma = 0.99  # Discount factor for future rewards

    def select_action(self, state):
        """Sample an action from the current policy for a single state.

        Returns:
            `(action, log_prob)`, where `action` is a Python int and
            `log_prob` is the log-probability of that action with one
            leading dimension added. It is still attached to the autograd
            graph so it can be used in `update_policy`.
        """
        state_tensor = torch.tensor(state, dtype=torch.float32)
        probabilities = self.model(state_tensor)
        distribution = torch.distributions.Categorical(probabilities)
        action = distribution.sample()
        # unsqueeze so per-step log-probs can be joined with torch.cat later
        log_prob = distribution.log_prob(action).unsqueeze(0)
        return action.item(), log_prob

    def update_policy(self, rewards, log_probs):
        """Run one REINFORCE gradient step from a finished episode.

        Args:
            rewards: Per-step rewards in time order.
            log_probs: Tensor of per-step action log-probabilities, aligned
                with `rewards`.
        """
        if len(rewards) == 0:
            return  # Nothing was collected, so there is nothing to learn from

        # Discounted return G_t = r_t + gamma * G_{t+1}, accumulated from the
        # end of the episode backwards. Append-then-reverse keeps this linear
        # (repeated insert(0, ...) is quadratic).
        discounted_rewards = []
        cumulative_reward = 0
        for r in rewards[::-1]:
            cumulative_reward = r + self.gamma * cumulative_reward
            discounted_rewards.append(cumulative_reward)
        discounted_rewards.reverse()

        discounted_rewards = torch.tensor(discounted_rewards, dtype=torch.float32)

        # Normalize the returns (optional but helps with training). The
        # standard deviation of a single value is NaN, which would poison
        # every weight, so one-step episodes use the raw return instead.
        if discounted_rewards.numel() > 1:
            discounted_rewards = (discounted_rewards - discounted_rewards.mean()) / (discounted_rewards.std() + 1e-5)

        # REINFORCE objective: raise the log-probability of actions in
        # proportion to the return that followed them.
        loss = -torch.sum(log_probs * discounted_rewards)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def train(self, episodes=100, max_steps=10):
        """Collect `episodes` rollouts and update the policy after each one.

        Args:
            episodes: Number of episodes to run.
            max_steps: Steps per episode. The environment interface has no
                terminal signal, so episodes are cut at a fixed length.
        """
        for _ in range(episodes):
            state = self.env.reset()
            rewards = []
            log_probs = []  # One 1-element tensor per step
            for _ in range(max_steps):
                action, log_prob = self.select_action(state)
                state, reward = self.env.step(action)
                rewards.append(reward)
                log_probs.append(log_prob)

            # Update the policy after the episode (skip empty rollouts, for
            # which torch.cat would raise)
            if rewards:
                self.update_policy(rewards, torch.cat(log_probs))


# Build the toy environment and size the policy network from it
env = SimpleChatEnv()
input_dim = 1  # The state is a single scalar feature
output_dim = len(env.responses)  # One action per canned response

agent = PolicyAgent(env=env, input_dim=input_dim, output_dim=output_dim)

# Run 100 training episodes
agent.train(100)


Agent's response: Hello!


In [None]:
# Sample one action from the trained policy, starting from the reset state
test_state = env.reset()
# select_action also returns the action's log-probability, which is not needed here
action, _ = agent.select_action(state=test_state)
print(f"Agent's response: {env.responses[action]}")

Agent's response: Hello!
