In [12]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [13]:
questions = [
    "¿Cuál es la función del corazón?",
    "¿Qué órgano produce insulina?",
    "¿Dónde se encuentra el hígado?",
    "¿Cuántos huesos tiene el cuerpo humano?",
    "¿Qué es la médula espinal?"
]

answers = [
    "El corazón bombea sangre a todo el cuerpo.",
    "El páncreas produce insulina.",
    "El hígado se encuentra en la parte superior derecha del abdomen.",
    "El cuerpo humano tiene 206 huesos.",
    "La médula espinal es parte del sistema nervioso central."
]

In [14]:
# proceso de la Tokenización
tokenizer = keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(questions + answers)
vocab_size = len(tokenizer.word_index) + 1

In [15]:
# proceso de Convertir texto a secuencias
questions_seq = tokenizer.texts_to_sequences(questions)
answers_seq = tokenizer.texts_to_sequences(answers)


In [16]:
# Padding de las secuencias
max_length = max(max(len(seq) for seq in questions_seq), max(len(seq) for seq in answers_seq))
questions_padded = keras.preprocessing.sequence.pad_sequences(questions_seq, maxlen=max_length, padding='post')
answers_padded = keras.preprocessing.sequence.pad_sequences(answers_seq, maxlen=max_length, padding='post')

In [23]:


answers_input = answers_padded[:, :-1]
answers_output = answers_padded[:, 1:]


model = keras.Sequential([
    layers.Embedding(input_dim=vocab_size, output_dim=64),
    layers.LSTM(64, return_sequences=True),
    layers.LSTM(32, return_sequences=True),
    layers.Dense(vocab_size, activation='softmax')
])


model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(questions_padded, answers_output, epochs=40, batch_size=82)

0

Epoch 1/40


ValueError: Arguments `target` and `output` must have the same shape up until the last dimension: target.shape=(None, 10), output.shape=(None, 11, 37)

In [17]:

def predict_answer(question):
    seq = tokenizer.texts_to_sequences([question])
    padded = keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_length, padding='post')
    pred = model.predict(padded)
    return tokenizer.sequences_to_texts(np.argmax(pred, axis=-1))
