In [None]:
# Importar bibliotecas necesarias
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Upload the training corpus (.txt) through the Colab file picker
from google.colab import files
uploaded = files.upload()
if not uploaded:
    # The upload dialog was cancelled: fail with a clear message instead of an IndexError below
    raise ValueError("No se ha subido ningún archivo.")

# Read the uploaded file (the first one if several were selected)
file_name = next(iter(uploaded))
with open(file_name, 'r', encoding='utf-8') as file:
    text = file.read()

# Text preprocessing: build a character-level vocabulary.
# NOTE: Tokenizer lowercases its input by default (lower=True), so the
# vocabulary contains lowercase characters only.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([text])
# Vocabulary size plus one: tokenizer indices start at 1, index 0 is never assigned
total_chars = len(tokenizer.word_index) + 1

# Sliding-window parameters for the training samples
seq_length = 40  # Number of characters fed to the model per sample
step = 3  # Stride between the start of consecutive windows

# Encode the whole corpus ONCE and slide the window over the integer sequence.
# This avoids re-tokenizing every overlapping window and guarantees that every
# row of X has exactly seq_length entries.
encoded_text = tokenizer.texts_to_sequences([text])[0]
if len(encoded_text) <= seq_length:
    raise ValueError(
        f"El texto es demasiado corto: se necesitan más de {seq_length} caracteres."
    )

window_starts = range(0, len(encoded_text) - seq_length, step)
input_sequences = [encoded_text[i: i + seq_length] for i in window_starts]
# Target for each window is the character that immediately follows it
output_chars = [encoded_text[i + seq_length] for i in window_starts]

# Convert to arrays and one-hot encode the targets.
# Targets are shifted to be zero-based (tokenizer index - 1); generate_text
# undoes this shift with "+ 1" when mapping predictions back to characters.
X = np.array(input_sequences)
y = to_categorical(np.array(output_chars) - 1, num_classes=total_chars)

# Build the RNN: embedding -> single LSTM layer -> softmax over characters.
# The input shape is declared with an explicit Input layer because the
# Embedding `input_length` argument is deprecated in Keras 3.
model = Sequential([
    tf.keras.Input(shape=(seq_length,)),
    Embedding(total_chars, 50),
    LSTM(128, return_sequences=False),
    Dense(total_chars, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy')

# Train the model
model.fit(X, y, batch_size=64, epochs=20)

# Text generation
def generate_text(seed_text, length):
    """Extend ``seed_text`` with ``length`` greedily predicted characters.

    On every step the current seed is encoded with the module-level
    ``tokenizer``, padded or truncated from the front to ``seq_length``
    entries, and passed to ``model``; the most probable next character is
    appended and the seed becomes the last ``seq_length`` characters of the
    text produced so far.

    Args:
        seed_text: Text used as the starting context.
        length: Number of characters to generate.

    Returns:
        The original ``seed_text`` followed by the generated characters.
    """
    # Training targets were encoded as (tokenizer index - 1), so only the first
    # len(index_word) output units map to a real character. Restricting the
    # argmax to them prevents a KeyError on the unused trailing unit.
    n_valid_classes = len(tokenizer.index_word)

    result = seed_text
    for _ in range(length):
        # Encode the current context as a fixed-length integer sequence
        encoded = tokenizer.texts_to_sequences([seed_text])
        encoded = pad_sequences(encoded, maxlen=seq_length, truncating='pre')
        # verbose=0: without it Keras prints a progress bar for every character
        probabilities = model.predict(encoded, verbose=0)[0]
        predicted_index = int(np.argmax(probabilities[:n_valid_classes]))
        # Undo the "- 1" shift applied to the training targets
        predicted_char = tokenizer.index_word[predicted_index + 1]
        result += predicted_char
        seed_text = result[-seq_length:]
    return result

# Conversation mode
def chat_with_model():
    """Run an interactive prompt/response loop on the console.

    Each user line is truncated to its last ``seq_length`` characters and
    passed to ``generate_text`` to produce 100 more characters. The printed
    reply is the value returned by ``generate_text``, which starts with the
    seed itself. The loop ends when the user types ``salir`` (case-insensitive,
    surrounding whitespace ignored) or when input is closed or interrupted.
    """
    print("Inicia una conversación con el modelo. Escribe 'salir' para terminar.")
    while True:
        try:
            prompt = input("Tú: ")
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the cell was interrupted while waiting for input:
            # leave the chat instead of surfacing a traceback
            break
        # strip() so that "salir " or " salir" also ends the session
        if prompt.strip().lower() == 'salir':
            break
        response = generate_text(prompt[-seq_length:], 100)
        print(f"Modelo: {response}")

# Start the interactive conversation; this call blocks on input() until the
# user types 'salir'
chat_with_model()


Saving intervencionesAbascal.txt to intervencionesAbascal.txt
Epoch 1/20




118/118 ━━━━━━━━━━━━━━━━━━━━ 13s 88ms/step - loss: 3.2917
Epoch 2/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 20s 85ms/step - loss: 2.7334
Epoch 3/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 10s 87ms/step - loss: 2.4187
Epoch 4/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 19s 71ms/step - loss: 2.3313
Epoch 5/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 11s 75ms/step - loss: 2.2645
Epoch 6/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 10s 88ms/step - loss: 2.1931
Epoch 7/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 10s 86ms/step - loss: 2.1563
Epoch 8/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 8s 71ms/step - loss: 2.1320
Epoch 9/20
118/118 ━━━━━━━━━━━━━━━━━━━━ 11s 74ms/step - loss: 2.0820
Epoch 10/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 85ms/s