In [None]:
'''
✅ O que é um Seq2Seq (Sequence to Sequence)?

Um modelo Seq2Seq recebe uma sequência de entrada e gera uma sequência de saída. Exemplo clássico:

Input: "Eu gosto de café"
Output: "I like coffee"

📌 Arquitetura Básica:
Encoder: Processa a sequência de entrada e gera um vetor de contexto (uma "memória" do input).
Decoder: Lê esse vetor e gera a saída palavra por palavra.

👉 Usado em: tradução automática, chatbots, sumarização de texto, legendagem de vídeo.

✅ Problema da arquitetura clássica
O vetor de contexto gerado pelo encoder tem tamanho fixo.
Se a sequência for muito longa, o decoder perde informações.

⭐ Attention Mechanism - A Solução
O Attention foi criado para resolver esse gargalo.

📌 Como funciona?
O decoder não depende mais só do vetor fixo;
Ele "olha" para cada palavra da entrada e decide o que é mais importante a cada passo;
Calcula pesos (α) para cada palavra da entrada.

💡 Exemplo: Na tradução de "O menino comeu a maçã", ao gerar "apple", o Attention vai focar mais em "maçã" do que no restante.
'''


'''
✅ Fórmula Simplificada do Attention:
`Attention(Q, K, V) = Softmax(Q * K.T / sqrt(d_k)) * V`

Onde:
Q (Query): Vetor da palavra atual que o decoder está gerando
K (Key): Vetores da entrada
V (Value): Valores associados às palavras da entrada
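d_k: Dimensão dos vetores de K; dividir por sqrt(d_k) evita que os produtos Q * K.T cresçam demais e saturem o Softmax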
'''

In [None]:
!pip install tensorflow gensim requests

Collecting gensim
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy<2.1.0,>=1.26.0 (from tensorflow)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<1.14.0,>=1.7.0 (from gensim)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.7/26.7 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━

In [None]:
# Upgrade numpy, gensim, numba and tensorflow to their latest releases.
# NOTE(review): in the recorded run this pulls numpy 2.2.4, while the previous install
# resolved "numpy<2.1.0,>=1.26.0" for tensorflow — confirm the unbounded upgrade is intended.
!pip install --upgrade numpy gensim numba tensorflow

Collecting numpy
  Using cached numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)


In [None]:
import os

import requests

# Pre-trained Portuguese fastText vectors (about 1.2 GB compressed).
FASTTEXT_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pt.300.vec.gz'
FASTTEXT_PATH = 'cc.pt.300.vec.gz'

# Skip the download when the archive is already on disk, so re-running the
# notebook does not fetch the file again.
if not os.path.exists(FASTTEXT_PATH):
    partial_path = FASTTEXT_PATH + '.part'
    # Stream the body in chunks instead of holding the whole archive in memory.
    with requests.get(FASTTEXT_URL, stream=True, timeout=60) as response:
        # Fail loudly on HTTP errors instead of saving an error page as the archive.
        response.raise_for_status()
        with open(partial_path, 'wb') as archive:
            for chunk in response.iter_content(chunk_size=1 << 20):
                archive.write(chunk)
    # Only expose the final name once the download finished completely.
    os.replace(partial_path, FASTTEXT_PATH)

1271093660

In [None]:
from gensim.models import KeyedVectors

# Load the downloaded word vectors (word2vec text format, gzip-compressed)
# into a KeyedVectors lookup used by the embedding-matrix cell.
FASTTEXT_VECTORS_PATH = 'cc.pt.300.vec.gz'
fasttext_model = KeyedVectors.load_word2vec_format(FASTTEXT_VECTORS_PATH)

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences

# Toy dataset: four short reviews with binary labels.
texts = ["Eu amo esse filme", "Esse filme é péssimo", "Que filme maravilhoso", "Horrível, não gostei"]
labels = [1, 0, 1, 0]

# Build the word -> integer index from the texts.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
# One extra row so that every index in word_index is a valid row;
# row 0 stays all-zero (presumably the padding index — confirm).
vocab_size = len(word_index) + 1

embedding_dim = 300
embedding_matrix = np.zeros((vocab_size, embedding_dim))

# Fill one row per vocabulary word with its pre-trained vector.
for word, i in word_index.items():
  if word in fasttext_model:
    embedding_matrix[i] = fasttext_model[word]
  else:
    # Word missing from the pre-trained vectors: use a random vector for this
    # row only. (Assigning to `embedding_matrix` itself would replace the whole
    # matrix with a single 1-D vector.)
    embedding_matrix[i] = np.random.normal(scale=0.6, size=(embedding_dim,))

In [None]:
# ✅ Keras example (simple Seq2Seq with Attention)

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Attention, Concatenate, LSTM

# Encoder: returns the full output sequence (needed by the attention layer)
# plus its final hidden and cell states (used to initialise the decoder).
encoder_inputs = Input(shape=(None, vocab_size))
encoder_lstm = LSTM(256, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)

# Decoder: starts from the encoder's final states.
decoder_inputs = Input(shape=(None, vocab_size))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

# Attention layer: decoder outputs are the queries, encoder outputs the values.
attention = Attention()
context_vector = attention([decoder_outputs, encoder_outputs])

# Concatenate the context with the decoder output so the prediction sees both
# the attended encoder information and the decoder's own state, then project
# the combination back to 256 dimensions.
decoder_combined = Concatenate(axis=-1)([context_vector, decoder_outputs])
decoder_concat_input = Dense(256, activation="tanh")(decoder_combined)

# Output layer: a distribution over the vocabulary at every decoder step.
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_concat_input)

# Final model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

In [None]:
import pandas as pd

# Load the question/answer dataset and preview its first rows.
CONVERSATION_CSV = 'Conversation.csv'
df = pd.read_csv(CONVERSATION_CSV)

df.head()

Unnamed: 0.1,Unnamed: 0,question,answer
0,0,"hi, how are you doing?",i'm fine. how about yourself?
1,1,i'm fine. how about yourself?,i'm pretty good. thanks for asking.
2,2,i'm pretty good. thanks for asking.,no problem. so how have you been?
3,3,no problem. so how have you been?,i've been great. what about you?
4,4,i've been great. what about you?,i've been good. i'm in school right now.


In [None]:
# Remove the leftover index column from the CSV export.
# errors='ignore' plus rebinding keeps the cell idempotent: running it a second
# time no longer raises KeyError because the column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Preview the JSON layout on the first rows only; serialising the whole frame
# floods the cell output without adding information.
print(df.head().to_json())

{"question":{"0":"hi, how are you doing?","1":"i'm fine. how about yourself?","2":"i'm pretty good. thanks for asking.","3":"no problem. so how have you been?","4":"i've been great. what about you?","5":"i've been good. i'm in school right now.","6":"what school do you go to?","7":"i go to pcc.","8":"do you like it there?","9":"it's okay. it's a really big campus.","10":"good luck with school.","11":"how's it going?","12":"i'm doing well. how about you?","13":"never better, thanks.","14":"so how have you been lately?","15":"i've actually been pretty good. you?","16":"i'm actually in school right now.","17":"which school do you attend?","18":"i'm attending pcc right now.","19":"are you enjoying it there?","20":"it's not bad. there are a lot of people there.","21":"good luck with that.","22":"how are you doing today?","23":"i'm doing great. what about you?","24":"i'm absolutely lovely, thank you.","25":"everything's been good with you?","26":"i haven't been better. how about yourself?","

In [None]:
# Hand-written seed (question, answer) pairs; later cells append the CSV pairs to this list.
conversations = [
    ("Olá!", "Oi! Como posso te ajudar?"),
    ("Qual seu nome?", "Sou um chatbot de teste."),
    ("O que você faz?", "Eu converso com você!"),
    ("Tchau", "Até mais!")
]

In [None]:
import json

# Round-trip the frame through JSON to obtain plain nested dicts:
# {column name: {row index as string: cell value}}.
serialized_frame = df.to_json()
json_data = json.loads(serialized_frame)
json_data

{'question': {'0': 'hi, how are you doing?',
  '1': "i'm fine. how about yourself?",
  '2': "i'm pretty good. thanks for asking.",
  '3': 'no problem. so how have you been?',
  '4': "i've been great. what about you?",
  '5': "i've been good. i'm in school right now.",
  '6': 'what school do you go to?',
  '7': 'i go to pcc.',
  '8': 'do you like it there?',
  '9': "it's okay. it's a really big campus.",
  '10': 'good luck with school.',
  '11': "how's it going?",
  '12': "i'm doing well. how about you?",
  '13': 'never better, thanks.',
  '14': 'so how have you been lately?',
  '15': "i've actually been pretty good. you?",
  '16': "i'm actually in school right now.",
  '17': 'which school do you attend?',
  '18': "i'm attending pcc right now.",
  '19': 'are you enjoying it there?',
  '20': "it's not bad. there are a lot of people there.",
  '21': 'good luck with that.',
  '22': 'how are you doing today?',
  '23': "i'm doing great. what about you?",
  '24': "i'm absolutely lovely, thank

In [None]:
# Append one (question, answer) tuple per dataset row, matched by row key.
question_by_row = json_data['question']
answer_by_row = json_data['answer']
conversations.extend(
    (question_by_row[row_key], answer_by_row[row_key]) for row_key in question_by_row
)
conversations

[('Olá!', 'Oi! Como posso te ajudar?'),
 ('Qual seu nome?', 'Sou um chatbot de teste.'),
 ('O que você faz?', 'Eu converso com você!'),
 ('Tchau', 'Até mais!'),
 'question',
 'answer',
 ('hi, how are you doing?', "i'm fine. how about yourself?"),
 ("i'm fine. how about yourself?", "i'm pretty good. thanks for asking."),
 ("i'm pretty good. thanks for asking.", 'no problem. so how have you been?'),
 ('no problem. so how have you been?', "i've been great. what about you?"),
 ("i've been great. what about you?",
  "i've been good. i'm in school right now."),
 ("i've been good. i'm in school right now.", 'what school do you go to?'),
 ('what school do you go to?', 'i go to pcc.'),
 ('i go to pcc.', 'do you like it there?'),
 ('do you like it there?', "it's okay. it's a really big campus."),
 ("it's okay. it's a really big campus.", 'good luck with school.'),
 ('good luck with school.', 'thank you very much.'),
 ("how's it going?", "i'm doing well. how about you?"),
 ("i'm doing well. how a

In [None]:
# Drop stray entries that are not (question, answer) tuples, such as the bare
# 'question' / 'answer' strings seen in the recorded output.
# Filtering by shape instead of popping fixed positions keeps valid pairs intact
# when no stray entries exist, and makes the cell safe to run more than once.
conversations[:] = [
    entry for entry in conversations
    if isinstance(entry, tuple) and len(entry) == 2
]
conversations

[('Olá!', 'Oi! Como posso te ajudar?'),
 ('Qual seu nome?', 'Sou um chatbot de teste.'),
 ('O que você faz?', 'Eu converso com você!'),
 ('Tchau', 'Até mais!'),
 ("i'm fine. how about yourself?", "i'm pretty good. thanks for asking."),
 ("i'm pretty good. thanks for asking.", 'no problem. so how have you been?'),
 ('no problem. so how have you been?', "i've been great. what about you?"),
 ("i've been great. what about you?",
  "i've been good. i'm in school right now."),
 ("i've been good. i'm in school right now.", 'what school do you go to?'),
 ('what school do you go to?', 'i go to pcc.'),
 ('i go to pcc.', 'do you like it there?'),
 ('do you like it there?', "it's okay. it's a really big campus."),
 ("it's okay. it's a really big campus.", 'good luck with school.'),
 ('good luck with school.', 'thank you very much.'),
 ("how's it going?", "i'm doing well. how about you?"),
 ("i'm doing well. how about you?", 'never better, thanks.'),
 ('never better, thanks.', 'so how have you been

In [None]:
# Keep only the first pairs so the demo model trains quickly.
MAX_CONVERSATIONS = 50
conversations = conversations[:MAX_CONVERSATIONS]
conversations

[('Olá!', 'Oi! Como posso te ajudar?'),
 ('Qual seu nome?', 'Sou um chatbot de teste.'),
 ('O que você faz?', 'Eu converso com você!'),
 ('Tchau', 'Até mais!'),
 ("i'm fine. how about yourself?", "i'm pretty good. thanks for asking."),
 ("i'm pretty good. thanks for asking.", 'no problem. so how have you been?'),
 ('no problem. so how have you been?', "i've been great. what about you?"),
 ("i've been great. what about you?",
  "i've been good. i'm in school right now."),
 ("i've been good. i'm in school right now.", 'what school do you go to?'),
 ('what school do you go to?', 'i go to pcc.'),
 ('i go to pcc.', 'do you like it there?'),
 ('do you like it there?', "it's okay. it's a really big campus."),
 ("it's okay. it's a really big campus.", 'good luck with school.'),
 ('good luck with school.', 'thank you very much.'),
 ("how's it going?", "i'm doing well. how about you?"),
 ("i'm doing well. how about you?", 'never better, thanks.'),
 ('never better, thanks.', 'so how have you been

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Split the (question, answer) pairs into two parallel tuples.
questions, answers = zip(*conversations)

# Markers for the beginning and end of every decoder sequence (a tuple, so it
# can be concatenated with the question/answer tuples below).
special_tokens = ('<start>', '<end>')

# Tokenizer
# The default `filters` string contains '<' and '>', which would turn the
# markers into the ordinary words 'start' and 'end'. Use the default filter set
# minus those two characters so '<start>' and '<end>' stay distinct tokens.
TOKEN_FILTERS = '!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n'
tokenizer = Tokenizer(filters=TOKEN_FILTERS)
tokenizer.fit_on_texts(questions + answers + special_tokens)

# One extra slot so every index in word_index is valid (index 0 is used by padding).
vocab_size = len(tokenizer.word_index) + 1

# Convert text to integer sequences. The decoder input is the answer prefixed
# with <start>; the target is the same answer suffixed with <end>, i.e. shifted
# one step ahead of the input.
encoder_input_data = tokenizer.texts_to_sequences(questions)
decoder_input_data = tokenizer.texts_to_sequences(["<start> " + ans for ans in answers])
decoder_target_data = tokenizer.texts_to_sequences([ans + " <end>" for ans in answers])

# Padding: bring every sequence to the same length, filling with 0 at the end.
max_len = max([len(seq) for seq in encoder_input_data + decoder_input_data])
encoder_input_data = pad_sequences(encoder_input_data, maxlen=max_len, padding='post')
decoder_input_data = pad_sequences(decoder_input_data, maxlen=max_len, padding='post')
decoder_target_data = pad_sequences(decoder_target_data, maxlen=max_len, padding='post')


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, GRU, Embedding, Dense

# Sizes shared by the encoder and the decoder.
EMBEDDING_DIM = 50
HIDDEN_UNITS = 100

# --- Encoder: token ids -> embeddings -> final GRU state -------------------
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(vocab_size, EMBEDDING_DIM)(encoder_inputs)
# LSTM alternative (disabled):
# encoder_lstm = LSTM(100, return_state=True)
# _, state_h, state_c = encoder_lstm(enc_emb)

encoder_gru = GRU(HIDDEN_UNITS, return_state=True)
_, encoder_state = encoder_gru(enc_emb)

# --- Decoder: starts from the encoder state, emits one output per step -----
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(vocab_size, EMBEDDING_DIM)(decoder_inputs)
# LSTM alternative (disabled):
# decoder_lstm = LSTM(100, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

decoder_gru = GRU(HIDDEN_UNITS, return_sequences=True, return_state=True)
decoder_outputs, decoder_state = decoder_gru(dec_emb, initial_state=[encoder_state])

# Softmax over the vocabulary at every decoder step.
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (question ids, shifted answer ids) -> next-token distribution.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Integer targets, hence the sparse variant of the cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

In [None]:
# Targets get a trailing axis of size 1 before being passed to fit.
decoder_targets = decoder_target_data[..., np.newaxis]

model.fit(
    x=[encoder_input_data, decoder_input_data],
    y=decoder_targets,
    batch_size=2,
    epochs=500,  # originally 500
)


Epoch 1/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - loss: 4.7959
Epoch 2/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 2.4286
Epoch 3/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 2.3194
Epoch 4/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 2.3606
Epoch 5/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 2.0142
Epoch 6/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.9301
Epoch 7/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 2.0520
Epoch 8/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1.9165
Epoch 9/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1.9471
Epoch 10/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - lo

<keras.src.callbacks.history.History at 0x77ff8bd0dd90>

In [None]:
# LSTM
# Inference models for the LSTM variant of the chatbot.
# NOTE(review): the training cell builds GRU layers, so the only live definitions of
# `state_h`, `state_c` and `decoder_lstm` in this notebook come from the 256-unit
# attention example, which uses a different `encoder_inputs`. With the GRU model
# trained, this cell looks like a leftover — confirm before running it.

# Encoder that produces the context (final hidden and cell states)
encoder_model = Model(encoder_inputs, [state_h, state_c])

# Decoder fed with an explicit initial state, so it can be stepped one token at a time
decoder_state_input_h = Input(shape=(100,))
decoder_state_input_c = Input(shape=(100,))

# NOTE(review): this creates a new Embedding layer rather than reusing the one
# applied to `decoder_inputs` during training — verify that this is intended.
dec_emb2 = Embedding(vocab_size, 50)(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])
decoder_outputs2 = decoder_dense(decoder_outputs2)

# Maps (token ids, h, c) to (token distribution, new h, new c)
decoder_model = Model([decoder_inputs, decoder_state_input_h, decoder_state_input_c], [decoder_outputs2, state_h2, state_c2])


In [None]:
# GRU
# Inference models that reuse the layers trained in the GRU seq2seq model.

# Encoder: question token ids -> final GRU state (the context).
encoder_model = Model(encoder_inputs, encoder_state)

# Decoder: fed with an explicit state so it can be stepped one token at a time.
decoder_state_input = Input(shape=(100,))
# Reuse the embedding output built on `decoder_inputs` in the training cell.
# Calling Embedding(...) again here would create a brand-new layer with random,
# untrained weights, so the decoder would see meaningless inputs at inference.
dec_emb2 = dec_emb
decoder_outputs2, decoder_state2 = decoder_gru(dec_emb2, initial_state=[decoder_state_input])
decoder_outputs2 = decoder_dense(decoder_outputs2)

# Maps (token ids, previous state) to (token distribution, new state).
decoder_model = Model([decoder_inputs, decoder_state_input], [decoder_outputs2, decoder_state2])


In [None]:
def chat_response(input_text):
    """Generate a reply for ``input_text`` with greedy step-by-step decoding.

    Uses the notebook-level ``tokenizer``, ``max_len``, ``encoder_model`` and
    ``decoder_model`` (GRU variant: a single state tensor).

    Args:
        input_text: The user's message as a plain string.

    Returns:
        The generated reply as a space-separated string. It is empty when the
        decoder immediately produces the end marker or the padding index.
    """
    # Encode the user message exactly like the training questions.
    input_seq = tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_len, padding='post')

    # The encoder state is the decoder's initial state.
    # state_h, state_c = encoder_model.predict(input_seq)  # LSTM variant
    states = encoder_model.predict(input_seq, verbose=0)

    # The tokenizer may have stripped '<' and '>' from the markers (its default
    # filters do), so accept both spellings, preferring the bracketed one.
    vocabulary = tokenizer.word_index
    if '<start>' in vocabulary:
        start_word, end_word = '<start>', '<end>'
    else:
        start_word, end_word = 'start', 'end'

    # Decoding must begin with the start marker, as the training inputs did;
    # fall back to index 0 only if the marker is missing from the vocabulary.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = vocabulary.get(start_word, 0)

    words = []
    # Bounded by the number of generated words so the loop always terminates.
    while len(words) <= max_len:
        # output_tokens, h, c = decoder_model.predict([target_seq, state_h, state_c])  # LSTM variant
        output_tokens, states = decoder_model.predict([target_seq, states], verbose=0)  # GRU
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')

        # Stop on the end marker. Index 0 is the padding value that follows the
        # end marker in the training targets, so it also means "finished"; it
        # maps to no word and would otherwise never advance the length bound.
        if sampled_word == end_word or sampled_token_index == 0:
            break
        words.append(sampled_word)

        # Feed the sampled token back in as the next decoder input.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

    return ' '.join(words)


In [None]:
# Interactive chat: type 'sair' (or interrupt the input prompt) to leave.
while True:
    try:
        user_input = input("Você: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt ends the chat cleanly instead of leaving a traceback.
        break
    user_input = user_input.strip()
    if user_input.lower() == 'sair':
        break
    if not user_input:
        # Nothing to answer; ask again.
        continue
    resposta = chat_response(user_input)
    print("Bot:", resposta)


Você: hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

KeyboardInterrupt: Interrupted by user

In [None]:
# Persist the trained model to disk.
MODEL_PATH = 'chatbot-GRU-v0.h5'
model.save(MODEL_PATH)

