In [None]:
'''
✅ Por que a função de perda (loss) é tão importante em NLP?
Porque ela mede o quanto o modelo está errando ao prever as sequências de palavras ou classes. Como estamos lidando com sequências, classes e tokens, usamos algumas perdas específicas para lidar com esse tipo de dado.

✅ Principais Loss Functions usadas em NLP
1. Categorical Crossentropy (Entropia Cruzada Categórica)
🔎 Uso: Classificação de múltiplas classes (Ex: classificação de sentimentos ou palavras)
'''

from tensorflow.keras.losses import CategoricalCrossentropy
loss = CategoricalCrossentropy()


In [None]:
'''
2. Sparse Categorical Crossentropy
🔎 Uso: Quando seus rótulos são inteiros (ex: o índice da palavra no vocabulário), o que é muito comum em NLP.
'''

from tensorflow.keras.losses import SparseCategoricalCrossentropy
loss = SparseCategoricalCrossentropy()

# ✅ É a mais usada em Seq2Seq, Tradução, Chatbots.

In [None]:
'''
3. Binary Crossentropy
🔎 Uso: Quando o problema for binário, como classificação binária de sentimentos (positivo/negativo).
'''

from tensorflow.keras.losses import BinaryCrossentropy
loss = BinaryCrossentropy()


In [None]:
'''
4. Connectionist Temporal Classification (CTC Loss)
🔎 Uso: Quando entrada e saída têm tamanhos diferentes e não alinhados, como:

Reconhecimento de voz
OCR
NLP com sequências de tamanho variável
'''

from tensorflow.keras.backend import ctc_batch_cost
# Usado direto na modelagem

# ✅ Complexa, mas poderosa para alguns modelos seq2seq.

In [None]:
'''
5. Masked Loss (Perda Mascarada)
🔎 Uso: Em Seq2Seq ou modelos que geram texto, onde tokens <PAD> são inseridos para completar o tamanho das sequências.

Você ignora os erros nos tokens de padding durante o cálculo da loss.

✅ Evita que o modelo aprenda a prever <PAD> só porque tem muitos deles.

Exemplo com TensorFlow:
'''

loss_object = SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def masked_loss(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))  # 0 é o padding
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask  # Aplica a máscara
    return tf.reduce_mean(loss_)


In [None]:
# ✅ Exemplo de uso na prática:

model.compile(optimizer='adam',
      loss=SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'])


In [None]:
# ✅ Exemplo Masked Loss de uso na prática:

model.compile(optimizer='adam',
      loss=masked_loss,
      metrics=['accuracy'])


In [6]:
!pip install tensorflow gensim



In [18]:
!pip install --upgrade numpy gensim numba tensorflow pandas

Collecting numpy
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.4
    Uninstalling numpy-2.2.4:
      Successfully uninstalled numpy-2.2.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is incompatible.
cudf-cu12 25.2.1 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.61.0 which is incompatible.
cuml-cu12 25.2.1 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.61.0 which is incompatible.
dask-cuda 25.2.0 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.61.0 which is incompatible.
tensorflow-text 2.18.1 requ

In [18]:
import pandas as pd
import json

df = pd.read_csv('Conversation.csv')
df.drop(columns=['Unnamed: 0'], inplace=True)

conversations = [
    ("Olá!", "Oi! Como posso te ajudar?"),
    ("Qual seu nome?", "Sou um chatbot de teste."),
    ("O que você faz?", "Eu converso com você!"),
    ("Tchau", "Até mais!")
]

json_data = json.loads(df.to_json())


for idx in json_data['question']:
  conversations.append((json_data['question'][idx], json_data['answer'][idx]))

conversations.pop(5)
conversations.pop(4)

conversations = conversations[:50]
conversations

[('Olá!', 'Oi! Como posso te ajudar?'),
 ('Qual seu nome?', 'Sou um chatbot de teste.'),
 ('O que você faz?', 'Eu converso com você!'),
 ('Tchau', 'Até mais!'),
 ("i'm pretty good. thanks for asking.", 'no problem. so how have you been?'),
 ('no problem. so how have you been?', "i've been great. what about you?"),
 ("i've been great. what about you?",
  "i've been good. i'm in school right now."),
 ("i've been good. i'm in school right now.", 'what school do you go to?'),
 ('what school do you go to?', 'i go to pcc.'),
 ('i go to pcc.', 'do you like it there?'),
 ('do you like it there?', "it's okay. it's a really big campus."),
 ("it's okay. it's a really big campus.", 'good luck with school.'),
 ('good luck with school.', 'thank you very much.'),
 ("how's it going?", "i'm doing well. how about you?"),
 ("i'm doing well. how about you?", 'never better, thanks.'),
 ('never better, thanks.', 'so how have you been lately?'),
 ('so how have you been lately?', "i've actually been pretty go

In [19]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Separando perguntas e respostas
questions, answers = zip(*conversations)

# Convertendo a lista para uma tupla
special_tokens = ('<start>', '<end>')

# Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions + answers + special_tokens)

vocab_size = len(tokenizer.word_index) + 1

# Convertendo para sequência numérica
encoder_input_data = tokenizer.texts_to_sequences(questions)
decoder_input_data = tokenizer.texts_to_sequences(["<start> " + ans for ans in answers])
decoder_target_data = tokenizer.texts_to_sequences([ans + " <end>" for ans in answers])

# Padding
max_len = max([len(seq) for seq in encoder_input_data + decoder_input_data])
encoder_input_data = pad_sequences(encoder_input_data, maxlen=max_len, padding='post')
decoder_input_data = pad_sequences(decoder_input_data, maxlen=max_len, padding='post')
decoder_target_data = pad_sequences(decoder_target_data, maxlen=max_len, padding='post')


In [24]:
import tensorflow as tf
from tensorflow.keras.losses import SparseCategoricalCrossentropy

loss_object = SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def masked_loss(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))  # 0 é o padding
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    mask = tf.squeeze(mask, axis=-1)  # Remove the last dimension
    loss_ *= mask  # Aplica a máscara
    return tf.reduce_mean(loss_)


In [25]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, GRU, Embedding, Dense

# Encoder
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(vocab_size, 50)(encoder_inputs)
# encoder_lstm = LSTM(100, return_state=True)
# _, state_h, state_c = encoder_lstm(enc_emb)

encoder_gru = GRU(100, return_state=True)
_, encoder_state = encoder_gru(enc_emb)

# Decoder
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(vocab_size, 50)(decoder_inputs)
# decoder_lstm = LSTM(100, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

decoder_gru = GRU(100, return_sequences=True, return_state=True)
decoder_outputs, decoder_state = decoder_gru(dec_emb, initial_state=[encoder_state])

decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Modelo final
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compilar
model.compile(optimizer='adam', loss=masked_loss)
model.summary()

In [26]:
model.fit([encoder_input_data, decoder_input_data],
          np.expand_dims(decoder_target_data, -1),
          batch_size=2, epochs=500) # originalmente 500


Epoch 1/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - loss: 2.2936
Epoch 2/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 2.0415
Epoch 3/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 2.1933
Epoch 4/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 1.9108
Epoch 5/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 2.0449
Epoch 6/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 1.7856
Epoch 7/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.8672
Epoch 8/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 1.9429
Epoch 9/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 1.5417
Epoch 10/500
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - lo

<keras.src.callbacks.history.History at 0x7acc91a36250>

In [27]:
# GRU

encoder_model = Model(encoder_inputs, encoder_state)

decoder_state_input = Input(shape=(100,))
dec_emb2 = Embedding(vocab_size, 50)(decoder_inputs)
decoder_outputs2, decoder_state2 = decoder_gru(dec_emb2, initial_state=[decoder_state_input])
decoder_outputs2 = decoder_dense(decoder_outputs2)

decoder_model = Model([decoder_inputs, decoder_state_input], [decoder_outputs2, decoder_state2])


In [28]:
def chat_response(input_text):
    # Prepara a entrada
    input_seq = tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_len, padding='post')

    # Codifica
    states = encoder_model.predict(input_seq)

    # Começa o decoder
    target_seq = np.zeros((1, 1))
    stop_condition = False
    response = ""

    while not stop_condition:
        output_tokens, state = decoder_model.predict([target_seq, states]) # GRU
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')

        if (sampled_word == '<end>' or len(response.split()) > max_len):
            stop_condition = True
        else:
            response += ' ' + sampled_word

        # Atualiza o target_seq e o estado
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states = state # GRU

    return response.strip()


In [29]:
while True:
    user_input = input("Você: ")
    if user_input.lower() == 'sair':
        break
    resposta = chat_response(user_input)
    print("Bot:", resposta)


Você: hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

KeyboardInterrupt: Interrupted by user

In [30]:
model.save('chatbot-GRU-loss-v0.h5')

