In [61]:
# Montando o Google Drive para acesso aos arquivos
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [62]:
# Carregando bibliotecas essenciais
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score, classification_report


In [63]:
# Carregando o dataset
path_csv = "/content/drive/MyDrive/modulo11/semana3/dados_cliente.csv"
df = pd.read_csv(path_csv)


In [64]:
# Selecionando as colunas relevantes e removendo valores nulos
df = df[['Pergunta', 'Intencao']].dropna()


In [65]:
# Preprocessamento: tokenização e padding
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(df['Pergunta'])
sequences = tokenizer.texts_to_sequences(df['Pergunta'])
padded_sequences = pad_sequences(sequences, padding='post')


In [66]:
# Codificando os rótulos
label_encoder = LabelEncoder()
intencoes_encoded = label_encoder.fit_transform(df['Intencao'])


In [67]:
# Dividindo os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, intencoes_encoded, test_size=0.2, random_state=42)


In [68]:
# Construção do modelo com uma camada de Pooling
model = Sequential([
    Embedding(input_dim=10000, output_dim=16, input_length=padded_sequences.shape[1]),
    GlobalAveragePooling1D(),  # Melhor para capturar a média das features
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])




In [69]:
# Avaliação e métricas
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)

print(f'Recall: {recall}')
print(f'F1-Score: {f1}')
print('Classification Report:')
print(report)

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Recall: 0.04950495049504951
F1-Score: 0.0178979436405179
Classification Report:
                                      precision    recall  f1-score   support

                      Acesso a conta       0.00      0.00      0.00         4
     Atualizacao de dados cadastrais       0.00      0.00      0.00         1
            Cadastro de beneficiario       0.00      0.00      0.00         3
                        Cancelamento       1.00      0.33      0.50         3
                      Como depositar       0.04      1.00      0.08         4
                  Como fazer remessa       0.00      0.00      0.00         2
                   Como se inscrever       0.00      0.00      0.00         9
         Confirmacao de cambio/taxas       0.00      0.00      0.00        21
              Envio via Deposit Code       0.00      0.00      0.00         1
Pedido de envio via metodo "ByPhone"       0.00      0.00      0.00

  >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)
  >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)
  >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0458 - loss: 2.8905  
Loss: 2.890580654144287
Accuracy: 0.049504950642585754
