Engenharia de Software - PUC RJ
Notebook MVP da Sprint 4.

Este notebook realiza o treinamento do Chatbot desenvolvido para a implementação de FAQs via web.

In [21]:
# Imports: standard library (JSON parsing and object serialization)
import json
import pickle

# NLP libraries: NLTK plus the 'rslp' and 'punkt' data packages.
# 'rslp' backs the RSLPStemmer instantiated below; 'punkt' is presumably the
# tokenizer data needed by nltk.word_tokenize, used later in the notebook.
import nltk
nltk.download('rslp')
nltk.download('punkt')
# Shared stemmer instance used both to build the vocabulary and the training bags
stemmer = nltk.stem.RSLPStemmer()

# TensorFlow/Keras model building blocks, NumPy, and `random` (used for shuffling)
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
import random

[nltk_data] Downloading package rslp to /root/nltk_data...
[nltk_data]   Package rslp is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [22]:
# Variable initialization.
# Three independent, empty accumulators: the vocabulary tokens, the intent
# tags, and the (tokens, tag) training documents.
words, classes, documents = [], [], []

# Punctuation tokens that are excluded when the vocabulary is built.
ignore_words = [
    '?',
    '!',
    ',',
    ';',
]

In [23]:
# Mount Google Drive at /content/drive so later cells can read the intents
# file from it and copy the trained model back to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Load the intents and build the vocabulary, the class list and the documents.
#
# Note >> the intents file must be available on Google Drive in the "chatbot" folder!
#

# Start from empty containers so that re-running this cell does not append
# duplicate documents or re-stem an already stemmed vocabulary.
words = []
classes = []
documents = []

# The context manager guarantees the file handle is closed.
# NOTE(review): assumes intents.json is UTF-8 encoded (the JSON default) — confirm.
with open('/content/drive/MyDrive/chatbot/intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # Tokenize each word of the sentence
        tokens = nltk.word_tokenize(pattern)
        # Add the tokens to the raw vocabulary list
        words.extend(tokens)
        # Add the (tokens, tag) pair to the corpus documents
        documents.append((tokens, tag))
        # Register the tag the first time it is seen (only for intents that
        # have at least one pattern, since this runs inside the pattern loop)
        if tag not in classes:
            classes.append(tag)

# Lowercase and stem every token that is not in ignore_words, then
# de-duplicate and sort to get a stable vocabulary order.
words = sorted({stemmer.stem(w.lower()) for w in words if w not in ignore_words})

# Remove duplicates and sort the class tags
classes = sorted(set(classes))

print(f"quantidade de documents >>  {len(documents)}")
print(f"quantidade de classes >>  {len(classes)}")
print(f"quantidade unique stemmed words >> {len(words)}")

quantidade de documents >>  65
quantidade de classes >>  13
quantidade unique stemmed words >> 95


In [25]:
# Write the vocabulary to words.pkl.
# `with` closes the file even if pickle.dump raises.
with open('words.pkl', 'wb') as file:
    pickle.dump(words, file)

# Write the class tags to classes.pkl.
with open('classes.pkl', 'wb') as file:
    pickle.dump(classes, file)

In [26]:
# Build the training data: one [bag-of-words, one-hot class] pair per document.
training = []
output = []  # not populated in this cell; kept so the name stays defined

# Template output row with one zero per class
output_empty = [0] * len(classes)

# Training set: a bag of words for each sentence
for doc in documents:
    # Lowercase and stem the document tokens the same way the vocabulary was
    # built; a set makes each membership test below constant-time.
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in the sentence, 0 otherwise
    bag = [1 if w in pattern_words else 0 for w in words]
    # One-hot row: copy the template and flag the document's class
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# Shuffle the samples, then wrap them in an object array so the two columns
# (bags and one-hot rows) can be sliced apart.
random.shuffle(training)
training = np.array(training, dtype="object")

# Split into model inputs (bags) and targets (one-hot rows)
train_x = list(training[:,0])
train_y = list(training[:,1])

In [27]:
# Build the model: two ReLU hidden layers (512 and 128 units), each followed
# by 50% dropout, and a softmax output with one unit per class.
# Input width is the bag-of-words length; output width is the one-hot length.
model = Sequential()
model.add(Dense(512, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile the model.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected by newer Keras releases.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Fit and save the model.
# The training history is kept in `hist`; it is not an argument of
# model.save (the second positional parameter there is `overwrite`).
hist = model.fit(np.array(train_x), np.array(train_y), epochs=1000, batch_size=1000, verbose=1)
model.save('chatbot_model.h5')
print("Modelo Criado com sucesso.")



Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

  saving_api.save_model(


# Exportação do modelo para o Google Drive

In [28]:
# Copy the saved model file to the chatbot folder on Google Drive.
# NOTE(review): words.pkl and classes.pkl, written by an earlier cell, are not
# copied here — confirm whether they must be shipped alongside the model.
!cp chatbot_model.h5 /content/drive/MyDrive/chatbot