In [1]:
# 19/07/2020
# Francisco Dominguez

In [1]:
import random
import os
import json
import pickle
import numpy as np
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
#from nltk.stem import SnowballStemmer
#stemmer = SnowballStemmer('spanish')

In [2]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
keras.__version__

'2.2.4-tf'

In [25]:
class NLPModel(object):
    """Base class for NLP models that keep a reference to a chat bot."""

    def __init__(self):
        # No chat bot is attached until setChatBot() is called.
        self.chatBot = None

    def setChatBot(self, cb):
        """Store `cb` as the chat bot this model works for."""
        self.chatBot = cb
class NLPModelBoW(NLPModel):
    """Bag-of-words intent classifier backed by a small Keras MLP.

    The vocabulary (`words`), the intent names (`classes`) and the
    (pattern, intent name) pairs (`documents`) are derived from the intents of
    the attached chat bot. `train_x` holds one bag-of-words vector per
    document and `train_y` the matching one-hot class rows.
    """

    def __init__(self):
        # Initialise the base class so that self.chatBot always exists.
        super().__init__()
        # Bag of Words data
        self.words = []
        self.classes = []
        self.documents = []
        self.ignore_words = ['?']
        # MLP data
        self.ann = None
        self.train_x = []
        self.train_y = []

    def clean_up_sentence(self, sentence):
        """Tokenize `sentence` and return its lower-cased, stemmed tokens."""
        return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

    def bow(self, sentence, show_details=False):
        """Return the bag-of-words vector of `sentence`.

        The result is a numpy array with one entry per word in `self.words`:
        1 if that word occurs in the stemmed sentence, 0 otherwise.
        `show_details` is not used; it is kept so existing callers keep working.
        """
        # A set makes each membership test constant-time.
        present = set(self.clean_up_sentence(sentence))
        return np.array([1 if w in present else 0 for w in self.words])

    def buildData(self):
        """Rebuild the vocabulary/classes/documents and then the training arrays."""
        self.buildBowData()
        self.buildTrainingData()

    def buildBowData(self):
        """Collect words, classes and documents from the chat bot's intents.

        Starts from empty lists on every call, so repeated calls do not
        accumulate data. Prints a short summary of what was collected.
        """
        self.words = []
        self.classes = []
        self.documents = []
        tokens = []
        for intent in self.chatBot.intents:
            for pattern in intent.patterns:
                # tokenize each word in the sentence and keep it for the vocabulary
                tokens.extend(nltk.word_tokenize(pattern))
                # add to documents in our corpus
                self.documents.append((pattern, intent.name))
                # add to our classes list
                if intent.name not in self.classes:
                    self.classes.append(intent.name)
        # Drop ignored tokens (compared before stemming), then stem, lower-case,
        # de-duplicate and sort.
        self.words = sorted(set(
            stemmer.stem(t.lower()) for t in tokens if t not in self.ignore_words
        ))
        # remove duplicates
        self.classes = sorted(set(self.classes))

        print(len(self.documents), "documents")
        print(len(self.classes), "classes", self.classes)
        print(len(self.words), "unique stemmed words", self.words)

    def buildTrainingData(self):
        """Build `train_x` / `train_y` from `self.documents`.

        Row i of `train_x` is the bag-of-words vector of document i; row i of
        `train_y` is all zeros except for a 1 at the index of the document's
        class in `self.classes`. Rows keep the order of `self.documents`.
        """
        n_classes = len(self.classes)
        x = []
        y = []
        for pattern, class_name in self.documents:
            x.append(self.bow(pattern))
            # output is a '0' for each class and '1' for the current class
            one_hot = [0] * n_classes
            one_hot[self.classes.index(class_name)] = 1
            y.append(one_hot)
        # create train data
        self.train_x = np.array(x)
        self.train_y = np.array(y)

    def train(self, epochs=0, batch_size=8):
        """Build and compile the MLP, print its summary and optionally fit it.

        epochs: number of training epochs. With the default of 0 the network
            is only built and compiled and no fitting is performed.
        batch_size: batch size passed to fit() when `epochs` is positive.

        Requires the training arrays to have been built (see buildData()).
        """
        n_inputs = self.train_x.shape[1]     # length of one bag-of-words vector
        n_outputs = self.train_y.shape[1]    # number of classes
        self.ann = Sequential()
        self.ann.add(Dense(25, input_dim=n_inputs))            # first hidden layer; fixes the input size
        self.ann.add(Dropout(0.5))                             # drops half of the units during training
        self.ann.add(Dense(25))                                # second hidden layer
        self.ann.add(Dropout(0.5))                             # drops half of the units during training
        self.ann.add(Dense(n_outputs, activation='softmax'))   # output layer: one unit per class
        self.ann.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        self.ann.build()
        self.ann.summary()
        if epochs > 0:
            # tf.keras names this argument `epochs` (the old `nb_epoch` is gone).
            self.ann.fit(self.train_x, self.train_y, epochs=epochs, batch_size=batch_size)

    def save(self):
        """Write the bag-of-words data to '<name>.pk' and the network to '<name>.h5'.

        `<name>` is the attached chat bot's name; both files go to the current
        directory.

        Raises:
            RuntimeError: if there is no network yet (neither train() nor
                load() has been called); nothing is written in that case.
        """
        if self.ann is None:
            raise RuntimeError("no model to save: call train() or load() first")
        # save all of our data structures
        data = {
            'words': self.words,
            'classes': self.classes,
            'documents': self.documents,
            'train_x': self.train_x,
            'train_y': self.train_y,
        }
        with open(os.path.join('./', self.chatBot.name + ".pk"), "wb") as pk_file:
            pickle.dump(data, pk_file)
        # Save the network held by this instance, not some module-level variable.
        self.ann.save(os.path.join('./', self.chatBot.name + '.h5'))

    def load(self):
        """Restore the data and network previously written by save().

        NOTE: pickle.load can execute arbitrary code; only load '.pk' files
        that come from a trusted source.
        """
        with open(os.path.join('./', self.chatBot.name + ".pk"), "rb") as pk_file:
            data = pickle.load(pk_file)
        self.words = data['words']
        self.classes = data['classes']
        self.documents = data['documents']
        self.train_x = np.array(data['train_x'])
        self.train_y = np.array(data['train_y'])
        self.ann = load_model(os.path.join('./', self.chatBot.name + '.h5'))

In [26]:
class Intent(object):
    """One chat-bot intent: a name, example patterns and canned responses."""

    def __init__(self):
        self.name = ""
        self.patterns = []
        self.responses = []
        self.action = None

    def fromJsonData(self, intent):
        """Fill this intent from a mapping with 'tag', 'patterns' and 'responses'.

        Existing patterns and responses are discarded first; the items are
        copied into fresh lists. The 'action' field is not read (that
        assignment is still disabled), so `self.action` is left untouched.
        """
        self.patterns, self.responses = [], []
        self.name = intent['tag']
        self.patterns.extend(intent['patterns'])
        self.responses.extend(intent['responses'])

In [27]:
class ChatBot(object):
    """A named chat bot: a list of intents plus the NLP model built from them.

    `name` is also the base name of the intents file ('<name>.json') that
    loadJson() reads from the current directory.
    """

    def __init__(self, name):
        self.name = name
        self.intents = []
        self.model = NLPModelBoW()
        self.model.setChatBot(self)

    def loadJson(self):
        """Load the intents from './<name>.json' and rebuild the model data.

        The file must contain an object with an 'intents' list. The loaded
        intents replace any previously loaded ones, so calling this method
        again does not duplicate intents or training documents.
        """
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(os.path.join('./', self.name + '.json'), encoding='utf-8') as json_data:
            intents = json.load(json_data)
        # Build the new list completely before publishing it.
        loaded = []
        for intent in intents['intents']:
            iobj = Intent()
            iobj.fromJsonData(intent)
            loaded.append(iobj)
        self.intents = loaded
        self.model.buildData()

In [28]:
# Create the bot and load './intents.json'; loadJson() also builds the model's training data.
cb=ChatBot('intents')
cb.loadJson()

27 documents
9 classes ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']
48 unique stemmed words ["'d", "'s", 'a', 'acceiv', 'anyon', 'ar', 'bye', 'can', 'card', 'cash', 'credit', 'day', 'do', 'doe', 'good', 'goodby', 'hav', 'hello', 'help', 'hi', 'hour', 'how', 'i', 'is', 'kind', 'lat', 'lik', 'mastercard', 'mop', 'of', 'on', 'op', 'rent', 'see', 'tak', 'thank', 'that', 'ther', 'thi', 'to', 'today', 'we', 'what', 'when', 'which', 'work', 'yo', 'you']
(0,)


In [29]:
# Example patterns of the first intent read from the JSON file.
cb.intents[0].patterns

['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day']

In [30]:
# Encode a sample sentence as a bag-of-words vector (one 0/1 entry per vocabulary word) and inspect it.
test=cb.model.bow('Which mopeds do you have for sales?')
print(test.shape)
print(test)

(48,)
[0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 1]


In [31]:
# Training inputs: one bag-of-words row per document (pattern).
cb.model.train_x

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [32]:
# One-hot training targets: (number of documents, number of classes).
cb.model.train_y.shape

(27, 9)

In [None]:
# Build and compile the network and print its summary (no fitting is performed by this call).
cb.model.train()

In [None]:
# Stand-alone copy of the network that NLPModelBoW.train() builds.
# The layer sizes come from the training arrays: the input size is the length of a
# bag-of-words vector (number of vocabulary words), NOT the number of documents.
n_inputs = cb.model.train_x.shape[1]
n_outputs = cb.model.train_y.shape[1]

model = Sequential()
model.add(Dense(25, input_dim=n_inputs))           # first hidden layer; fixes the input size
model.add(Dropout(0.5))                            # drops half of the units during training
model.add(Dense(25))                               # second hidden layer
model.add(Dropout(0.5))                            # drops half of the units during training
model.add(Dense(n_outputs, activation='softmax'))  # output layer: one unit per class
model.summary()