In [1]:
# 19/07)2020
# Francisco Dominguez

In [1]:
import random
import os
import json
import pickle
import numpy as np
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
#from nltk.stem import SnowballStemmer
#stemmer = SnowballStemmer('spanish')

In [2]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
keras.__version__

'2.2.4-tf'

In [64]:
class NLPModel(object):
    def __init__(self):
        self.chatBot=None
    def setChatBot(self,cb):
        self.chatBot=cb
class NLPModelKeyWords(NLPModel):
    pass
class NLPANN(object):
    pass
class NLPANNkeras(NLPANN):
    def __init__(self,nlpModel):
        self.nlpModel=nlpModel
        self.ann=None
    def train(self,train_x,train_y):
        self.ann = Sequential()
        self.ann.add(Dense(25, input_dim=train_x.shape[1]))     # densidad de la primera capa de neurona y tipo de entrada
        self.ann.add(Dropout(0.5))                                   # convierte a 0 la mitad de 1 en el entrenamiento
        self.ann.add(Dense(25))                                      # densidad de la primera capa de neurona
        self.ann.add(Dropout(0.5))                                   # convierte a 0 la mitad de 1 en el entrenamiento
        self.ann.add(Dense(train_y.shape[1], activation='softmax'))  # densidad de la salida
        self.ann.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        #self.ann.build()
        self.ann.summary()
        self.ann.fit(train_x, train_y, epochs=500, batch_size=8)   # entrena el modelo
    def save(self,filename):
        self.ann.save(filename)
    def load(self,filename):
        self.ann = load_model(filename)
    def predict(self,sentenceBow):
        p=self.ann.predict(np.reshape(sentenceBow,(1,-1)))
        return p
class NLPModelBoW(NLPModel):
    def __init__(self):
        # Bag of Words data
        self.words=[]
        self.classes=[]
        self.documents=[]
        self.ignore_words=['?']
        # MLP data (this could be in a different object)
        self.ann=NLPANNkeras(self)
        self.train_x = []
        self.train_y = []
    # return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
    def clean_up_sentence(self,sentence):
        # tokenize the pattern
        sentence_words = nltk.word_tokenize(sentence)
        # stem each word
        sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
        return sentence_words
    def bow(self,sentence, show_details=False):
        # tokenize the pattern
        sentence_words = self.clean_up_sentence(sentence)
        # bag of words
        bag = []
        # create our bag of words array
        for w in self.words:
            bag.append(1) if w in sentence_words else bag.append(0)
        return(np.array(bag))
    def buildData(self):
        self.buildBowData()
        self.buildTrainingData()
    def buildBowData(self):
        self.words=[]
        self.classes=[]
        self.documents=[]
        for intent in self.chatBot.intents:
            for pattern in intent.patterns:
                # tokenize each word in the sentence
                w = nltk.word_tokenize(pattern)
                # add to our words list
                self.words.extend(w)
                # add to documents in our corpus
                self.documents.append((pattern, intent.name))
                # add to our classes list
                if intent.name not in self.classes:
                    self.classes.append(intent.name)
        # stem and lower each word and remove duplicates
        self.words = [stemmer.stem(w.lower()) for w in self.words if w not in self.ignore_words]
        self.words = sorted(list(set(self.words)))

        # remove duplicates
        self.classes = sorted(list(set(self.classes)))

        print (len(self.documents), "documents")
        print (len(self.classes), "classes", self.classes)
        print (len(self.words), "unique stemmed words", self.words)
    def buildTrainingData(self):
        # create our training data
        training = []
        output = []
        # create an empty array for our output
        output_empty = [0] * len(self.classes)

        # training set, bag of words for each sentence
        x=[]
        y=[]
        for doc in self.documents:
            # initialize our bag of words
            bag = self.bow(doc[0])
            x.append(bag)
            # output is a '0' for each tag and '1' for current tag
            output_row = list(output_empty)
            output_row[self.classes.index(doc[1])] = 1
            y.append(output_row)
        # shuffle our features and turn into np.array
        random.shuffle(training)
        training = np.array(training)
        print(training.shape)

        # create train data
        self.train_x = np.array(x)
        self.train_y = np.array(y)
    def train(self):
        self.ann.train(self.train_x,self.train_y)
    def save(self):
        # save all of our data structures
        data={}
        data['words']    =self.words 
        data['classes']  =self.classes 
        data['documents']=self.documents 
        data['train_x']  =self.train_x 
        data['train_y']  =self.train_y
        pickle.dump( data, open( os.path.join('./',self.chatBot.name+".pk"), "wb" ) )
        self.ann.save(os.path.join('./',self.chatBot.name+'.h5'))    # guarda el modelo
    def load(self):
        data = pickle.load( open( os.path.join('./',self.chatBot.name+".pk"), "rb" ) )
        self.words     = data['words']
        self.classes   = data['classes']
        self.documents = data['documents']
        self.train_x   = np.array(data['train_x'])
        self.train_y   = np.array(data['train_y'])
        self.ann.load(os.path.join('./',self.chatBot.name+'.h5'))
    def predictClass(self,sentence):
        sentenceBow=self.bow(sentence)
        p=self.ann.predict(sentenceBow)
        idClass=np.argmax(p)
        pClass=np.max(p)
        className=self.classes[idClass]
        return className,idClass,pClass

In [46]:
class Intent(object):
    def __init__(self):
        self.name=""
        self.patterns=[]
        self.responses=[]
        self.action=None
    def fromJsonData(self,intent):
        self.patterns =[]
        self.responses=[]
        self.name=intent['tag']
        for pattern in intent['patterns']:
            self.patterns.append(pattern)
        for response in intent['responses']:
            self.responses.append(response)
        #self.action=intent['action']
    def toJsonData(self):
        dic={}
        dic["name"]=self.name
        dic["patterns"]=self.patterns
        dic["responses"]=self.responses
        dic["action"]=self.action
        return dic

In [61]:
class ChatBot(object):
    def __init__(self,name):
        self.name=name
        self.intents=[]
        self.model=NLPModelBoW()
        self.model.setChatBot(self)
        #TODO: refactor this
        self.loadJson()
        self.model.load()
    def loadJson(self):
        self.intents=[]
        fileName=os.path.join('./',self.name+'.json')
        with open(fileName) as json_data:
            intents = json.load(json_data)
        # loop through each sentence in our intents patterns
        for intent in intents['intents']:
            iobj=Intent()
            iobj.fromJsonData(intent)
            self.intents.append(iobj)
        self.model.buildData()
    def saveJson(self):
        dic={}
        dic["name"]=self.name
        dic["intents"]=[i.toJsonData() for i in self.intents]
        json_string=json.dumps(dic,indent=4)
        fileName=os.path.join('./',self.name+'0.json')
        with open(fileName, 'w') as json_file:
            json_file.write(json_string)
    def chooseRandom(self,responses):
        sizeResponses=len(responses)
        chooseIdResponse=random.randint(0,sizeResponses-1)
        return responses[chooseIdResponse]
    def chooseResponse(self,predictedIntent):
        for intent in self.intents:
            if intent.name==predictedIntent:
                return self.chooseRandom(intent.responses)
    def chat(self,sentence):
        cn,idc,pc=self.model.predictClass(sentence)
        print(idc,cn,pc)
        if pc<0.55:
            return self.chooseRandom(["I don't understand your sentence.",
                                 "What do you mean?",
                                 "Could you please repeat with other words?"]),"do not understand"
        return self.chooseResponse(cn),cn

In [31]:
class ChatBotEngine(object):
    def __init__(self):
        self.currentChatBot=ChatBot('intents')
        self.currentIntent="None"
    def getInput(self):
        return input('Ready: ')
    def setOutput(self,response):
        print(response)
    def run(self):
        while self.currentIntent!="goodbye":
            sentence=self.getInput()
            response,intent=self.currentChatBot.chat(sentence)
            self.currentIntent=intent
            self.setOutput(response)
        self.currentIntent="None"

In [7]:
# These are analogous to winget for GUI
class Chatget(object):
    pass
# This is analogous to desktop GUI
# ChatExplorer to manipulate folders and copy/move chatbots
class ChatDesktop(ChatBot):
    pass
# A chatbot to build or modify other chatbots
class MetaChatBot(ChatBot):
    pass
# Detect prediction error an improve the model of chatbots
class ChatLearner(ChatBot):
    pass

In [62]:
cb=ChatBot('intents')

27 documents
9 classes ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']
48 unique stemmed words ["'d", "'s", 'a', 'acceiv', 'anyon', 'ar', 'bye', 'can', 'card', 'cash', 'credit', 'day', 'do', 'doe', 'good', 'goodby', 'hav', 'hello', 'help', 'hi', 'hour', 'how', 'i', 'is', 'kind', 'lat', 'lik', 'mastercard', 'mop', 'of', 'on', 'op', 'rent', 'see', 'tak', 'thank', 'that', 'ther', 'thi', 'to', 'today', 'we', 'what', 'when', 'which', 'work', 'yo', 'you']
(0,)


In [63]:
cb.saveJson()

In [9]:
cb.intents[0].patterns

['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day']

In [10]:
test=cb.model.bow('Which mopeds do you have for sales?')
print(test.shape)
print(test)

(48,)
[0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 1]


In [11]:
cb.model.train_x.shape

(27, 48)

In [12]:
cb.model.train_y.shape

(27, 9)

In [13]:
cb.chat('Are you open now?')

2 hours 0.98776114


('Our hours are 9am-9pm every day', 'hours')

In [21]:
#cb.model.train()

In [16]:
cb.model.save()

In [14]:
cb.model.load()

In [15]:
test=np.reshape(test,(1,48))
test.shape

(1, 48)

In [16]:
p=cb.model.ann.predict(test)

In [17]:
print(p)

[[3.14796011e-09 1.16214025e-07 3.47157822e-07 9.99977350e-01
  1.16851879e-06 6.39009886e-06 1.04172295e-05 3.39423173e-06
  8.38683832e-07]]


In [18]:
idclass=np.argmax(p)
print(idclass)

3


In [19]:
print(cb.model.classes[idclass])

mopeds


In [32]:
cbe=ChatBotEngine()

27 documents
9 classes ['goodbye', 'greeting', 'hours', 'mopeds', 'opentoday', 'payments', 'rental', 'thanks', 'today']
48 unique stemmed words ["'d", "'s", 'a', 'acceiv', 'anyon', 'ar', 'bye', 'can', 'card', 'cash', 'credit', 'day', 'do', 'doe', 'good', 'goodby', 'hav', 'hello', 'help', 'hi', 'hour', 'how', 'i', 'is', 'kind', 'lat', 'lik', 'mastercard', 'mop', 'of', 'on', 'op', 'rent', 'see', 'tak', 'thank', 'that', 'ther', 'thi', 'to', 'today', 'we', 'what', 'when', 'which', 'work', 'yo', 'you']
(0,)


In [34]:
cbe.run()

Ready:  Hello


1 greeting 0.99883026
Hi there, how can I help?


Ready:  what are pyment modes?


2 hours 0.6349642
Our hours are 9am-9pm every day


Ready:  Can I pay with card?


6 rental 0.38866323
What do you mean?


Ready:  I have only cash


5 payments 0.9901258
We accept VISA, Mastercard and AMEX


Ready:  you


1 greeting 0.3721449
Could you please repeat with other words?


Ready:  are you


1 greeting 0.6009029
Hi there, how can I help?


Ready:  what do


3 mopeds 0.9696661
We rent Yamaha, Piaggio and Vespa mopeds


Ready:  exit


0 goodbye 0.36290523
Could you please repeat with other words?


Ready:  good bye


0 goodbye 0.84957325
Bye! Come back again soon.
