In [154]:
import nltk, string
from nltk.stem import PorterStemmer

import numpy as np

In [155]:
stemmer = PorterStemmer()                   # Porter stemmer instance shared by stem()
punctuations = string.punctuation           # ASCII punctuation characters; used to filter tokens out of the vocabulary

In [156]:
def tokenize(sentence):
    """Split ``sentence`` into tokens using NLTK's ``word_tokenize``."""
    tokens = nltk.word_tokenize(sentence)
    return tokens

In [157]:
def stem(word):
    """Lower-case ``word`` and return its stem from the shared Porter stemmer."""
    lowered = word.lower()
    return stemmer.stem(lowered)

In [158]:
def bag_of_words(tokenized_sentence, list_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of raw word tokens. Each token is passed
            through ``stem()`` before it is matched against the vocabulary.
        list_words: Sequence of vocabulary entries. Position ``i`` of the
            result corresponds to ``list_words[i]``.

    Returns:
        A 1-D ``float32`` array of length ``len(list_words)`` holding ``1.0``
        where the vocabulary entry occurs among the stemmed tokens and ``0.0``
        everywhere else.
    """
    # Stem once and keep the result in a set: membership tests are then O(1)
    # instead of rescanning the token list for every vocabulary entry.
    stemmed_tokens = {stem(w) for w in tokenized_sentence}
    bag = np.zeros(len(list_words), dtype=np.float32)

    # Mark every vocabulary slot whose word appears in the sentence.
    for index, w in enumerate(list_words):
        if w in stemmed_tokens:
            bag[index] = 1.0

    return bag

# Initialize

In [159]:
import json

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can garble non-ASCII text in the file
# (for example the "°C" in the weather replies).
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

In [160]:
# Containers that are filled while walking the intents file.
list_words, tags, xy = [], [], []    # vocabulary tokens, intent labels, (tokens, tag) samples
responses = {}                        # tag -> list of reply strings

In [161]:
# Start from empty containers so that re-running this cell rebuilds the
# dataset instead of appending a second copy of every sample and reply.
list_words = []
tags = []
responses = {}
xy = []

# `punctuations` is a plain string, so `w not in punctuations` would be a
# substring test; compare tokens against the individual characters instead.
punctuation_tokens = set(punctuations)

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)                        # one label per intent

    for pattern in intent['patterns']:
        words = tokenize(pattern)
        list_words.extend(words)            # raw vocabulary, cleaned up below
        xy.append((words, tag))             # sample: (tokenized pattern, label)

    # Only create an entry when the intent actually has replies, so a tag
    # never maps to an empty list that random.choice() could not sample from.
    if intent['responses']:
        responses.setdefault(tag, []).extend(intent['responses'])

# Vocabulary: stemmed, punctuation removed, de-duplicated and sorted so that
# feature positions are stable between runs.
list_words = [stem(w) for w in list_words if w not in punctuation_tokens]
list_words = sorted(set(list_words))
tags = sorted(set(tags))

In [162]:
print(tags)  # sorted, de-duplicated intent labels

['food', 'goodbye', 'greeting', 'thanks', 'weather']


In [163]:
print(xy)  # (tokenized pattern, tag) training samples

[(['Hi'], 'greeting'), (['Hey'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Goodbye', 'now'], 'goodbye'), (['Goodbye', 'now', 'see', 'you', 'later'], 'goodbye'), (['What', "'s", 'the', 'weather', 'like', 'today', '?'], 'weather'), (['How', "'s", 'the', 'weather', 'looking', 'for', 'today', '?'], 'weather'), (['What', "'s", 'the', 'temperature', 'outside', '?'], 'weather'), (['Is', 'it', 'going', 'to', 'rain', 'today', '?'], 'weather'), (['Can', 'you', 'recommend', 'a', 'good', 'food', 'to', 'eat', '?'], 'food'), (['What', "'s", 'a', 'good', 'dish', 'to', 'try', '?'], 'food'), (['What', "'s", 'the', 'best', 'food', 'to', 'eat', 'around', 'here', '?'], 'food'), (['I', "'m", 'a', 'bit', 'hungry'], 'food'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks

In [164]:
print(responses)  # tag -> list of candidate replies

{'greeting': ['Hey :-)', 'Hello, thanks for visiting', 'Hi there, what can I do for you?', 'Hi there, how can I help?'], 'goodbye': ['See you later, thanks for visiting', 'Have a nice day', 'Bye! Come back again soon.'], 'weather': ['Currently, the weather is overcast and the temperature is 30°C', "Right now in Jakarta, it's overcast and 30°C", "It's currently 30°C in Jakarta."], 'food': ["How about trying the sate? It's a popular choice.", 'You should try eating sate, they are excellent.', 'For Indonesian food, I recommend sate. They are great for any meal of the day'], 'thanks': ['Happy to help!', 'Any time!', 'My pleasure']}


In [165]:
# Encode every sample: each tokenized pattern becomes a 0/1 bag-of-words row
# aligned with list_words, and its tag becomes the label at the same index.
X = np.array([bag_of_words(tokens, list_words) for tokens, _ in xy])
y = np.array([label for _, label in xy])

# Training

In [166]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os
import pickle

def train():
    """Make a fitted classifier available in the global ``model``.

    If ``model.pickle`` exists and still matches the current dataset (same
    number of features as ``X`` and only labels that occur in ``y``), the
    cached model is loaded. Otherwise a ``MultinomialNB`` is fitted on a 90%
    split of ``X``/``y`` and written to ``model.pickle``.

    Side effects:
        Rebinds the module-level ``model``; may create or overwrite
        ``model.pickle`` in the working directory.

    Raises:
        Whatever ``pickle.load`` raises if ``model.pickle`` is unreadable.
    """
    global model

    model_path = 'model.pickle'

    if os.path.exists(model_path):
        # NOTE(security): pickle.load can execute arbitrary code. Only keep
        # this cache if model.pickle is a file this notebook wrote itself.
        with open(model_path, 'rb') as model_file:
            cached = pickle.load(model_file)

        # A cache built from an older intents file has a different vocabulary
        # size or label set; using it would make predict_proba fail or return
        # tags that no longer have replies, so verify before trusting it.
        feature_count = getattr(cached, 'feature_count_', None)
        cached_labels = set(np.asarray(getattr(cached, 'classes_', [])).tolist())
        cache_is_current = (
            feature_count is not None
            and np.ndim(feature_count) == 2
            and np.ndim(X) == 2
            and np.shape(feature_count)[1] == np.shape(X)[1]
            and cached_labels <= set(np.asarray(y).tolist())
        )
        if cache_is_current:
            model = cached
            return

        print('model.pickle does not match the current dataset; retraining')

    # The held-out 10% is kept out of training exactly as before; it is
    # discarded here because nothing in this function evaluates on it.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=42)
    model = MultinomialNB()
    model.fit(X_train, y_train)

    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file)

## Chatbot

In [167]:
import random

def get_random_response(class_label):
    """Pick one reply at random for the given intent tag.

    Returns ``None`` when ``class_label`` has no entry in ``responses``.
    """
    if class_label not in responses:
        return None

    candidates = responses[class_label]
    return random.choice(candidates)

In [168]:
def main_menu(confidence_threshold=0.35):
    """Run the interactive chat loop until the user types ``Quit`` or ``quit``.

    Each input line is tokenized, encoded with ``bag_of_words`` against
    ``list_words`` and classified by the global ``model``. The class
    probabilities are printed, followed by either a random reply for the most
    likely tag or a fallback message.

    Args:
        confidence_threshold: Minimum probability the best class must reach
            for a reply to be given; below it the fallback message is printed.
    """
    fallback = "Sorry I'm not sure I know how to answer that"

    while True:
        sentence = input(">>")
        if sentence in ('Quit', 'quit'):
            break

        X_sentence = [bag_of_words(tokenize(sentence), list_words)]
        probabilities = model.predict_proba(X_sentence)[0]
        print(probabilities)

        # predict_proba columns are ordered like model.classes_, so the argmax
        # gives the predicted label without a second call into the model.
        best = int(np.argmax(probabilities))

        if probabilities[best] < confidence_threshold:
            print(fallback)
        else:
            response = get_random_response(model.classes_[best])
            # A tag without stored replies yields None; show the fallback
            # rather than printing the literal text "None".
            print(response if response is not None else fallback)
        print()

In [169]:
def main():
    """Entry point: call train() to set up the model, then start the chat loop."""
    train()
    main_menu()

In [170]:
main()  # runs train() and then the interactive loop; type Quit or quit to stop

[0.01428236 0.11443119 0.76287458 0.07628746 0.03212441]
Hey :-)

[0.01836432 0.00972451 0.01215564 0.00972451 0.95003102]
Currently, the weather is overcast and the temperature is 30°C

[0.39542546 0.22225987 0.18521656 0.14817325 0.04892485]
For Indonesian food, I recommend sate. They are great for any meal of the day

[0.05391031 0.01698071 0.02122589 0.01698071 0.89090237]
It's currently 30°C in Jakarta.

[0.02533832 0.12075729 0.10063107 0.64403888 0.10923444]
My pleasure

[0.00188295 0.96916639 0.01495627 0.01196502 0.00202937]
Bye! Come back again soon.

