In [31]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import NearestCentroid
from sklearn.linear_model import SGDClassifier
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier


from operator import itemgetter
import pandas as pd
import numpy as np
import random
import json

In [32]:
# TRAINING DATA
# Load the intent definitions. The following cell reads intents['intents'] and,
# for each entry, its 'tag' and 'patterns' fields.
# JSON text is UTF-8; request it explicitly so decoding does not depend on the
# platform's locale encoding (which is not UTF-8 on every system).
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [33]:
words = []
classes = []
documents = []
ignore_words = ['?', '!']

# Walk every example pattern of every intent and collect three things:
#   words     - every token seen (reduced to the stemmed vocabulary below)
#   documents - (token list, tag) pairs, one per pattern
#   classes   - each tag that has at least one pattern
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, tag))
        if tag not in classes:
            classes.append(tag)

# Vocabulary: drop the ignored punctuation tokens, lower-case and stem what is
# left, then de-duplicate and sort.
words = sorted({stemmer.stem(token.lower()) for token in words if token not in ignore_words})

# Intent tags, de-duplicated and sorted; positions in this list are the class ids.
classes = sorted(set(classes))

# documents = combination between patterns and intents
print(len(documents), "documents")

# classes = intents
print(len(classes), "classes", classes)

# words = all words, vocabulary
print(len(words), "unique stemmed words", words)

62 documents
15 classes ['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'options', 'thanks', 'what_to_do']
152 unique stemmed words ["'s", ',', 'a', 'ahu', 'air', 'al', 'alarm', 'algorithm', 'an', 'and', 'any', 'anyon', 'ar', 'area', 'at', 'awesom', 'bad', 'be', 'been', 'big', 'boil', 'box', 'build', 'bye', 'cal', 'can', 'cent', 'chat', 'chil', 'cold', 'consum', 'control', 'cool', 'could', 'damp', 'day', 'did', 'discharg', 'do', 'doe', 'doing', 'down', 'driv', 'duc', 'dud', 'econom', 'elect', 'energy', 'equip', 'fan', 'fir', 'for', 'fre', 'freez', 'frequ', 'from', 'front', 'fuel', 'get', 'going', 'good', 'goodby', 'handl', 'has', 'hello', 'help', 'hi', 'hot', 'hour', 'how', 'hvac', 'i', 'ilc', 'in', 'intellig', 'is', 'kw', 'kwh', 'lat', 'leav', 'lik', 'load', 'log', 'look', 'mak', 'many', 'me', 'mix', 'mod', 'mor

In [34]:
# Build one [bag_of_words, one_hot_tag] pair per document.
training = []
# Template for the one-hot output row: one zero per class.
output_empty = [0] * len(classes)

for tokens, tag in documents:
    # Stem the pattern's tokens the same way the vocabulary was stemmed.
    stems = [stemmer.stem(token.lower()) for token in tokens]

    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere.
    bag = [1 if vocab_word in stems else 0 for vocab_word in words]

    # Copy the zero template and switch on the position of this pattern's tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

In [35]:
# Shuffle the [bag, one_hot] pairs, then pack them into an array with one row
# per document and two columns (column 0: bag of words, column 1: one-hot tag).
# A private, seeded generator makes the order repeatable across kernel
# restarts without touching the global `random` state.
random.Random(42).shuffle(training)
# The two lists in each row have different lengths (vocabulary size vs. number
# of classes), so the input is ragged. dtype=object must be explicit: without
# it NumPy first warned (VisibleDeprecationWarning) and, from 1.24 on, raises
# ValueError.
training = np.array(training, dtype=object)

  training = np.array(training)


In [36]:
type(training)

numpy.ndarray

In [37]:
training.shape

(62, 2)

In [38]:
# Split the packed array by column into two training lists:
# X - bag-of-words vectors (column 0), Y - one-hot intent rows (column 1).
train_x, train_y = (list(training[:, column]) for column in (0, 1))

train_y

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,

In [39]:
# Collapse each one-hot row to its integer class id (an index into `classes`).
# The labels are read from column 1 of `training` rather than from train_y
# itself: overwriting train_y with a function of train_y made this cell fail
# when run a second time, because the already 1-D labels have no axis 1.
train_y = np.argmax(list(training[:, 1]), axis=1)

train_y

array([10, 10,  6,  3,  8,  0, 11,  1,  1,  0,  7,  3,  9, 13, 12, 14,  1,
       13,  0,  2,  5, 10, 10, 12, 14,  8, 14,  5,  9,  4,  2,  2, 13,  9,
       10,  6,  3,  0, 11,  7, 14, 11,  8, 12,  4,  0, 11,  9,  9, 12,  0,
        7,  6, 14,  0,  9,  0, 13, 13,  4,  1, 11], dtype=int64)

In [130]:
# Alternative classifiers tried on the same data (kept for reference):
#model = GaussianNB().fit(train_x, train_y)
#model = SVC(probability=True).fit(train_x, train_y)
#model = tree.DecisionTreeClassifier().fit(train_x, train_y)

# Fit the boosted ensemble on the bag-of-words vectors and integer class ids.
# random_state is fixed so the fitted model, and therefore every probability
# printed further down, is the same after a kernel restart.
# NOTE(review): the outputs recorded in this notebook show near-uniform
# probabilities and the same predicted tag for almost every query; consider
# comparing against the alternatives above.
model = AdaBoostClassifier(random_state=42).fit(train_x, train_y)

In [131]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Tokenization uses `nltk.word_tokenize`; stemming uses the module-level
    `stemmer`. Returns a list with one stem per token, in sentence order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]


def bow(sentence, words, show_details=True):
    """Encode `sentence` as a 0/1 bag-of-words vector over the vocabulary `words`.

    The sentence is normalized with `clean_up_sentence`; position i of the
    result is 1 when `words[i]` equals one of the resulting stems, else 0.

    sentence     -- raw text to encode
    words        -- vocabulary list; its order fixes the vector layout
    show_details -- when true, print a line for every stem/vocabulary match
                    (a stem that occurs twice in the sentence is reported twice)

    Returns a numpy array with one entry per vocabulary word.
    """
    stems = clean_up_sentence(sentence)
    flags = [0] * len(words)
    for stem in stems:
        for position, vocab_word in enumerate(words):
            if vocab_word != stem:
                continue
            # mark the vocabulary slot that this stem occupies
            flags[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(flags)
    

def classify_local(sentence):
    """Predict the intent tag for `sentence` and return it.

    The sentence is encoded with `bow` against the module-level `words`,
    wrapped in a single-row DataFrame and passed to the module-level
    `model.predict_proba`. The probabilities, rounded to 4 decimals, are
    paired positionally with `classes` and the pairs are printed.

    Returns only the tag with the highest rounded probability (the first
    such tag when several tie); the probability itself is not returned.
    """
    features = pd.DataFrame([bow(sentence, words)], dtype=float, index=['input'])
    probabilities = np.round(model.predict_proba(features)[0], 4).tolist()

    probs_and_classes = list(zip(classes, probabilities))
    print(probs_and_classes)

    # max() keeps the earliest pair among equal probabilities
    best_tag, _ = max(probs_and_classes, key=itemgetter(1))
    return best_tag

In [132]:
# Manual probe: encode one free-form sentence against the vocabulary
# (bow prints each matched stem because show_details defaults to True).
p = bow("HVAC is a mechanical system in a building I think", words)

# Show the raw 0/1 vector and the class order the probabilities below follow.
print(p)
print(classes)

# Wrap the vector in a single-row DataFrame, as classify_local does, and print
# the model's unrounded class probabilities.
inputvar = pd.DataFrame([p], dtype=float, index=['input'])
print(model.predict_proba(inputvar))

found in bag: hvac
found in bag: is
found in bag: a
found in bag: system
found in bag: in
found in bag: a
found in bag: build
found in bag: i
[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'options', 'thanks', 'what_to_do']
[[7.65649577e-02 7.70067548e-02 7.65649577e-02 7.65649577e-02
  7.65649577e-02 2.80721974e-15 7.68520927e-15 7.65649577e-02
  7.65649577e-02 7.76337647e-02 7.73511941e-02 7.73511941e-02
  7.65649577e-02 7.73511941e-02 7.73511941e-02]]


In [133]:
classify_local('Hello, good day!')

found in bag: hello
found in bag: ,
found in bag: good
found in bag: day
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [134]:
classify_local('How you can assist me?')

found in bag: how
found in bag: you
found in bag: can
found in bag: me
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [135]:
classify_local('what is my mechanical system doing?')

found in bag: what
found in bag: is
found in bag: my
found in bag: system
found in bag: doing
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [136]:
classify_local('are people complaining at all?')

found in bag: ar
found in bag: at
found in bag: al
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [137]:
classify_local('how is the HVAC running?')

found in bag: how
found in bag: is
found in bag: the
found in bag: hvac
found in bag: run
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [138]:
classify_local('has the power use for the building been bad?')

found in bag: has
found in bag: the
found in bag: pow
found in bag: us
found in bag: for
found in bag: the
found in bag: build
found in bag: been
found in bag: bad
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [139]:
classify_local('are we getting any free cooling or using mechanical cooling?')

found in bag: ar
found in bag: we
found in bag: get
found in bag: any
found in bag: fre
found in bag: cool
found in bag: or
found in bag: us
found in bag: cool
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [140]:
classify_local('does that seem like its working correctly?')

found in bag: doe
found in bag: that
found in bag: lik
found in bag: work
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [141]:
classify_local('are any of the vav boxes not moving air?')

found in bag: ar
found in bag: any
found in bag: the
found in bag: vav
found in bag: box
found in bag: mov
found in bag: air
[('check_ahu', 0.1463), ('check_all_hvac', 0.0712), ('check_boiler_plant', 0.0712), ('check_chiller_plant', 0.0712), ('check_energy', 0.0712), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0712), ('check_runtime', 0.0712), ('check_zone_temps', 0.0712), ('goodbye', 0.0712), ('greeting', 0.0712), ('options', 0.0708), ('thanks', 0.0712), ('what_to_do', 0.0712)]


'check_ahu'

In [142]:
classify_local('what are the discharge temps looking like on the reheat vav box coils?')

found in bag: what
found in bag: ar
found in bag: the
found in bag: discharg
found in bag: temp
found in bag: look
found in bag: lik
found in bag: on
found in bag: the
found in bag: reh
found in bag: vav
found in bag: box
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [143]:
classify_local('How is your energy use been past 10 days?')

found in bag: how
found in bag: is
found in bag: energy
found in bag: us
found in bag: been
found in bag: day
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [144]:
classify_local('energy')

found in bag: energy
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [145]:
classify_local('power')

found in bag: pow
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [146]:
classify_local('boiler')

found in bag: boil
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [147]:
classify_local('chiller')

found in bag: chil
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [148]:
classify_local('vav boxes')

found in bag: vav
found in bag: box
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'

In [149]:
classify_local('zone temps')

found in bag: zon
found in bag: temp
[('check_ahu', 0.0766), ('check_all_hvac', 0.077), ('check_boiler_plant', 0.0766), ('check_chiller_plant', 0.0766), ('check_energy', 0.0766), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0766), ('check_runtime', 0.0766), ('check_zone_temps', 0.0776), ('goodbye', 0.0774), ('greeting', 0.0774), ('options', 0.0766), ('thanks', 0.0774), ('what_to_do', 0.0774)]


'check_zone_temps'