In [1]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

from sklearn.neural_network import MLPClassifier

from operator import itemgetter
import pandas as pd
import numpy as np
import random
import json

In [2]:
# TRAINING DATA
# Load the intent definitions. Later cells read intents['intents'], where each
# entry supplies a 'tag' and a list of example 'patterns'.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [3]:
# tokens equal to one of these are kept out of the vocabulary
ignore_words = ['?', '!']

# documents = one (tokens, tag) pair per example pattern
documents = []
# every raw token seen across all patterns, duplicates included
all_tokens = []

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # tokenize the pattern once and reuse the tokens for both collections
        tokens = nltk.word_tokenize(pattern)
        all_tokens.extend(tokens)
        documents.append((tokens, tag))

# vocabulary = sorted, de-duplicated, lower-cased stems
# (the ignore filter is applied to the raw token, before lower-casing/stemming)
words = sorted({stemmer.stem(token.lower())
                for token in all_tokens
                if token not in ignore_words})

# classes = sorted, de-duplicated tags that own at least one pattern
classes = sorted({tag for _tokens, tag in documents})

# documents = combination between patterns and intents
print(len(documents), "documents")

# classes = intents
print(len(classes), "classes", classes)

# words = all words, vocabulary
print(len(words), "unique stemmed words", words)

62 documents
15 classes ['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'options', 'thanks', 'what_to_do']
152 unique stemmed words ["'s", ',', 'a', 'ahu', 'air', 'al', 'alarm', 'algorithm', 'an', 'and', 'any', 'anyon', 'ar', 'area', 'at', 'awesom', 'bad', 'be', 'been', 'big', 'boil', 'box', 'build', 'bye', 'cal', 'can', 'cent', 'chat', 'chil', 'cold', 'consum', 'control', 'cool', 'could', 'damp', 'day', 'did', 'discharg', 'do', 'doe', 'doing', 'down', 'driv', 'duc', 'dud', 'econom', 'elect', 'energy', 'equip', 'fan', 'fir', 'for', 'fre', 'freez', 'frequ', 'from', 'front', 'fuel', 'get', 'going', 'good', 'goodby', 'handl', 'has', 'hello', 'help', 'hi', 'hot', 'hour', 'how', 'hvac', 'i', 'ilc', 'in', 'intellig', 'is', 'kw', 'kwh', 'lat', 'leav', 'lik', 'load', 'log', 'look', 'mak', 'many', 'me', 'mix', 'mod', 'mor

In [4]:
# training = one [bag, output_row] pair per document
training = []
# all-zero template, one slot per class; copied for every document
output_empty = [0] * len(classes)

for tokens, tag in documents:
    # stem the pattern's tokens the same way the vocabulary was stemmed
    stems = [stemmer.stem(token.lower()) for token in tokens]

    # bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bag = [1 if vocab_word in stems else 0 for vocab_word in words]

    # one-hot target: copy the template and flag this document's tag
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

In [5]:
# Shuffle the [bag, output_row] pairs in place, then wrap them in an array.
# The two lists in each pair have different lengths (len(words) vs
# len(classes)), so the nesting is ragged. Older NumPy releases only warn
# about building an array from ragged input; NumPy 1.24 and later raise
# ValueError unless dtype=object is requested explicitly.
# With dtype=object the result keeps one row per pattern and two columns,
# each cell holding the original Python list, which is the layout the
# training[:,0] / training[:,1] slicing further down relies on.
random.shuffle(training)
training = np.array(training, dtype=object)

  training = np.array(training)


In [6]:
type(training)

numpy.ndarray

In [7]:
training.shape

(62, 2)

In [8]:
# Split each training row into its two halves.
# X - bag-of-words vector of the pattern, Y - one-hot row of its intent
train_x = [pair[0] for pair in training]
train_y = [pair[1] for pair in training]

# last expression of the cell: echo the targets
train_y

[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,

In [9]:
# Hyper-parameters for the multi-layer perceptron, kept together for easy tuning.
mlp_params = dict(learning_rate_init=0.0001, max_iter=9000, shuffle=True)

# NOTE(review): train_y rows are one-hot vectors, so scikit-learn presumably
# treats this as a multilabel target; the per-class scores printed by the
# later cells do not sum to 1 - confirm that this is intended.
model = MLPClassifier(**mlp_params).fit(train_x, train_y)

In [10]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Uses nltk.word_tokenize for splitting and the module-level `stemmer`
    for stemming; the result is a list with one stem per token, in order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]


# bag-of-words encoder: one 0/1 slot per vocabulary word
def bow(sentence, words, show_details=True):
    """Encode `sentence` as a 0/1 vector over the vocabulary `words`.

    The sentence is tokenized and stemmed with clean_up_sentence(); slot i of
    the result is 1 when words[i] equals one of those stems, otherwise 0.
    With show_details true, a "found in bag" line is printed for every match
    (a stem that occurs twice in the sentence is reported twice).

    Returns the vector as a numpy array.
    """
    stems = clean_up_sentence(sentence)
    # start from an all-zero vector and switch on the matching positions
    bag = [0] * len(words)
    for stem in stems:
        for position, vocab_word in enumerate(words):
            if vocab_word != stem:
                continue
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)
    return np.array(bag)
    

def classify_local(sentence):
    """Return the name of the class with the highest predicted score.

    The sentence is encoded with bow() against the module-level `words`
    (bow prints its "found in bag" lines, since show_details is left at its
    default), scored with model.predict_proba, and the scores are rounded to
    4 decimals. The full list of (class, score) pairs is printed; only the
    winning class name is returned, not its score.
    """
    # single-row frame labelled 'input', cast to float, as the model input
    features = pd.DataFrame([bow(sentence, words)], dtype=float, index=['input'])

    # scores for that one row, rounded and converted to plain Python floats
    scores = np.round(model.predict_proba(features)[0], 4).tolist()

    probs_and_classes = list(zip(classes, scores))
    print(probs_and_classes)

    # pick the pair with the largest score and keep just the class name
    best_class, _best_score = max(probs_and_classes, key=itemgetter(1))
    return best_class

In [11]:
# Manual sanity check: encode one sentence, then show the vector, the class
# order, and the raw per-class scores the model assigns to it.
p = bow("HVAC is a mechanical system in a building I think", words)

print(p, classes, sep="\n")

inputvar = pd.DataFrame(data=[p], index=['input'], dtype=float)
raw_scores = model.predict_proba(inputvar)
print(raw_scores)

found in bag: hvac
found in bag: is
found in bag: a
found in bag: system
found in bag: in
found in bag: a
found in bag: build
found in bag: i
[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'options', 'thanks', 'what_to_do']
[[8.20028933e-02 3.40126109e-01 1.49842245e-03 2.21061366e-03
  2.94568563e-04 2.68699738e-02 4.37162924e-03 4.93687158e-03
  1.42255322e-04 1.95997607e-04 3.30380115e-03 7.44389020e-04
  6.32546786e-04 2.31719432e-03 2.19119028e-02]]


In [12]:
classify_local('Hello, good day!')

found in bag: hello
found in bag: ,
found in bag: good
found in bag: day
[('check_ahu', 0.0002), ('check_all_hvac', 0.0005), ('check_boiler_plant', 0.0001), ('check_chiller_plant', 0.0001), ('check_energy', 0.0005), ('check_future_weather', 0.0001), ('check_how_it_should_all_work', 0.0001), ('check_power', 0.0001), ('check_runtime', 0.0003), ('check_zone_temps', 0.0004), ('goodbye', 0.0042), ('greeting', 0.9827), ('options', 0.0004), ('thanks', 0.0073), ('what_to_do', 0.0004)]


'greeting'

In [13]:
classify_local('How you can assist me?')

found in bag: how
found in bag: you
found in bag: can
found in bag: me
[('check_ahu', 0.0016), ('check_all_hvac', 0.0043), ('check_boiler_plant', 0.0017), ('check_chiller_plant', 0.0019), ('check_energy', 0.0011), ('check_future_weather', 0.0012), ('check_how_it_should_all_work', 0.0009), ('check_power', 0.0022), ('check_runtime', 0.0039), ('check_zone_temps', 0.0016), ('goodbye', 0.0092), ('greeting', 0.0728), ('options', 0.9047), ('thanks', 0.0341), ('what_to_do', 0.0007)]


'options'

In [14]:
classify_local('what is my mechanical system doing?')

found in bag: what
found in bag: is
found in bag: my
found in bag: system
found in bag: doing
[('check_ahu', 0.0452), ('check_all_hvac', 0.6985), ('check_boiler_plant', 0.0038), ('check_chiller_plant', 0.0008), ('check_energy', 0.0009), ('check_future_weather', 0.0032), ('check_how_it_should_all_work', 0.0056), ('check_power', 0.0004), ('check_runtime', 0.0003), ('check_zone_temps', 0.0015), ('goodbye', 0.0049), ('greeting', 0.0025), ('options', 0.0037), ('thanks', 0.0036), ('what_to_do', 0.0032)]


'check_all_hvac'

In [15]:
classify_local('are people complaining at all?')

found in bag: ar
found in bag: at
found in bag: al
[('check_ahu', 0.0089), ('check_all_hvac', 0.0053), ('check_boiler_plant', 0.0058), ('check_chiller_plant', 0.4073), ('check_energy', 0.003), ('check_future_weather', 0.0108), ('check_how_it_should_all_work', 0.0067), ('check_power', 0.0095), ('check_runtime', 0.0236), ('check_zone_temps', 0.0805), ('goodbye', 0.0246), ('greeting', 0.1111), ('options', 0.0142), ('thanks', 0.0115), ('what_to_do', 0.0297)]


'check_chiller_plant'

In [16]:
classify_local('how is the HVAC running?')

found in bag: how
found in bag: is
found in bag: the
found in bag: hvac
found in bag: run
[('check_ahu', 0.0066), ('check_all_hvac', 0.7446), ('check_boiler_plant', 0.002), ('check_chiller_plant', 0.0009), ('check_energy', 0.0007), ('check_future_weather', 0.0018), ('check_how_it_should_all_work', 0.0137), ('check_power', 0.0012), ('check_runtime', 0.0006), ('check_zone_temps', 0.0022), ('goodbye', 0.0047), ('greeting', 0.0089), ('options', 0.002), ('thanks', 0.0016), ('what_to_do', 0.0019)]


'check_all_hvac'

In [17]:
classify_local('has the power use for the building been bad?')

found in bag: has
found in bag: the
found in bag: pow
found in bag: us
found in bag: for
found in bag: the
found in bag: build
found in bag: been
found in bag: bad
[('check_ahu', 0.001), ('check_all_hvac', 0.0016), ('check_boiler_plant', 0.0003), ('check_chiller_plant', 0.0003), ('check_energy', 0.0077), ('check_future_weather', 0.0009), ('check_how_it_should_all_work', 0.0006), ('check_power', 0.2763), ('check_runtime', 0.0012), ('check_zone_temps', 0.0002), ('goodbye', 0.0004), ('greeting', 0.0004), ('options', 0.0001), ('thanks', 0.0034), ('what_to_do', 0.0521)]


'check_power'

In [18]:
classify_local('are we getting any free cooling or using mechanical cooling?')

found in bag: ar
found in bag: we
found in bag: get
found in bag: any
found in bag: fre
found in bag: cool
found in bag: or
found in bag: us
found in bag: cool
[('check_ahu', 0.0606), ('check_all_hvac', 0.0019), ('check_boiler_plant', 0.0006), ('check_chiller_plant', 0.1508), ('check_energy', 0.0006), ('check_future_weather', 0.0042), ('check_how_it_should_all_work', 0.0003), ('check_power', 0.0003), ('check_runtime', 0.0002), ('check_zone_temps', 0.0009), ('goodbye', 0.0006), ('greeting', 0.0016), ('options', 0.0015), ('thanks', 0.0003), ('what_to_do', 0.0011)]


'check_chiller_plant'

In [19]:
classify_local('does that seem like its working correctly?')

found in bag: doe
found in bag: that
found in bag: lik
found in bag: work
[('check_ahu', 0.0766), ('check_all_hvac', 0.003), ('check_boiler_plant', 0.0036), ('check_chiller_plant', 0.0067), ('check_energy', 0.0019), ('check_future_weather', 0.0055), ('check_how_it_should_all_work', 0.0022), ('check_power', 0.016), ('check_runtime', 0.0062), ('check_zone_temps', 0.0019), ('goodbye', 0.0125), ('greeting', 0.0085), ('options', 0.0016), ('thanks', 0.0412), ('what_to_do', 0.2071)]


'what_to_do'

In [20]:
classify_local('are any of the vav boxes not moving air?')

found in bag: ar
found in bag: any
found in bag: the
found in bag: vav
found in bag: box
found in bag: mov
found in bag: air
[('check_ahu', 0.0235), ('check_all_hvac', 0.0183), ('check_boiler_plant', 0.0034), ('check_chiller_plant', 0.0039), ('check_energy', 0.0003), ('check_future_weather', 0.0027), ('check_how_it_should_all_work', 0.0012), ('check_power', 0.0006), ('check_runtime', 0.0009), ('check_zone_temps', 0.626), ('goodbye', 0.0036), ('greeting', 0.0083), ('options', 0.0014), ('thanks', 0.0019), ('what_to_do', 0.0008)]


'check_zone_temps'

In [21]:
classify_local('what are the discharge temps looking like on the reheat vav box coils?')

found in bag: what
found in bag: ar
found in bag: the
found in bag: discharg
found in bag: temp
found in bag: look
found in bag: lik
found in bag: on
found in bag: the
found in bag: reh
found in bag: vav
found in bag: box
[('check_ahu', 0.0042), ('check_all_hvac', 0.0), ('check_boiler_plant', 0.0009), ('check_chiller_plant', 0.0002), ('check_energy', 0.0), ('check_future_weather', 0.0), ('check_how_it_should_all_work', 0.0), ('check_power', 0.0), ('check_runtime', 0.0), ('check_zone_temps', 0.8524), ('goodbye', 0.0), ('greeting', 0.0001), ('options', 0.0), ('thanks', 0.0), ('what_to_do', 0.0001)]


'check_zone_temps'

In [22]:
classify_local('How is your energy use been past 10 days?')

found in bag: how
found in bag: is
found in bag: energy
found in bag: us
found in bag: been
found in bag: day
[('check_ahu', 0.0008), ('check_all_hvac', 0.0166), ('check_boiler_plant', 0.0012), ('check_chiller_plant', 0.0002), ('check_energy', 0.0778), ('check_future_weather', 0.0004), ('check_how_it_should_all_work', 0.0005), ('check_power', 0.001), ('check_runtime', 0.0015), ('check_zone_temps', 0.0011), ('goodbye', 0.002), ('greeting', 0.5334), ('options', 0.0013), ('thanks', 0.0032), ('what_to_do', 0.0007)]


'greeting'

In [23]:
classify_local('energy')

found in bag: energy
[('check_ahu', 0.069), ('check_all_hvac', 0.1048), ('check_boiler_plant', 0.0942), ('check_chiller_plant', 0.0658), ('check_energy', 0.3473), ('check_future_weather', 0.0691), ('check_how_it_should_all_work', 0.0992), ('check_power', 0.0788), ('check_runtime', 0.084), ('check_zone_temps', 0.0936), ('goodbye', 0.2137), ('greeting', 0.2042), ('options', 0.0927), ('thanks', 0.197), ('what_to_do', 0.1453)]


'check_energy'

In [24]:
classify_local('power')

found in bag: pow
[('check_ahu', 0.093), ('check_all_hvac', 0.0489), ('check_boiler_plant', 0.0309), ('check_chiller_plant', 0.0849), ('check_energy', 0.0396), ('check_future_weather', 0.0972), ('check_how_it_should_all_work', 0.1014), ('check_power', 0.7337), ('check_runtime', 0.0764), ('check_zone_temps', 0.1041), ('goodbye', 0.1197), ('greeting', 0.1205), ('options', 0.1116), ('thanks', 0.1173), ('what_to_do', 0.1171)]


'check_power'

In [25]:
classify_local('boiler')

found in bag: boil
[('check_ahu', 0.0482), ('check_all_hvac', 0.0642), ('check_boiler_plant', 0.7589), ('check_chiller_plant', 0.0473), ('check_energy', 0.0532), ('check_future_weather', 0.0457), ('check_how_it_should_all_work', 0.0282), ('check_power', 0.0314), ('check_runtime', 0.0449), ('check_zone_temps', 0.0636), ('goodbye', 0.2298), ('greeting', 0.0899), ('options', 0.0499), ('thanks', 0.1073), ('what_to_do', 0.1151)]


'check_boiler_plant'

In [26]:
classify_local('chiller')

found in bag: chil
[('check_ahu', 0.092), ('check_all_hvac', 0.0516), ('check_boiler_plant', 0.0302), ('check_chiller_plant', 0.6513), ('check_energy', 0.0246), ('check_future_weather', 0.0725), ('check_how_it_should_all_work', 0.033), ('check_power', 0.0831), ('check_runtime', 0.0559), ('check_zone_temps', 0.0632), ('goodbye', 0.0865), ('greeting', 0.1275), ('options', 0.0804), ('thanks', 0.0496), ('what_to_do', 0.17)]


'check_chiller_plant'

In [27]:
classify_local('vav boxes')

found in bag: vav
found in bag: box
[('check_ahu', 0.0148), ('check_all_hvac', 0.0203), ('check_boiler_plant', 0.0244), ('check_chiller_plant', 0.0291), ('check_energy', 0.0077), ('check_future_weather', 0.0186), ('check_how_it_should_all_work', 0.0222), ('check_power', 0.0129), ('check_runtime', 0.0189), ('check_zone_temps', 0.7831), ('goodbye', 0.0654), ('greeting', 0.0623), ('options', 0.0184), ('thanks', 0.0443), ('what_to_do', 0.0485)]


'check_zone_temps'

In [28]:
classify_local('zone temps')

found in bag: zon
found in bag: temp
[('check_ahu', 0.0205), ('check_all_hvac', 0.024), ('check_boiler_plant', 0.0586), ('check_chiller_plant', 0.034), ('check_energy', 0.0052), ('check_future_weather', 0.0157), ('check_how_it_should_all_work', 0.0251), ('check_power', 0.0187), ('check_runtime', 0.0199), ('check_zone_temps', 0.9151), ('goodbye', 0.063), ('greeting', 0.0389), ('options', 0.0243), ('thanks', 0.0294), ('what_to_do', 0.048)]


'check_zone_temps'