In [1]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

from sklearn.neural_network import MLPClassifier
import pickle

from operator import itemgetter
import pandas as pd
import numpy as np
import random
import json

In [2]:
# TRAINING DATA
with open('hvac_babble.json') as json_data:
    hvac_world = json.load(json_data)

In [3]:
words = []
classes = []
answers = []
documents = []
ignore_words = ['?','!','{','|','}','~','£','¥', '©', '«','´','·','»', '¿','á','ç','é','ï','ø','ı', 'ıs', 'ליפול', '؟', '–','‘', '’', '“', '”', '…', '☺', '♣','♥', '「」', '加油喔', '釋股','___________']

In [4]:
#for (category, q_a) in hvac_world.items():
   #print("Category: " + category)
   #print("Q & A: " + str(q_a))
    #print(q_a['question'])
    #print(q_a['answer'][0])

In [5]:
for (category, q_a) in hvac_world.items():
    # tokenize each word in the sentence
    w = nltk.word_tokenize(q_a['answer'][0])
    # add to our words list
    words.extend(w)
    # add to documents in our corpus
    documents.append((w, category))
    # add to our classes list
    if category not in classes:
        classes.append(category)

In [6]:
# stem and lower each word and remove duplicates
words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))

# sort classes
classes = sorted(list(set(classes)))

# documents = combination between patterns and intents
print(len(documents), "documents")

# classes = intents
print(len(classes), "classes", classes)

# words = all words, vocabulary
print(len(words), "unique stemmed words", words)

16 documents
16 classes ['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'noanswer', 'options', 'thanks', 'what_to_do']
81 unique stemmed words [',', '...', 'a', 'ah', 'air', 'and', 'ar', 'ask', 'at', 'be', 'box', 'build', 'ca', 'calc', 'can', 'cent', 'check', 'condit', 'consum', 'contract', 'control', 'cool', 'cur', 'dat', 'doing', 'energy', 'equip', 'for', 'forecast', 'get', 'handl', 'happy', 'hello', 'help', 'hold', 'how', 'hvac', 'i', 'if', 'in', 'is', 'lik', 'loc', 'look', 'mech', "n't", 'of', 'ok', 'on', 'op', 'or', 'outsid', 'pattern', 'plant', 'pow', 'recommend', 'run', 'runtim', 'see', 'seem', 'should', 'sid', 'someon', 'sorry', 'system', 'temp', 'thank', 'that', 'the', 'think', 'to', 'today', 'understand', 'us', 'vav', 'ver', 'we', 'weath', 'what', 'you', 'zon']


In [7]:
# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:
    
    # initialize our bag of words
    bag = []
    
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    
    # stem each word - create base word, in attempt to represent related words
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    
    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    
    training.append([bag, output_row])


In [8]:
# shuffle our features and turn into np.array
#random.shuffle(training)
training = np.array(training)

  training = np.array(training)


In [9]:
type(training)

numpy.ndarray

In [10]:
training.shape

(16, 2)

In [11]:
# create train and test lists. X - patterns, Y - intents
train_x = list(training[:,0])
train_y = list(training[:,1])

#train_y

In [12]:
model = MLPClassifier(learning_rate_init=0.0001,max_iter=9000,shuffle=True).fit(train_x, train_y)

In [13]:
def clean_up_sentence(sentence):
    # tokenize the pattern - split words into array
    sentence_words = nltk.word_tokenize(sentence)
    # stem each word - create short form for word
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words


# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=True):
    # tokenize the pattern
    sentence_words = clean_up_sentence(sentence)
    # bag of words - matrix of N words, vocabulary matrix
    bag = [0]*len(words)  
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s: 
                # assign 1 if current word is in the vocabulary position
                bag[i] = 1
                if show_details:
                    print ("found in bag: %s" % w)
    return(np.array(bag))
    

def classify_local(sentence):
    
    # generate probabilities from the model
    input_data = pd.DataFrame([bow(sentence, words)], dtype=float, index=['input']) 
    results = model.predict_proba(input_data)[0]
    
    results =  np.round(results,4).tolist()
   
    probs_and_classes = list(zip(classes,results))
    print(probs_and_classes)
    
    best_result = max(probs_and_classes,key=itemgetter(1))[0]
    
    # return tuple of intent and probability
    return best_result

In [14]:
p = bow("HVAC is a mechanical system in a building I think", words)

print(p)
print(classes)

inputvar = pd.DataFrame([p], dtype=float, index=['input'])
print(model.predict_proba(inputvar))

found in bag: hvac
found in bag: is
found in bag: a
found in bag: mech
found in bag: system
found in bag: in
found in bag: a
found in bag: build
found in bag: i
found in bag: think
[0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0]
['check_ahu', 'check_all_hvac', 'check_boiler_plant', 'check_chiller_plant', 'check_energy', 'check_future_weather', 'check_how_it_should_all_work', 'check_power', 'check_runtime', 'check_zone_temps', 'goodbye', 'greeting', 'noanswer', 'options', 'thanks', 'what_to_do']
[[0.00555716 0.0820922  0.00387896 0.00506481 0.02778849 0.00454313
  0.08677212 0.03427276 0.0063824  0.00324175 0.01533694 0.00372896
  0.01391959 0.03422364 0.00717254 0.18522253]]


In [15]:
class_info = classify_local('Hello, good day!')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: hello
found in bag: ,
[('check_ahu', 0.0288), ('check_all_hvac', 0.0498), ('check_boiler_plant', 0.0596), ('check_chiller_plant', 0.0494), ('check_energy', 0.0455), ('check_future_weather', 0.0495), ('check_how_it_should_all_work', 0.0293), ('check_power', 0.0409), ('check_runtime', 0.0608), ('check_zone_temps', 0.0316), ('goodbye', 0.079), ('greeting', 0.5956), ('noanswer', 0.115), ('options', 0.0638), ('thanks', 0.1052), ('what_to_do', 0.0373)]
greeting


['Hello, thanks for asking',
 'Good to see you again',
 'Hi there, how can I help?']

In [16]:
class_info = classify_local('How you can assist me?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: how
found in bag: you
found in bag: can
[('check_ahu', 0.0709), ('check_all_hvac', 0.1812), ('check_boiler_plant', 0.0418), ('check_chiller_plant', 0.0305), ('check_energy', 0.0203), ('check_future_weather', 0.0565), ('check_how_it_should_all_work', 0.0422), ('check_power', 0.0394), ('check_runtime', 0.0237), ('check_zone_temps', 0.0655), ('goodbye', 0.5239), ('greeting', 0.0669), ('noanswer', 0.1523), ('options', 0.0745), ('thanks', 0.0556), ('what_to_do', 0.0674)]
goodbye


['See you!', 'Have a nice day', 'Bye! Come back again soon.']

In [17]:
classify_local('what is my mechanical system doing?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: what
found in bag: is
found in bag: mech
found in bag: system
found in bag: doing
[('check_ahu', 0.0242), ('check_all_hvac', 0.1784), ('check_boiler_plant', 0.0325), ('check_chiller_plant', 0.0352), ('check_energy', 0.1223), ('check_future_weather', 0.0772), ('check_how_it_should_all_work', 0.3706), ('check_power', 0.0209), ('check_runtime', 0.0561), ('check_zone_temps', 0.1538), ('goodbye', 0.1517), ('greeting', 0.0658), ('noanswer', 0.0886), ('options', 0.206), ('thanks', 0.0354), ('what_to_do', 0.1562)]
goodbye


['See you!', 'Have a nice day', 'Bye! Come back again soon.']

In [18]:
class_info = classify_local('are people complaining at all?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: ar
found in bag: at
[('check_ahu', 0.3514), ('check_all_hvac', 0.1308), ('check_boiler_plant', 0.1188), ('check_chiller_plant', 0.1372), ('check_energy', 0.1106), ('check_future_weather', 0.1677), ('check_how_it_should_all_work', 0.134), ('check_power', 0.0956), ('check_runtime', 0.0595), ('check_zone_temps', 0.2217), ('goodbye', 0.3278), ('greeting', 0.1051), ('noanswer', 0.1403), ('options', 0.0871), ('thanks', 0.18), ('what_to_do', 0.2758)]
check_ahu


['Checking to see how the air handlers are looking...']

In [19]:
class_info = classify_local('how is the HVAC running?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: how
found in bag: is
found in bag: the
found in bag: hvac
found in bag: run
[('check_ahu', 0.0911), ('check_all_hvac', 0.8161), ('check_boiler_plant', 0.0182), ('check_chiller_plant', 0.0131), ('check_energy', 0.0594), ('check_future_weather', 0.0566), ('check_how_it_should_all_work', 0.1962), ('check_power', 0.0424), ('check_runtime', 0.0502), ('check_zone_temps', 0.0563), ('goodbye', 0.0333), ('greeting', 0.0491), ('noanswer', 0.0688), ('options', 0.0403), ('thanks', 0.0414), ('what_to_do', 0.0687)]
check_all_hvac


['Checking to see how the HVAC is operating...']

In [20]:
class_info = classify_local('has the power use for the building been bad?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: the
found in bag: pow
found in bag: us
found in bag: for
found in bag: the
found in bag: build
[('check_ahu', 0.0609), ('check_all_hvac', 0.0323), ('check_boiler_plant', 0.0271), ('check_chiller_plant', 0.0288), ('check_energy', 0.0056), ('check_future_weather', 0.0064), ('check_how_it_should_all_work', 0.0255), ('check_power', 0.529), ('check_runtime', 0.0353), ('check_zone_temps', 0.0176), ('goodbye', 0.0236), ('greeting', 0.0753), ('noanswer', 0.0485), ('options', 0.1116), ('thanks', 0.0394), ('what_to_do', 0.0443)]
check_power


['Verifying that the power patterns for the building seem in check...']

In [21]:
class_info = classify_local('are we getting any free cooling or using mechanical cooling?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: ar
found in bag: we
found in bag: get
found in bag: cool
found in bag: or
found in bag: us
found in bag: mech
found in bag: cool
[('check_ahu', 0.0646), ('check_all_hvac', 0.0116), ('check_boiler_plant', 0.0497), ('check_chiller_plant', 0.0483), ('check_energy', 0.013), ('check_future_weather', 0.0245), ('check_how_it_should_all_work', 0.028), ('check_power', 0.0316), ('check_runtime', 0.0084), ('check_zone_temps', 0.0576), ('goodbye', 0.137), ('greeting', 0.0091), ('noanswer', 0.0175), ('options', 0.0699), ('thanks', 0.0323), ('what_to_do', 0.2757)]
what_to_do


['I think we should get a hold of someone at the building or a local mechanical contractor']

In [22]:
class_info = classify_local('does that seem like its working correctly?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: that
found in bag: seem
found in bag: lik
[('check_ahu', 0.1246), ('check_all_hvac', 0.0988), ('check_boiler_plant', 0.0819), ('check_chiller_plant', 0.0889), ('check_energy', 0.1611), ('check_future_weather', 0.1141), ('check_how_it_should_all_work', 0.0788), ('check_power', 0.6012), ('check_runtime', 0.0412), ('check_zone_temps', 0.0541), ('goodbye', 0.1217), ('greeting', 0.0829), ('noanswer', 0.112), ('options', 0.1262), ('thanks', 0.1024), ('what_to_do', 0.1049)]
check_power


['Verifying that the power patterns for the building seem in check...']

In [23]:
class_info = classify_local('are any of the vav boxes not moving air?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: ar
found in bag: of
found in bag: the
found in bag: vav
found in bag: box
found in bag: air
[('check_ahu', 0.1584), ('check_all_hvac', 0.0177), ('check_boiler_plant', 0.0162), ('check_chiller_plant', 0.0166), ('check_energy', 0.0162), ('check_future_weather', 0.0167), ('check_how_it_should_all_work', 0.0343), ('check_power', 0.0159), ('check_runtime', 0.055), ('check_zone_temps', 0.6266), ('goodbye', 0.0381), ('greeting', 0.0236), ('noanswer', 0.0306), ('options', 0.0278), ('thanks', 0.0215), ('what_to_do', 0.0321)]
check_zone_temps


['Checking to see how the zones and vav boxes are doing...']

In [24]:
class_info = classify_local('what are the discharge temps looking like on the reheat vav box coils?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: what
found in bag: ar
found in bag: the
found in bag: temp
found in bag: look
found in bag: lik
found in bag: on
found in bag: the
found in bag: vav
found in bag: box
[('check_ahu', 0.1785), ('check_all_hvac', 0.0098), ('check_boiler_plant', 0.0071), ('check_chiller_plant', 0.0085), ('check_energy', 0.0082), ('check_future_weather', 0.0174), ('check_how_it_should_all_work', 0.0384), ('check_power', 0.0145), ('check_runtime', 0.0604), ('check_zone_temps', 0.7196), ('goodbye', 0.0364), ('greeting', 0.0239), ('noanswer', 0.0287), ('options', 0.1178), ('thanks', 0.0179), ('what_to_do', 0.0185)]
check_zone_temps


['Checking to see how the zones and vav boxes are doing...']

In [25]:
class_info = classify_local('How is your energy use been past 10 days?')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: how
found in bag: is
found in bag: energy
found in bag: us
[('check_ahu', 0.0877), ('check_all_hvac', 0.2967), ('check_boiler_plant', 0.0527), ('check_chiller_plant', 0.0395), ('check_energy', 0.4014), ('check_future_weather', 0.1126), ('check_how_it_should_all_work', 0.0909), ('check_power', 0.0248), ('check_runtime', 0.0568), ('check_zone_temps', 0.0862), ('goodbye', 0.139), ('greeting', 0.0724), ('noanswer', 0.0652), ('options', 0.1624), ('thanks', 0.0772), ('what_to_do', 0.1025)]
check_energy


['Verifying that the energy consumption is in check...']

In [26]:
class_info = classify_local('energy')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: energy
[('check_ahu', 0.088), ('check_all_hvac', 0.1193), ('check_boiler_plant', 0.1757), ('check_chiller_plant', 0.1865), ('check_energy', 0.4099), ('check_future_weather', 0.2664), ('check_how_it_should_all_work', 0.1564), ('check_power', 0.0929), ('check_runtime', 0.1611), ('check_zone_temps', 0.1164), ('goodbye', 0.2903), ('greeting', 0.1448), ('noanswer', 0.1512), ('options', 0.142), ('thanks', 0.222), ('what_to_do', 0.1436)]
check_energy


['Verifying that the energy consumption is in check...']

In [27]:
class_info = classify_local('power')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: pow
[('check_ahu', 0.1793), ('check_all_hvac', 0.167), ('check_boiler_plant', 0.2238), ('check_chiller_plant', 0.174), ('check_energy', 0.0934), ('check_future_weather', 0.1744), ('check_how_it_should_all_work', 0.184), ('check_power', 0.4019), ('check_runtime', 0.1258), ('check_zone_temps', 0.1205), ('goodbye', 0.2353), ('greeting', 0.159), ('noanswer', 0.2345), ('options', 0.1845), ('thanks', 0.2234), ('what_to_do', 0.1886)]
check_power


['Verifying that the power patterns for the building seem in check...']

In [28]:
class_info = classify_local('boiler')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

[('check_ahu', 0.1593), ('check_all_hvac', 0.2368), ('check_boiler_plant', 0.2677), ('check_chiller_plant', 0.2796), ('check_energy', 0.1896), ('check_future_weather', 0.3278), ('check_how_it_should_all_work', 0.1971), ('check_power', 0.1873), ('check_runtime', 0.1476), ('check_zone_temps', 0.1606), ('goodbye', 0.3733), ('greeting', 0.256), ('noanswer', 0.2725), ('options', 0.141), ('thanks', 0.3222), ('what_to_do', 0.2294)]
goodbye


['See you!', 'Have a nice day', 'Bye! Come back again soon.']

In [29]:
class_info = classify_local('chiller')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

[('check_ahu', 0.1593), ('check_all_hvac', 0.2368), ('check_boiler_plant', 0.2677), ('check_chiller_plant', 0.2796), ('check_energy', 0.1896), ('check_future_weather', 0.3278), ('check_how_it_should_all_work', 0.1971), ('check_power', 0.1873), ('check_runtime', 0.1476), ('check_zone_temps', 0.1606), ('goodbye', 0.3733), ('greeting', 0.256), ('noanswer', 0.2725), ('options', 0.141), ('thanks', 0.3222), ('what_to_do', 0.2294)]
goodbye


['See you!', 'Have a nice day', 'Bye! Come back again soon.']

In [30]:
class_info = classify_local('vav boxes')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: vav
found in bag: box
[('check_ahu', 0.067), ('check_all_hvac', 0.103), ('check_boiler_plant', 0.1401), ('check_chiller_plant', 0.1405), ('check_energy', 0.073), ('check_future_weather', 0.1182), ('check_how_it_should_all_work', 0.1045), ('check_power', 0.0553), ('check_runtime', 0.0891), ('check_zone_temps', 0.6966), ('goodbye', 0.2005), ('greeting', 0.1319), ('noanswer', 0.1484), ('options', 0.0864), ('thanks', 0.1055), ('what_to_do', 0.107)]
check_zone_temps


['Checking to see how the zones and vav boxes are doing...']

In [31]:
class_info = classify_local('zone temps')
print(class_info)

responce = hvac_world.get(class_info, None)
responce['answer']

found in bag: zon
found in bag: temp
[('check_ahu', 0.0794), ('check_all_hvac', 0.1084), ('check_boiler_plant', 0.1633), ('check_chiller_plant', 0.1729), ('check_energy', 0.103), ('check_future_weather', 0.242), ('check_how_it_should_all_work', 0.1296), ('check_power', 0.0826), ('check_runtime', 0.1329), ('check_zone_temps', 0.259), ('goodbye', 0.285), ('greeting', 0.1848), ('noanswer', 0.1981), ('options', 0.2936), ('thanks', 0.2185), ('what_to_do', 0.1234)]
options


['I can help to check and see if the building temperature control system is OK, like zone temps, AHUs, central plant, power, or energy use',
 'What sort of system in your building do you want to check?']

In [32]:
# save the model to disk
hvacmodel = 'hvac_model.sav'
pickle.dump(model, open(hvacmodel, 'wb'))

In [33]:
# load the model from disk
loaded_model = pickle.load(open(hvacmodel, 'rb'))