# NLU

In [1]:
import nltk
from textblob import TextBlob
from nltk.stem import WordNetLemmatizer
from nltk.tree import Tree
from nltk.corpus import wordnet
lemmatizer = WordNetLemmatizer()


from little_heys import *

### 1. Define Grammar
#### Problem: Difficult to define our own grammar

In [2]:
# Load the context-free grammar definition from disk. The context manager
# closes the file as soon as the text has been read (the original left the
# handle open for the lifetime of the kernel).
with open("grammar_beau.txt", "r") as g:
    grammar = nltk.CFG.fromstring(g.read())

### 2. WordNet

#### Get synonyms of all the base verbs, nouns and adjectives before passing them to our grammar
#### Problem: It is hard to decide which base verbs, nouns and adjectives to define

In [3]:
# Fetch the WordNet corpus used by `wordnet.synsets` and the lemmatizer.
# When the corpus is already present nltk only reports that it is up to date.
nltk.download('wordnet')
# Example lookup of the synsets for one word.
# NOTE(review): `syns` is not read by any later cell visible here — confirm it is still needed.
syns = wordnet.synsets("program")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# Canonical ("base") vocabulary. Every synonym found below is mapped back to
# one of these words before the sentence is handed to the grammar.
base_verbs = ["increase", "decrease", "swing", "set", "turn-on",
              "turn", "activate", "stop", "speed", "reduce", "raise",
              "pause", "lower", "dry"]
base_nouns = ["fan", "temperature", "humidity", "air-condition", "air", "heat", "mode", "room", "degree"]
base_adjs = ["cold", "hot", "windy"]

bases = [base_verbs, base_nouns, base_adjs]
base_names = ["base_verbs", "base_nouns", "base_adjs"]

# One dict per part of speech: base word -> list of accepted surface forms.
# Each list starts with the base word itself.
verb_net = {key: [key] for key in base_verbs}
noun_net = {key: [key] for key in base_nouns}
adj_net = {key: [key] for key in base_adjs}

# Re-created here so this cell does not depend on whatever the star import
# in the first cell may have bound to the same name.
lemmatizer = WordNetLemmatizer()

# Walk the three base lists together with their net and the WordNet POS tag
# used to lemmatize their synonyms ('v' verb, 'n' noun, 'a' adjective).
# The base lists are disjoint, so picking the net by list is equivalent to
# the per-word membership tests this replaces.
for base, net, pos in zip(bases, (verb_net, noun_net, adj_net), ("v", "n", "a")):
    for to_find in base:
        for syn in wordnet.synsets(to_find):
            for l in syn.lemmas():
                lemma = lemmatizer.lemmatize(l.name(), pos=pos)
                # De-duplicate on the value that is actually stored. The
                # previous check looked at the raw lemma name, so a
                # lemmatized form could be appended more than once.
                if lemma not in net[to_find]:
                    net[to_find].append(lemma)

In [5]:
# noun_net

### 3. Hey AC!

In [6]:
class hey_ac():
    """Normalise one air-conditioner command and parse it with a CFG.

    Pipeline (each step is a method so it can be inspected on its own):

    1. ``__init__``      - optionally mask digit tokens as ``hey_num``,
                           lower-case the text and run TextBlob spelling
                           correction.
    2. ``lemmatization`` - lemmatize every word and drop "please".
    3. ``wordnet``       - map every word to its base word via the
                           module-level ``verb_net`` / ``noun_net`` /
                           ``adj_net`` dictionaries.
    4. ``check``         - parse the base-word tokens with a recursive
                           descent parser and restore the masked numbers.
    5. ``classify_me``   - print the leaves found under the VB / NN / CO / RP
                           labels of the parse tree.

    Parameters
    ----------
    sent : str
        The raw user sentence.
    grammar
        Grammar object handed to ``nltk.RecursiveDescentParser``.
    var : bool, optional
        When true, whitespace-separated tokens made only of digits are
        replaced by the placeholder ``hey_num`` before preprocessing and put
        back after parsing.
    """

    # Function words / placeholders that are passed to the grammar unchanged.
    _PASS_THROUGH = ("the", "a", "an", "off", "down", "up", "on", "to", "hey_num",
                     "by", "from", "a.m", "at", "pm", "o'clock")

    # Single message used for every "could not interpret the sentence" failure.
    _NOT_UNDERSTOOD = "Sorry I do not understand you"

    def __init__(self, sent, grammar, var=False):
        self.orig_sent = sent
        self.sent = sent
        self.grammar = grammar
        self.var = var

        if self.var:
            # Mask digit tokens so spelling correction and the grammar see a
            # single known placeholder instead of arbitrary numbers.
            self.sent = self.sent.split()
            self.sent = ["hey_num" if i.isdigit() else i for i in self.sent]
            text = ' '.join(self.sent)
        else:
            text = self.sent

        # Increase TeMpeRATure -> increase temperature
        self.parsed = TextBlob(text).lower()
        # Spelling check: increese -> increase
        self.parsed = self.parsed.correct()
        self.sent_prep = []

    def lemmatization(self):
        """Lemmatize every word of the preprocessed sentence.

        Returns
        -------
        list
            The lemmatized words with every "please" removed. The same list
            is stored on ``self.sent_prep``.

        The result is rebuilt from scratch on every call, so calling the
        method more than once no longer duplicates tokens.
        """
        lemmas = []
        for word in self.parsed.words:
            # pos = 'a' --> adj, 'v' --> verb, 'n' --> noun
            # e.g. colder -> cold, increases -> increase
            lemma = lemmatizer.lemmatize(word, pos='a')
            lemma = lemmatizer.lemmatize(lemma, pos='v')
            lemma = lemmatizer.lemmatize(lemma, pos='n')
            # Final pass with the lemmatizer's default part of speech.
            lemma = lemmatizer.lemmatize(lemma)
            lemmas.append(lemma)

        # "please" carries no command information; drop every occurrence.
        self.sent_prep = [w for w in lemmas if w != 'please']
        return self.sent_prep

    def wordnet(self, to_parse):
        """Replace each word by the base word(s) whose synonym list contains it.

        Parameters
        ----------
        to_parse : list
            Lemmatized words.

        Returns
        -------
        list
            Base words and pass-through words, in sentence order.

        Raises
        ------
        Exception
            When the number of produced tokens differs from the number of
            input words, i.e. some word matched nothing or matched more than
            one entry.
        """
        sent_wordnet = []
        self.sent_wordnet_orig = len(to_parse)
        for word in to_parse:
            # Verbs first, then nouns, then adjectives; every matching base
            # word is appended.
            for net in (verb_net, noun_net, adj_net):
                for base, synonyms in net.items():
                    if word in synonyms:
                        sent_wordnet.append(base)
            if word in self._PASS_THROUGH:
                sent_wordnet.append(word)

        if len(sent_wordnet) != self.sent_wordnet_orig:
            raise Exception(self._NOT_UNDERSTOOD)
        return sent_wordnet

    def check(self):
        """Parse the sentence and return ``(tokens, parse)`` for the first parse.

        Returns
        -------
        tuple
            ``(tokens, p)`` where ``tokens`` is the list of base-word tokens
            and ``p`` is the first parse produced by the parser. When ``var``
            is set and the original sentence contains digit tokens, the
            ``hey_num`` placeholders are replaced by those digits in both
            values and ``p`` is returned as a string.

        Raises
        ------
        Exception
            When the parser raises or produces no parse at all.
        """
        self.to_parse = self.lemmatization()
        self.to_parse = self.wordnet(self.to_parse)
        rd_parser = nltk.RecursiveDescentParser(self.grammar)

        try:
            # Only the first parse is used.
            p = next(iter(rd_parser.parse(self.to_parse)), None)
        except Exception as err:
            raise Exception(self._NOT_UNDERSTOOD) from err
        if p is None:
            # Previously this case fell through and returned None, which made
            # classify_me fail with an unrelated unpacking error.
            raise Exception(self._NOT_UNDERSTOOD)

        # Use the instance flag, not a notebook-level global named `var`.
        if self.var:
            # Same tokenisation as in __init__, so the numbers line up with
            # the placeholders in order of appearance.
            numbers = [tok for tok in self.orig_sent.split() if tok.isdigit()]
            tokens = list(self.to_parse)
            for n in numbers:
                p = str(p).replace('hey_num', n, 1)
                # Substitute inside the list itself instead of round-tripping
                # through str(list), which left quote characters in tokens.
                if 'hey_num' in tokens:
                    tokens[tokens.index('hey_num')] = n
            self.to_parse = tokens

        return self.to_parse, p

    # Pruning
    def classify_me(self):
        """Print the leaves found under the VB, NN, CO and RP tree labels."""
        _, p = self.check()
        labels = ('VB', 'NN', 'CO', 'RP')
        found = {label: [] for label in labels}
        # `p` may be a tree or its string form; normalise through a string.
        for subtree in Tree.fromstring(str(p)).subtrees():
            if subtree.label() in found:
                found[subtree.label()].append(subtree.leaves()[0])

        print('Use me to classify: ')
        for label in labels:
            print(label, found[label])

### TEST

In [7]:
# Smoke-test sentences. The commented-out entries are kept as a record of
# inputs that were switched off in the original notebook.
sentences = [
    "Please turn up the heat",
    "turn the air-condition off please",
    "turn on the fan mode",
    "turn off the fan",
#     "turn down the temperature",
#     "turn down the fan",
    "swing up the air",
    "swing the air-condition",
    "stop the swing",
    "speed up the fan",
    "reduce the temperature",
    "raise the temperature by 3 degrees",
    "pause the swing",
    "increase the humidity",
    "increase the fan speed",
    "activate the fan mode",
    "lower the fan speed",
    "lower the air",
    "dry the room",
    "decrease the humidity",
    "turn on the air-condition from twenty-four pm to one o'clock",
    "please increase the temperature by twenty-five degrees",
]

for sent in sentences:
    print('='*50)
    print('User: ', sent)
    print('='*50)
    try:
        # check_var comes from the little_heys star import; it returns the
        # `var` flag together with the (possibly rewritten) sentence.
        var, sent = check_var(sent)
        hey_ac(sent, grammar, var=var).classify_me()
    except Exception as err:
        # Show the failure and continue, so one sentence the grammar cannot
        # handle does not abort the remaining test sentences.
        print(f"{type(err).__name__}: {err}")

User:  Please turn up the heat
Use me to classify: 
VB ['turn']
NN ['heat']
CO []
RP ['up']
User:  turn the air-condition off please
Use me to classify: 
VB ['turn']
NN ['air-condition']
CO []
RP ['off']
User:  turn on the fan mode
Use me to classify: 
VB ['turn']
NN ['fan', 'mode']
CO []
RP ['on']
User:  turn off the fan
Use me to classify: 
VB ['turn']
NN ['fan']
CO []
RP ['off']
User:  swing up the air
Use me to classify: 
VB ['swing']
NN ['air']
CO []
RP ['up']
User:  swing the air-condition
Use me to classify: 
VB ['swing']
NN ['air-condition']
CO []
RP []
User:  stop the swing
Use me to classify: 
VB ['stop']
NN ['swing']
CO []
RP []
User:  speed up the fan
Use me to classify: 
VB ['speed']
NN ['fan']
CO []
RP ['up']
User:  reduce the temperature
Use me to classify: 
VB ['reduce']
NN ['temperature']
CO []
RP []
User:  raise the temperature by 3 degrees
Use me to classify: 
VB ['raise']
NN ['temperature']
CO ['3']
RP []
User:  pause the swing
Use me to classify: 
VB ['pause']
NN [

#### NOTE: it is also possible to convert number words to digits (word -> num)
Some sentences still raise an error because the grammar is incomplete.

In [8]:
# sent = "deecrease the temperature twenty-five degrees"
# hey_ac(sent,grammar, var=True).hey_ac_hey() # variable = True