'''
    The below chatbot books a Restaurant for you, given your specific preferences 
    and also, books a Doctor's appointment.
    
    I am identifying the intent of the user based on the classification algorith which 
    uses the Bag of Words feature extraction, with the Linear SVM classifier (Bonus criterion).
    
    As I have a relatively big dataset (250+ rows), I was able to achieve the accuracy of ~95% 
    for the dataset.
    
    High Level Algo:
        1. Find the intent of the user and set the context for either booking restaurant/doctor
        2. Collect the parameters iteratively
        3. Filter and present the most appropriate results
        4. Send book instruction
'''

In [1]:
from Contexts import *
import json
from Intents import *
import random
import os
import re
import numpy as np
import pandas as pd

In [2]:
def check_required_params(current_intent, attributes, context):
    '''Collects attributes pertaining to the current intent'''
    for para in current_intent.params:
        if para.required:
            if para.name not in attributes:
                #Example of where the context is born, implemented in Contexts.py
                if para.name=='RegNo':
                    context = GetRegNo()
                #returning a random prompt frmo available choices.
                return random.choice(para.prompts), context

    return None, context


def input_processor(user_input, context, attributes, curr_intent):
    '''Spellcheck and entity extraction functions go here'''
    #update the attributes, abstract over the entities in user input
    attributes, cleaned_input = getattributes(user_input, context, attributes)
    
    return attributes, cleaned_input

def loadIntent(path, intent):
    with open(path) as fil:
        dat = json.load(fil)
        intent = dat[intent]
        return Intent(intent['intentname'], intent['Parameters'], intent['actions'])

import re
only_alnum = re.compile(r"[^a-z0-9]+")
## Replaces one or more occurrence of any characters other than a-z and 0-9 with a space
## This automatically replaces multiple spaces by 1 space

def cleanUp(s):
    return re.sub(only_alnum, " ", s.lower())

fluff = set([w.strip() for w in open("fluff.txt")])
def get_useful_words(s):
    return [w for w in cleanUp(s).split() if len(w) > 2 and w not in fluff]

### IAS: TODO TODO 
word_vector_size = 501

def convert_to_BoW(dataset, number_of_documents, label_list, valid_words):
    bow_representation = np.zeros((number_of_documents, word_vector_size))
    labels = np.zeros((number_of_documents, 1))
    
    i = 0
    for text in dataset:
        # Read all text in file
        # For each word
        for word in text:
            if word in valid_words:
                bow_representation[i, valid_words[word]] += 1

        # Increment document counter
        i += 1
    
    return bow_representation, label_list

def intentIdentifier(clean_input, context,current_intent):
    clean_input = clean_input.lower()

    from sklearn.externals import joblib
    import operator
    from functools import reduce

    clf = joblib.load('clf.pkl') 
    valid_words = read_dictionary = np.load('valid_words.npy').item()

    useful_words = reduce(operator.concat, [get_useful_words(statement) for statement in clean_input.split(' ')])
    bow_rep, _ = convert_to_BoW([useful_words], 1, None, valid_words)
    if clf.predict(bow_rep) == 0: # Restaurant
        return loadIntent('params/newparams.cfg', 'RestaurantSearch')
    else:
        return loadIntent('params/newparams.cfg', 'DoctorAppointment')
    
files = os.listdir('./entities/')
entities = {}
for fil in files:
    if not fil.startswith('.'):
        lines = open('./entities/'+fil).readlines()
        for i, line in enumerate(lines):
            lines[i] = line[:-1]
        entities[fil[:-4]] = '|'.join(lines)
    
def getattributes(uinput, context, attributes):
    '''This function marks the entities in user input, and updates
    the attributes dictionary'''
    #Can use context to context specific attribute fetching
    if context.name.startswith('IntentComplete'):
        return attributes, uinput
    else:
        #Extract entity and update it in attributes dict
        for entity in entities:
            for i in entities[entity].split('|'):
                if i.lower() in uinput.lower():
                    attributes[entity] = i
        for entity in entities:
                uinput = re.sub(entities[entity], r'$'+entity, uinput, flags=re.IGNORECASE)

        return attributes, uinput

In [3]:
class Session:
    def __init__(self, attributes=None, active_contexts=[FirstGreeting(), IntentComplete() ]):
        
        '''Initialise a default session'''
        
        #Active contexts not used yet, can use it to have multiple contexts
        self.active_contexts = active_contexts
        
        #Contexts are flags which control dialogue flow, see Contexts.py        
        self.context = FirstGreeting()
        
        #Intent tracks the current state of dialogue
        #self.current_intent = First_Greeting()
        self.current_intent = None
        
        #attributes hold the information collected over the conversation
        self.attributes = {}

    def get_probable_restaurants(self):
        data = pd.read_csv("relational_data/restaurants.csv")
        db_attributes = list(data.columns.values)[:-1]
        
        for f, v in self.attributes.items():
            if v and f in db_attributes:
                data = data.loc[data[f] == v]
                
        return data.Restaurant.values

    def get_probable_doctors(self):
        data = pd.read_csv("relational_data/doctors.csv")
        db_attributes = list(data.columns.values)[:-1]
        
        for f, v in self.attributes.items():
            if v and f in db_attributes:
                data = data.loc[data[f] == v]
                
        return data.Doctor.values    
    
    def check_actions(self, current_intent, attributes, context):
        '''This function performs the action for the intent
        as mentioned in the intent config file'''
        '''Performs actions pertaining to current intent
        for action in current_intent.actions:
            if action.contexts_satisfied(active_contexts):
                return perform_action()
        '''
        rstr = "Booking Options: "
        probable_hits = ''
        self.context = IntentComplete()
        ### IAS: Get the Restaurants/Doctors satisfying the attributes
        if self.current_intent.name == 'RestaurantSearch':
            probable_hits = self.get_probable_restaurants()
        elif self.current_intent.name == 'DoctorAppointment':
            probable_hits = self.get_probable_doctors()
        else:
            rstr = 'Error!!! Please restart'
            
        print('****************** Booked: ', self.attributes, 'Intent: ', self.current_intent.name)
        return rstr + str(probable_hits), self.context
        
    def update_contexts(self):
        '''Not used yet, but is intended to maintain active contexts'''
        for context in self.active_contexts:
            if context.active:
                context.decrease_lifespan()

    def reply(self, user_input):
        '''Generate response to user input'''
        if not self.current_intent:
            self.current_intent = intentIdentifier(user_input, self.context, self.current_intent)
        self.attributes, clean_input = input_processor(user_input, self.context, self.attributes, self.current_intent)
        prompt, self.context = check_required_params(self.current_intent, self.attributes, self.context)
        
        #prompt being None means all parameters satisfied, perform the intent action
        if prompt is None:
            if self.context.name != 'IntentComplete':
                prompt, self.context = self.check_actions(self.current_intent, self.attributes, self.context)
        
        #Resets the state after the Intent is complete
        if self.context.name=='IntentComplete':
            self.attributes = {}
            self.context = FirstGreeting()
            self.current_intent = None
        
        return prompt

In [4]:
session = Session()

print ('BOT: Hi! How may I assist you?')

while True:
    
    inp = input('User: ')
    print ('BOT:', session.reply(inp))

BOT: Hi! How may I assist you?
User: having pain in ass
BOT: Who specific speciality are you looking for?
User: GYN
BOT: What's your preferred location?
User: north
BOT: What timeslot you are looking for (morning/afternoon/evening)?
User: afternoon
****************** Booked:  {'Speciality': 'GYN', 'Location': 'north', 'timeslot': 'afternoon'} Intent:  DoctorAppointment
BOT: Booking Options: ['Dr. GYN North Afternoon Guy']
User: pain in eye
BOT: What's the speciality?
User: OPTH
BOT: What's your preferred location?
User: Hyd north
BOT: Please specify a timeslot (morning/afternoon/evening)
User: evening
****************** Booked:  {'Speciality': 'OPTH', 'Location': 'north', 'timeslot': 'evening'} Intent:  DoctorAppointment
BOT: Booking Options: ['Dr. OPTH North Evening Guy']


KeyboardInterrupt: 

In [None]:
# with open('params/newparams.cfg') as fil:
#     json.load(fil)

In [None]:
# import pandas as pd
# data = pd.read_csv("relational_data/restaurants.csv")
# attributes = {'budget': 'bad', 'cuisine': 'Chinese', 'timeslot': '', 'Location': 'south'}
# # attributes = {'budget': 'bad'}
# for f, v in attributes.items():
#     if v:
#         print(f, v)
#         data = data.loc[data[f] == v]
# data.Restaurant.values
# data.loc[data['budget'] == 'cheap']




In [None]:
#**Booked:  {'Speciality': 'ENT', 'Location': 'south', 'timeslot': 'morning'} Intent:  DoctorAppointment
# import pandas as pd
# data = pd.read_csv("relational_data/doctors.csv")
# attributes = {'Speciality': 'GYN', 'Location': 'south', 'timeslot': 'morning'}

# db_attributes = list(data.columns.values)[:-1]
# for f, v in attributes.items():
#     if v and f in db_attributes:
#         print(f, v)
#         data = data.loc[data[f] == v]
# data.Doctor.values
