In [9]:
import os
import time
import re
import slackclient
import pandas as pd
import numpy as np

#NLTK
import nltk
from nltk.tokenize import TreebankWordTokenizer
from nltk.corpus import stopwords
from nltk.stem import wordnet
nltk.download('stopwords')
from nltk import pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
stopwords = stopwords.words('english')
from nltk import word_tokenize, sent_tokenize

#SKLEARN
from sklearn.metrics import pairwise_distances # to perfrom cosine similarity
from sklearn.feature_extraction.text import TfidfVectorizer # to perform tfidf

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ankitkothari/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/ankitkothari/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/ankitkothari/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/ankitkothari/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#### CONNECT THE SLACKBOT BY THIS SECRET TOKEN 

In [10]:
slack_clients = slackclient.SlackClient('<slack_secret_token>')

#### DATA: QUESTIONS AND RESPONSES

In [12]:
pd.set_option('display.max_colwidth', -1)
data = pd.read_csv('./coffee.csv')
data.ffill(axis=0, inplace= True)
context = data['Context'].values
context_string = data['Context'].str.cat(sep='\n')
data.head()

Unnamed: 0,Context,Text Response
0,What is the best temperature to brew coffee?,"According to chemical studies, the optimal water temperature for drip coffee is 95-98C. According to my notes, colder water doesn't extract enough caffeine/essential oils from the beans, and above such temperature the acidity increases wildly."
1,Quality of coffee,"The quality of a brew depends on the following factors (in no particular order):\nTime since grinding the beans.\nTime since roasting.\nCleanliness with brewing equipment.\nBean quality (what crop, etc.).\nWater quality.\n"
2,What is the difference between arabica and robusta?,"Arabica beans and robusta beans are two different species of coffee. They are the primary species of coffee that find their way into the American cup. The general differences are those of taste, and the conditions under which the two species differ in production."
3,Just how much ground coffee do I need for x amount of coffee?,a. Whatever seems right to you. b. It may change slightly from coffee to coffee and according to freshness.
4,What are the different between Preparation Methods\n,Drip\nFrench Press\nEspresso\nPercolator


#### Text Preprocessing and not removing stop-words

In [14]:
def nltk_cleaning(text):
  token_text = word_tokenize(text)
  clean_text = []
  lemma = wordnet.WordNetLemmatizer()
  tag_list = pos_tag(token_text, tagset=None)
  for token, pos_token in tag_list:
   if token not in '\n\n \n\n\n!"-#$%&()--.*''+,-/:;``<=>[``?@[\\]^_`''{|}~\t\n`\'\'':
     if pos_token.startswith('V'):  # Verb
         pos_val='v'
     elif pos_token.startswith('J'): # Adjective
         pos_val='a'
     elif pos_token.startswith('R'): # Adverb
         pos_val='r'
     else:
         pos_val='n' # Noun
     lemma_token= lemma.lemmatize(token,pos_val)
     clean_text.append(lemma_token.lower())
   else:
      continue 
  return " ".join(clean_text)
data['nltk_cleaning']= data['Context'].apply(nltk_cleaning)
data['nltk_cleaning'].head()

0    what be the best temperature to brew coffee                 
1    quality of coffee                                           
2    what be the difference between arabica and robusta          
3    just how much ground coffee do i need for x amount of coffee
4    what be the different between preparation methods           
Name: nltk_cleaning, dtype: object

#### Text Vectorization TFIDF

In [15]:
tfidf = TfidfVectorizer() # intializing the count vectorizer
X = tfidf.fit_transform(data['nltk_cleaning']).toarray()
features = tfidf.get_feature_names()
df_idf = pd.DataFrame(X, columns = features)
Question_tf ='thanks for your support!'
Question_lemma_tf = nltk_cleaning(Question_tf) # applying the function that we created for text normalizing
Question_tf = tfidf.transform([Question_lemma_tf]).toarray() # applying bow
cosine_value_tf = 1- pairwise_distances(df_idf, Question_tf, metric = 'cosine' )
index_value = cosine_value_tf.argmax() # returns the index number of highest value
data['similarity_tfidf']=cosine_value_tf # creating a new column
#print(data.sort_values('similarity_tfidf', ascending=False).iloc[0:2])
df_simi_tf = pd.DataFrame(data, columns=['Text Response','similarity_tfidf']) # taking similarity value of responses for the question we took
#print(df_simi_tf.sort_values('similarity_tfidf', ascending=False))
ending_text = ['bye','done', 'thanks', 'exit', 'ok', 'x']

#### Parsing the Incoming Commands from Slack

In [16]:
def parse_bot_commands(slack_events):
    for event in slack_events:
        if event["type"] == "message" and not "subtype" in event:
            message = event["text"]
            return message, event["channel"], event['user']
    return None, None, None


#### How to interpret the incoming message from Slack and send appropriate responses

In [17]:
def handle_command(command, channel,user):
    # Default response is help text for the user
    accepted_users = ['<slack_user_id>']
    ending_text = ['bye','done', 'thanks', 'exit', 'ok', 'x']
    if user not in accepted_users:
          response = "Not an authorised user"
    elif command in ending_text:
          response = "Bye! Thanks for chatting"
    else:
      try:
        Question_lemma_tf = nltk_cleaning(command) # applying the function that we created for text normalizing
        Question_tf = tfidf.transform([Question_lemma_tf]).toarray() # applying bow
        cosine_value_tf = 1- pairwise_distances(df_idf, Question_tf, metric = 'cosine' )
        index_value = cosine_value_tf.argmax()
        response = data['Text Response'].loc[index_value]
      except:
          response='Sorry, Not sure what you mean'
        
    

    # Sends the response back to the channel
    slack_clients.api_call(
        "chat.postMessage",
        type="divider",
        channel=channel,
        #text=':coffee:'+':coffee:'+"*"+response+"*" + ':coffee:'+':coffee:',
        attachments=[{
        "blocks":[
		{
			"type": "section",
			"block_id": "section567",
			"text": {
				"type": "mrkdwn",
				"text": ':coffee:'+':coffee:'+"*"+response+"*" + ':coffee:'+':coffee:'
			},
			"accessory": {
				"type": "image",
				"image_url": "https://png.pngtree.com/png-clipart/20190903/original/pngtree-yellow-woven-bag-of-coffee-beans-png-image_4418658.jpg",
				"alt_text": "Haunted hotel image"
			}}
		]}]
    )

##Running the application
print("Done")

Done


#### Making the SLACK API call to read all the incoming text

In [None]:
# constants
RTM_READ_DELAY = 1 # 1 second delay between reading from RTM
if __name__ == "__main__":
    if slack_clients.rtm_connect(with_team_state=False):
        print("Starter Bot connected and running!")
        # Read bot's user ID by calling Web API method `auth.test`
        starterbot_id = slack_clients.api_call("auth.test")["user_id"]
        while True:
            command, channel, user = parse_bot_commands(slack_clients.rtm_read())
            if command == 'shutdownthebot':
                break
            else:
               if command is None:
                  continue
               else:
                  handle_command(command, channel, user)
            time.sleep(RTM_READ_DELAY)
    else:
        print("Connection failed. Exception traceback printed above.")

Starter Bot connected and running!
