In [None]:
# This is a 'smart' chat bot program

In [1]:
pip install nltk



In [2]:
pip install newspaper3k



In [3]:
# Import the libraries
from newspaper import Article
import random
import string 
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import warnings
warnings.filterwarnings('ignore') # ignore any warning that we get

In [4]:
# Download the Punkt sentence-tokenizer data needed by nltk.sent_tokenize.
# Recent NLTK releases load it from the 'punkt_tab' resource, older releases
# from 'punkt', so both are requested. With quiet=True a resource that the
# installed version does not know about is expected to just return False.
nltk.download('punkt_tab', quiet=True)
nltk.download('punkt', quiet=True)


True

In [5]:
# Get the article that serves as the bot's knowledge base
ARTICLE_URL = 'https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521'

article = Article(ARTICLE_URL)
article.download()
article.parse()  # analyse the downloaded page
article.nlp()  # apply natural language processing to the article
corpus = article.text  # the article text; it is split into sentences further down

In [6]:
# Print the article's text

print(corpus)

Learn more from kidney doctor Andrew Bentall, M.D.

I'm Dr. Andrew Bentall, a kidney doctor at Mayo Clinic. I look after patients with kidney disease, either in the early stages, or with more advanced kidney disease considering dialysis and transplantation as treatment options. In this video, we'll cover the basics of chronic kidney disease. What is it? Who gets it? The symptoms, diagnosis and treatment. Whether you are looking for answers for yourself or for someone you love, we're here to give you the best information available.

Chronic kidney disease is a disease characterized by progressive damage and loss of function in the kidneys. It's estimated that chronic kidney disease affects about one in seven American adults. And most of those don't know they have it. Before we get into the disease itself, let's talk a little bit about the kidneys and what they do. Our kidneys play many important roles keeping our bodies in balance. They remove waste and toxins, excess water from the blo

In [7]:
# Tokenization: split the article text into sentences

text = corpus
sentence_list = nltk.sent_tokenize(text) # a list of sentences; bot_response() searches this list
print(sentence_list)

['Learn more from kidney doctor Andrew Bentall, M.D.', "I'm Dr. Andrew Bentall, a kidney doctor at Mayo Clinic.", 'I look after patients with kidney disease, either in the early stages, or with more advanced kidney disease considering dialysis and transplantation as treatment options.', "In this video, we'll cover the basics of chronic kidney disease.", 'What is it?', 'Who gets it?', 'The symptoms, diagnosis and treatment.', "Whether you are looking for answers for yourself or for someone you love, we're here to give you the best information available.", 'Chronic kidney disease is a disease characterized by progressive damage and loss of function in the kidneys.', "It's estimated that chronic kidney disease affects about one in seven American adults.", "And most of those don't know they have it.", "Before we get into the disease itself, let's talk a little bit about the kidneys and what they do.", 'Our kidneys play many important roles keeping our bodies in balance.', 'They remove wast

In [8]:
# A function that returns a random greeting in response to a user's greeting

def greeting_response(text):
  """Return a random bot greeting if `text` contains a greeting word.

  The input is lower-cased and split on whitespace, and punctuation around
  each word is stripped, so inputs such as "Hi!" or "hello," are recognised.

  Args:
    text: The message typed by the user.

  Returns:
    A randomly chosen greeting string, or None when no word of `text` is a
    known greeting.
  """
  # Greetings the bot may answer with.
  bot_greetings = ['howdy', 'hi', 'hey', 'hello', 'hola']
  # Greetings recognised in the user's message. Entries must not contain
  # whitespace, because they are compared against whitespace-split words.
  user_greetings = {'hey', 'hi', 'greetings', 'hello', 'hola', 'wassup'}

  for word in text.lower().split():
    if word.strip(string.punctuation) in user_greetings:
      return random.choice(bot_greetings)
  return None

In [9]:
# Create a function that returns the indices of a list ordered by value, highest first
def index_sort(list_var):
  """Return the indices of `list_var` ordered from highest to lowest value.

  Args:
    list_var: An indexable, sized sequence of mutually comparable values
      (for example a list or a flattened array of scores).

  Returns:
    A list of integer positions such that `list_var[result[0]]` is the
    largest value and `list_var[result[-1]]` the smallest. Equal values keep
    their original relative order. An empty input yields an empty list.
  """
  # sorted() stays stable with reverse=True, so ties remain in index order.
  return sorted(range(len(list_var)), key=lambda pos: list_var[pos], reverse=True)

In [10]:
# Create the bot response
def bot_response(user_input, max_sentences=3):
  """Answer a query with the article sentences most similar to it.

  The lower-cased query is vectorised together with the module-level
  `sentence_list` using word counts, and every sentence is scored by cosine
  similarity against the query. `sentence_list` itself is left unmodified.

  Args:
    user_input: The text typed by the user.
    max_sentences: Upper bound on the number of sentences returned. The
      default of 3 matches the number this function has always returned.

  Returns:
    The best-matching sentences (highest score first), each preceded by a
    single space, or " I apologize, I don't understand." when no sentence
    has a similarity score above zero.
  """
  user_input = user_input.lower()

  # Work on a copy with the query appended as the last entry. Appending to
  # and later removing from the shared list could delete a corpus sentence
  # that happens to equal the query, and would leave the query behind if
  # vectorisation raised.
  candidates = sentence_list + [user_input]
  query_pos = len(candidates) - 1

  cm = CountVectorizer().fit_transform(candidates) # count matrix, one row per entry
  similarity_scores = cosine_similarity(cm[-1], cm) # the query (last row) against every row
  similarity_scores_list = similarity_scores.flatten()

  # Positions ordered from the highest score to the lowest. The query is
  # excluded by position rather than by assuming it sorts first, which does
  # not hold when a sentence ties with it.
  ranked = [pos for pos in index_sort(similarity_scores_list) if pos != query_pos]

  matches = [candidates[pos] for pos in ranked if similarity_scores_list[pos] > 0.0]
  matches = matches[:max_sentences]

  if not matches:
    return " I apologize, I don't understand."

  # Each sentence keeps the leading space the reply has always had.
  return ''.join(' ' + sentence for sentence in matches)

In [12]:
# Start the chat

print ('Doc Bot: I am Doctor Bot or Doc Bot for short. I will answer your queries about Chronic Kidney Disease. If you want to exit, type bye.')

# Inputs (compared case-insensitively) that end the conversation
exit_list = ['exit', 'see you later', 'bye', 'quit', 'break']

# Keep answering until the user types one of the exit words
while True:
  user_input = input()
  # Ignore surrounding whitespace so that e.g. 'bye ' still ends the chat
  if user_input.strip().lower() in exit_list:
    print("Doc Bot: Chat with you later!")
    break

  # Greetings are answered with a greeting; anything else is looked up in the
  # article. greeting_response() is called once so that the greeting that was
  # checked is the one that gets printed.
  greeting = greeting_response(user_input)
  if greeting is not None:
    print('Doc Bot: ' + greeting)
  else:
    print('Doc Bot: ' + bot_response(user_input))

Doc Bot: I am Doctor Bot or Doc Bot for short. I will answer your queries about Chronic Kidney Disease. If you want to exit, type bye.
hi 
Doc Bot: hey
what is chronic kidney disease?
Dot Bot:  Chronic kidney disease is a disease characterized by progressive damage and loss of function in the kidneys. What is it? Diabetes is the most common cause of kidney disease.
bye
Doc Bot: Chat with you later!


In [None]:
  # Testing: score every article sentence against a sample query
  user_input = ' hello world '
  # Build a separate list instead of appending to sentence_list, so that
  # re-running this cell does not leave copies of the query in the corpus.
  test_sentences = sentence_list + [user_input]
  # Note: nothing is assigned to the name `bot_response` here, because that
  # would replace the bot_response() function defined earlier.
  cm = CountVectorizer().fit_transform(test_sentences)
  similarity_scores = cosine_similarity(cm[-1], cm) # the query (last row) against every row
  similarity_scores_list = similarity_scores.flatten()
  index = index_sort(similarity_scores_list)

In [None]:
# Similarity score of the test query against every vectorised entry, in list order
similarity_scores_list

In [None]:
index # positions in similarity_scores_list ordered from the highest score to the lowest; index[0] points at the highest score