In [79]:
pip install spacy



In [80]:
pip install newspaper3k



In [0]:
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
import numpy as np
import warnings


In [0]:
# Load the spaCy "en_core_web_sm" pipeline once; `nlp` is reused below to parse
# both the article corpus and every user message.
nlp = spacy.load("en_core_web_sm")

In [0]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')

In [93]:
# Loading the articles using Newspaper and parsing them into one spaCy Doc

article1 = Article('https://www.who.int/health-topics/coronavirus#tab=tab_1')
article2 = Article('https://globalnews.ca/news/6809479/ontario-coronavirus-covid-19-cases-april-12/')
article3 = Article('https://www.webmd.com/lung/qa/how-can-i-prevent-coronavirus-infection')

# Each article has to be downloaded before it can be parsed.
for article in (article1, article2, article3):
    article.download()
    article.parse()

# Keep the combined text in its own variable (the original assigned to an
# attribute of an undefined `article` name, which fails on a fresh kernel).
# The blank-line separator stops the last sentence of one article from being
# fused with the first sentence of the next one.
article_text = "\n\n".join([article1.text, article2.text, article3.text])
corpus = nlp(article_text)


print(corpus)


Coronavirus disease (COVID-19) is an infectious disease caused by a newly discovered coronavirus.



Most people infected with the COVID-19 virus will experience mild to moderate respiratory illness and recover without requiring special treatment. Older people, and those with underlying medical problems like cardiovascular disease, diabetes, chronic respiratory disease, and cancer are more likely to develop serious illness.



The best way to prevent and slow down transmission is be well informed about the COVID-19 virus, the disease it causes and how it spreads. Protect yourself and others from infection by washing your hands or using an alcohol based rub frequently and not touching your face.

The COVID-19 virus spreads primarily through droplets of saliva or discharge from the nose when an infected person coughs or sneezes, so it’s important that you also practice respiratory etiquette (for example, by coughing into a flexed elbow).

At this time, there are no specific vaccines or t

In [94]:
#  Sentence Tokenisation
# Split the parsed corpus into one string per sentence. `text_with_ws` returns the
# sentence text including its trailing whitespace, which is what the removed
# spaCy v2 `Span.string` attribute returned, and it is available in v2 and v3.
text = corpus
sent_tokens = [sentence.text_with_ws for sentence in text.sents]

print(sent_tokens)


['Coronavirus disease (COVID-19) is an infectious disease caused by a newly discovered coronavirus.\n\n\n\n', 'Most people infected with the COVID-19 virus will experience mild to moderate respiratory illness and recover without requiring special treatment. ', 'Older people, and those with underlying medical problems like cardiovascular disease, diabetes, chronic respiratory disease, and cancer are more likely to develop serious illness.\n\n\n\n', 'The best way to prevent and slow down transmission ', 'is be well informed about the COVID-19 virus, the disease it causes and how it spreads. ', 'Protect yourself and others from infection by washing your hands or using an alcohol based rub frequently and not touching your face.\n\n', 'The COVID-19 virus spreads primarily through droplets of saliva or discharge from the nose when an infected person coughs or sneezes, so ', 'it’s important that you also practice respiratory etiquette (for example, by coughing into a flexed elbow).\n\n', 'At 

In [95]:
# Inspect the type of `text`; the cell output shows it is a spaCy Doc.
type(text)

spacy.tokens.doc.Doc

In [0]:
# Parse the user's message with the shared spaCy pipeline
def responsetotoken(user_response):
    """Run ``user_response`` through ``nlp`` and return the resulting spaCy Doc."""
    parsed_response = nlp(user_response)
    return parsed_response


# Lemmatized, lower-cased words with stop words, punctuation and whitespace removed (spaCy)
def LemNormalize(text):
  """Tokenize ``text`` for TF-IDF: return a list of lower-cased lemmas.

  Stop words, punctuation and whitespace-only tokens (e.g. the newline runs
  between paragraphs) are dropped so they do not become vocabulary entries.
  Using the lemma instead of the surface form lets inflected variants such as
  "coughs" and "coughing" map to the same feature.
  """
  doc = responsetotoken(text)
  return [
      token.lemma_.lower()
      for token in doc
      if not (token.is_stop or token.is_punct or token.is_space)
  ]






In [0]:
# Intent Classification
# Each *_INPUTS / INTENT_* list holds the lower-case trigger words or phrases for
# one intent; the matching *_RESPONSES list holds the replies to choose from.

GREETING_INPUTS = ["hi","hello","hola","greetings","wassup","hey","howdy","namastey"]
GREETING_RESPONSES = ["howdy","hi","hey","what's good", "hello", "hey there","Greetings,human"]

INTENT_SUPPORT = ["support","financial support","economic support"]
INTENT_SUPPORT_RESPONSES = ["Which kind of support would you like to know more about? individual or business"]

# The last phrase used to be a copy of the business phrase; it belongs to individuals.
INTENT_IND = ["support for individuals","individual support","individual","financial support for individuals"]
INTENT_IND_RESPONSES= ["There are three types of support for individuals: CERB, EI benefits and Other support. Which one would you like to know about?"]

INTENT_CERB= ["cerb"]
INTENT_CERB_RESPONSES = ["Here's the page you can view : https://www.canada.ca/en/services/benefits/ei/cerb-application.html?topic=Economic+and+financial+support&economic=Support+for+individuals&individuals=The+Canada+Emergency+Response+Benefit+%28CERB%29"]

INTENT_EI = ["ei","employee ensurance","employee insurance benefits","employment insurance","ei benefits","others","other","other benefits"]
INTENT_EI_RESPONSES = ["This page can help you : https://www.canada.ca/en/services/benefits/ei/ei-sickness/qualify.html?topic=Economic+and+financial+support&economic=Support+for+individuals&individuals=Improved+access+to+Employment+Insurance"]

# Phrases use single spaces because user input is whitespace-normalized before matching.
INTENT_BUSINESS = ["support for business", "business support","financial support for business","business"]
INTENT_BUS_RESPONSES = ["This page can help you : https://www.canada.ca/en/department-finance/economic-response-plan.html?topic=Economic+and+financial+support&economic=Support+for+individuals#businesses".replace("Support+for+individuals#", "Support+for+businesses#")]

BYE_INPUTS = ["thanks","thank","thankyou","thank you"]
BYE_RESPONSES = ["welcome! Glad to be of help","You are welcome","My Pleasure"]

# (triggers, replies) pairs in priority order; the first intent whose triggers
# contain the candidate text wins.
_INTENT_TABLE = (
    (GREETING_INPUTS, GREETING_RESPONSES),
    (INTENT_SUPPORT, INTENT_SUPPORT_RESPONSES),
    (INTENT_IND, INTENT_IND_RESPONSES),
    (INTENT_CERB, INTENT_CERB_RESPONSES),
    (INTENT_BUSINESS, INTENT_BUS_RESPONSES),
    (INTENT_EI, INTENT_EI_RESPONSES),
    (BYE_INPUTS, BYE_RESPONSES),
)


# Function to return a random canned response for a recognised intent

def greeting(sentence):
  """Return a canned reply if ``sentence`` triggers a known intent, else ``None``.

  The sentence is lower-cased, split on whitespace, and each word has leading
  and trailing punctuation removed (so "Hello!" matches "hello"). The whole
  normalized sentence is tried first, which makes multi-word triggers such as
  "business support" reachable; after that each word is tried in order.

  Args:
    sentence: raw text typed by the user.

  Returns:
    A string picked at random from the matching intent's responses, or
    ``None`` when nothing matches (including empty input).
  """
  words = [word.strip(string.punctuation).lower() for word in sentence.split()]
  words = [word for word in words if word]
  if not words:
    return None

  # Whole phrase first, then the individual words in their original order.
  candidates = [" ".join(words)] + words
  for candidate in candidates:
    for triggers, replies in _INTENT_TABLE:
      if candidate in triggers:
        return random.choice(replies)
  return None


In [0]:
# generate the response

def response(user_response):
  """Return the corpus sentence most similar to ``user_response``.

  The query and every sentence in the global ``sent_tokens`` list are
  vectorized with TF-IDF (tokenized by ``LemNormalize``) and compared by cosine
  similarity. The query is only ever compared against the corpus sentences, so
  the bot cannot echo the user's own text, and ``sent_tokens`` is never
  modified (the previous append/remove approach could leave the query in the
  corpus if vectorizing raised, or remove a real sentence equal to the query).

  Args:
    user_response: the text typed by the user.

  Returns:
    The best-matching sentence from ``sent_tokens``, or an apology string when
    the corpus is empty or no sentence shares any term with the query.
  """
  fallback = "I apologize, I don't understand."

  # Match case-insensitively.
  user_response = user_response.lower()

  # Nothing to compare against.
  if not sent_tokens:
    return fallback

  # Work on a copy with the query as the last document.
  documents = list(sent_tokens) + [user_response]

  # Convert the documents to a matrix of TF-IDF features.
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words='english')
  tfidf = TfidfVec.fit_transform(documents)

  # Similarity of the query (last row) to every corpus sentence (all other rows).
  similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()

  # Row i of tfidf[:-1] corresponds to sent_tokens[i].
  idx = similarities.argmax()

  # A best score of 0 means the query shares no terms with any sentence.
  if similarities[idx] == 0:
    return fallback
  return sent_tokens[idx]

In [105]:
# Interactive chat loop: read a line, answer with a canned intent reply when one
# matches, otherwise fall back to the TF-IDF corpus lookup. Typing "bye" exits.
flag = True
print("DOCBot: I am Doctor Bot or DOCBot for short. I will answer your queries about Coronavirus. If you want to exit, type Bye!")
while flag:
  # Strip surrounding whitespace so inputs like "bye " still end the chat.
  user_response = input().strip().lower()
  if user_response == 'bye':
    flag = False
    print("DOCBot: Chat with you later !")
    continue

  # Call greeting() once and reuse the result instead of drawing a second
  # random reply just to print it.
  canned_reply = greeting(user_response)
  if canned_reply is not None:
    print("DOCBot: "+canned_reply)
  else:
    print("DOCBot:  "+response(user_response))

DOCBot: I am Doctor Bot or DOCBot for short. I will answer your queries about Coronavirus. If you want to exit, type Bye!
hello
DOCBot: hello
thanks
DOCBot: You are welcome
hello
DOCBot: Greetings,human
hello
DOCBot: hey there
thankyou
DOCBot: My Pleasure
bye
DOCBot: Chat with you later !
