In [None]:
import nltk
import numpy as np
import random
import string
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import warnings
warnings.filterwarnings('ignore')

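# First run only: uncomment the lines below to download the NLTK data this notebook relies on
# ('punkt' for sentence/word tokenization, 'wordnet' for lemmatization).
#nltk.download('popular')
#nltk.download('punkt')
#nltk.download('wordnet')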

In [None]:
# Load the knowledge-base text; it is lower-cased up front, and undecodable bytes are skipped
with open('input.txt', mode='r', encoding='utf8', errors='ignore') as corpus_file:
    raw = corpus_file.read().lower()

# Sentence-level view of the corpus
sent_tokens = nltk.sent_tokenize(raw)
# Word-level view of the corpus
word_tokens = nltk.word_tokenize(raw)

In [None]:
# Pre-processing the raw text
# A single shared lemmatizer instance is reused for every call.
lemmer = WordNetLemmatizer()


def LemTokens(tokens):
    """Lemmatize each token with the shared WordNet lemmatizer.

    Args:
        tokens: iterable of word strings.

    Returns:
        list: one lemma per input token, in the same order.
    """
    return list(map(lemmer.lemmatize, tokens))
# Translation table for str.translate(): maps every ASCII punctuation code point to None (delete)
remove_punct_dict = {ord(punct): None for punct in string.punctuation}


def LemNormalize(text):
    """Normalize raw text into a list of lemmas.

    The text is lower-cased, stripped of ASCII punctuation, split into word
    tokens with NLTK and finally lemmatized via ``LemTokens``.

    Args:
        text: the raw string to normalize.

    Returns:
        list: lemmatized tokens of ``text``.
    """
    without_punct = text.lower().translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(without_punct))


In [None]:
# Keyword Matching
# Greeting words/phrases (lower case) that trigger a canned greeting reply.
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",)
# Pool of replies; one is picked at random for each detected greeting.
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
    """Return a random greeting reply if ``sentence`` contains a greeting.

    Matching is case-insensitive and works on whole words, so "hi" does not
    match inside "this". Punctuation attached to a word ("hello!", "hi,") is
    ignored, and multi-word entries of ``GREETING_INPUTS`` such as
    "what's up" are matched as phrases.

    Args:
        sentence: the user's input string.

    Returns:
        str or None: a random element of ``GREETING_RESPONSES`` when a
        greeting is found, otherwise ``None``.
    """
    # Strip leading/trailing punctuation per word; inner apostrophes ("what's") survive.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    # Pad with spaces so every phrase can be matched on word boundaries.
    padded = " " + " ".join(word for word in words if word) + " "
    for phrase in GREETING_INPUTS:
        if " " + phrase + " " in padded:
            return random.choice(GREETING_RESPONSES)
    return None

In [None]:
# Generating response
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is appended to the module-level ``sent_tokens`` list so that it
    is vectorized with the same TF-IDF vocabulary as the corpus sentences.
    It is still in the list when this function returns, so the caller must
    remove it again.

    Args:
        user_response: the user's query string.

    Returns:
        str: the best-matching sentence from ``sent_tokens``, or a fixed
        apology when the corpus is empty or no sentence shares any term
        with the query.
    """
    fallback = "I am sorry! I don't understand you"
    sent_tokens.append(user_response)

    # TF-IDF vectorizer converts the collection of raw documents into a matrix of TF-IDF features
    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = vectorizer.fit_transform(sent_tokens)

    # The last row is the query itself; every row before it is a corpus sentence.
    if tfidf.shape[0] < 2:
        return fallback

    # Cosine similarity between the query and the corpus sentences only.
    # Leaving the query row out means an exact tie with a corpus sentence can
    # never make the bot echo the user's own input back.
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    idx = similarities.argmax()
    if similarities[idx] == 0:
        return fallback
    return sent_tokens[idx]

In [147]:
# Interactive chat loop: keeps answering until the user says "bye" or thanks the bot.
flag = True
print("BOT: My name is BOT. I will answer your queries about COVID-19. \nIf you want to exit, type Bye!")
while flag:
    # Normalize so that "Bye", "BYE" and " bye " are all recognised.
    user_response = input("\nYou: ").strip().lower()
    if user_response == 'bye':
        flag = False
        print("BOT: Bye! take care..")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("BOT: You are welcome..")
    else:
        # greeting() draws a random reply on every call, so call it only once.
        reply = greeting(user_response)
        if reply is not None:
            print("BOT: " + reply)
        else:
            # response() appends the query to sent_tokens. Restore the list to
            # its previous length afterwards, even if response() raises, so the
            # corpus is never polluted and no genuine corpus sentence is removed.
            corpus_size = len(sent_tokens)
            try:
                print("BOT: " + response(user_response))
            finally:
                del sent_tokens[corpus_size:]

BOT: My name is BOT. I will answer your queries about COVID-19. 
If you want to exit, type Bye!

You: Hello
BOT: I am glad! You are talking to me

You: what is covid-19 full form
BOT: full form of covid-19 
coronavirus disease 2019

about coronavirus
coronavirus disease 2019 (covid-19) is a contagious respiratory and vascular disease caused by severe acute respiratory syndrome coronavirus 2 (sars-cov-2).

You: what are common symptoms
BOT: some symptoms of covid-19 can be relatively non-specific; the two most common symptoms are fever (88 percent) and dry cough (68 percent).among those who develop symptoms, approximately one in five may become more seriously ill and have difficulty in breathing.

You: Are there any preventive measures
BOT: preventive measures
preventive measures include social distancing, quarantining, ventilation of indoor spaces, covering coughs and sneezes, hand washing, and keeping unwashed hands away from the face.

You: Is there any vaccine
BOT: vaccine for coron