In [1]:
import nltk
import numpy as np
import random
import string
import bs4 as bs
import urllib.request
import re
import warnings

# Fetch the NLTK resources used below: the sentence/word tokenizer models and
# the WordNet data needed by the lemmatizer.
# Recent NLTK releases load the tokenizer from 'punkt_tab' while older ones
# use 'punkt'; requesting both keeps the notebook runnable on a fresh kernel
# with either version.
for resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

# NOTE(review): this silences *every* warning for the rest of the session,
# including ones that may point at real problems -- consider narrowing it.
warnings.filterwarnings("ignore")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


**Fetch and process Wikipedia data**

In [2]:
# Download the raw HTML of the article. The context manager closes the HTTP
# response as soon as the body has been read instead of leaking the handle.
with urllib.request.urlopen('https://en.wikipedia.org/wiki/Artificial_intelligence') as response:
    get_link = response.read()

# Parse the page and keep only the text found inside <p> elements.
data = bs.BeautifulSoup(get_link, 'lxml')
data_paragraphs = data.find_all('p')

# Join all paragraph texts in document order (one join instead of repeated
# string concatenation) and lowercase the result.
data_text = ''.join(para.text for para in data_paragraphs).lower()

In [3]:
# Two clean-up passes over the article text, applied in this order:
#   1. replace bracketed numeric markers such as "[12]" (and "[]") with a space
#   2. collapse every run of whitespace into a single space
for pattern in (r'\[[0-9]*\]', r'\s+'):
    data_text = re.sub(pattern, ' ', data_text)

**Tokenize the processed text**

In [4]:
# Split the cleaned text two ways: into sentences (the candidate bot answers)
# and into individual word tokens.
data_sentences, data_words = (
    nltk.sent_tokenize(data_text),
    nltk.word_tokenize(data_text),
)

**Lemmatizer and text processing**

In [5]:
# Single shared WordNet lemmatizer instance, reused for every call.
wnlemmatizer = nltk.stem.WordNetLemmatizer()


def perform_lemmatization(tokens):
    """Return a list holding the WordNet lemma of each token, in input order."""
    return list(map(wnlemmatizer.lemmatize, tokens))

# Translation table for str.translate: maps the code point of every ASCII
# punctuation character to None, i.e. deletes it.
punctuation_removal = str.maketrans('', '', string.punctuation)

def get_processed_text(document):
    """Normalise a piece of text into a list of lemmatized tokens.

    Steps: lowercase the text, delete punctuation via ``punctuation_removal``,
    word-tokenize with NLTK, then lemmatize each token.
    """
    lowered = document.lower()
    without_punctuation = lowered.translate(punctuation_removal)
    tokens = nltk.word_tokenize(without_punctuation)
    return perform_lemmatization(tokens)

**Greeting handling**

In [6]:
# Tokens that mark the user's message as a greeting. Matching is done per
# whitespace-separated token, so the two-word entries are reached through
# their "morning" / "evening" tokens.
greeting_inputs = ("hey", "good morning", "good evening", "morning", "evening", "hi", "whatsup")
# Canned replies; one is picked at random for every detected greeting.
greeting_responses = ["hey", "hey how's you?", "nods", "hello, how you doing", "hello", "Welcome, I am good and you"]

def generate_greeting_response(greeting):
    """Return a random greeting reply if ``greeting`` contains a greeting token.

    Each whitespace-separated token is lowercased and stripped of leading and
    trailing punctuation before the lookup, so inputs such as "Hi!" or
    "morning," are recognised too.

    Returns:
        A string from ``greeting_responses``, or ``None`` when no token of the
        input is a known greeting.
    """
    for token in greeting.split():
        normalized = token.lower().strip(string.punctuation)
        if normalized in greeting_inputs:
            return random.choice(greeting_responses)
    # Explicit "not a greeting" result; callers test for None.
    return None


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

**Generate a bot response**

In [8]:
def generate_response(user_input):
    """Return the corpus sentence most similar to ``user_input``.

    The sentences in ``data_sentences`` plus the query are vectorised with
    TF-IDF (tokenised by ``get_processed_text``, English stop words removed)
    and the query is compared to every corpus sentence by cosine similarity.

    Args:
        user_input: The user's message as a string.

    Returns:
        The best-matching sentence from ``data_sentences``, or a fixed apology
        string when the corpus is empty or nothing has non-zero similarity.
    """
    fallback_response = "I am sorry! I could not understand you."

    # Nothing to search in: avoid indexing into an empty similarity row.
    if not data_sentences:
        return fallback_response

    # Work on a temporary list instead of appending to / popping from the
    # shared corpus, so an exception during vectorisation cannot leave the
    # query stuck inside ``data_sentences``.
    documents = data_sentences + [user_input]
    word_vectorizer = TfidfVectorizer(tokenizer=get_processed_text, stop_words='english')
    all_word_vectors = word_vectorizer.fit_transform(documents)

    # The query is the last row. Score it against the corpus rows only, so the
    # query can never be selected as its own best match (which could happen on
    # ties when ranking it together with the corpus).
    similarities = cosine_similarity(all_word_vectors[-1], all_word_vectors[:-1]).flatten()
    best_index = int(similarities.argmax())

    # A zero score means the query shares no weighted term with any sentence.
    if similarities[best_index] == 0:
        return fallback_response
    return data_sentences[best_index]

**Chatbot conversation loop**

In [9]:
# Interactive console loop: read a line, answer it, and stop when the user
# says goodbye or thanks the bot.
continue_dialog = True
print("Hello I am from AI Sciences, you can ask me anything regarding AI")
while continue_dialog:
    # Normalise case and surrounding whitespace so that e.g. "Bye " still
    # matches the exit keywords below.
    human_text = input().lower().strip()
    if human_text == 'bye':
        continue_dialog = False
        print("AI Sciences: Goodbye and take care of yourself.")
    elif human_text in ['thanks', 'thank you very much', 'thank you']:
        continue_dialog = False
        print('AI Sciences: Most Welcome')
    else:
        # Call the greeting helper once: it picks a random reply, so the value
        # that was checked must be the value that is printed.
        greeting_response = generate_greeting_response(human_text)
        if greeting_response is not None:
            print('AI Sciences: ' + greeting_response)
        else:
            print("AI Sciences: " + generate_response(human_text))


Hello I am from AI Sciences, you can ask me anything regarding AI
hi
AI Sciences: hello, how you doing
what is artificial intelligence
AI Sciences:  artificial intelligence (ai), in its broadest sense, is intelligence exhibited by machines, particularly computer systems.
what is natural language processing
AI Sciences: natural language processing (nlp) allows programs to read, write and communicate in human languages such as english.
data scraping
AI Sciences: sensitive user data collected may include online activity records, geolocation data, video or audio.
difference between web scraping and data scraping 
AI Sciences: from the early days of the development of artificial intelligence, there have been arguments, for example, those put forward by joseph weizenbaum, about whether tasks that can be done by computers actually should be done by them, given the difference between computers and humans, and between quantitative calculation and qualitative, value-based judgement.
bye
AI Scien