## Text Summarization

In [4]:
from gensim.summarization import summarize

text = '''
TextRank is based on PageRank algorithm that is used on Google Search Engine. Its base concept is 
"The linked page is good, much more if it from many linked page". The links between the pages are expressed 
by matrix (like Round-robin table). We can convert this matrix to transition probability matrix by dividing 
the sum of links in each page. And the page surfer moves the page according to this matrix.
'''

summarize(text, ratio=0.3)

'The links between the pages are expressed \nAnd the page surfer moves the page according to this matrix.'

In [8]:
# Read in Declaration of Independence
f = open("./data/declaration_of_independence.txt", "r")
declaration = f.read()

# Summarize the document
summarize(declaration, ratio=0.05).split('\n')

["The unanimous Declaration of the thirteen united States of America, When in the Course of human events, it becomes necessary for one people to dissolve the political bands which have connected them with another, and to assume among the powers of the earth, the separate and equal station to which the Laws of Nature and of Nature's God entitle them, a decent respect to the opinions of mankind requires that they should declare the causes which impel them to the separation.",
 'He has refused for a long time, after such dissolutions, to cause others to be elected; whereby the Legislative powers, incapable of Annihilation, have returned to the People at large for their exercise; the State remaining in the mean time exposed to all the dangers of invasion from without, and convulsions within.']

## Keyword Extraction

In [10]:
from gensim.summarization import keywords

keywords(text, ratio=0.5, split=True, lemmatize=True)

['pages', 'search', 'surfer', 'probability', 'round', 'matrix', 'like', 'base']

In [15]:
from collections import Counter

In [18]:
Counter(declaration.split()).most_common(50)

[('of', 78),
 ('the', 76),
 ('to', 64),
 ('and', 55),
 ('our', 25),
 ('for', 20),
 ('their', 20),
 ('has', 20),
 ('in', 18),
 ('He', 18),
 ('a', 15),
 ('these', 13),
 ('by', 13),
 ('have', 11),
 ('them', 11),
 ('that', 10),
 ('all', 10),
 ('is', 10),
 ('which', 9),
 ('with', 9),
 ('be', 9),
 ('his', 9),
 ('For', 9),
 ('are', 8),
 ('on', 8),
 ('We', 7),
 ('Laws', 6),
 ('they', 6),
 ('from', 6),
 ('us', 6),
 ('it', 5),
 ('among', 5),
 ('such', 5),
 ('most', 5),
 ('an', 5),
 ('should', 4),
 ('as', 4),
 ('right', 4),
 ('been', 4),
 ('Assent', 4),
 ('large', 4),
 ('at', 4),
 ('time', 4),
 ('States', 3),
 ('powers', 3),
 ('them,', 3),
 ('hold', 3),
 ('People', 3),
 ('new', 3),
 ('its', 3)]

In [14]:
keywords(declaration, split=True, lemmatize=True, scores=True)

[('new', 0.18274612451912534),
 ('government', 0.1704268404672953),
 ('laws', 0.15023721944877813),
 ('large', 0.1394680839981137),
 ('establish', 0.13581001475788437),
 ('right', 0.12343159322740224),
 ('hither', 0.12121412023708288),
 ('power', 0.1148350292906247),
 ('times', 0.11449164313896629),
 ('peace', 0.11394267290295407),
 ('absolute', 0.11293090860696055),
 ('long', 0.10872930877644976),
 ('war', 0.10027328418022759),
 ('state', 0.10024546224147723),
 ('united', 0.0988677173113322),
 ('jurisdiction foreign', 0.09523350837414297),
 ('mankind', 0.09219102102538687),
 ('purpose', 0.09219102102538682),
 ('domestic', 0.09219102102538677),
 ('seas', 0.09219102102538668),
 ('political', 0.09219102102538666),
 ('firm', 0.09219102102538666),
 ('unusual', 0.09219102102538661),
 ('object', 0.0921910210253866),
 ('indian', 0.0921910210253866),
 ('ages', 0.09219102102538654),
 ('repeated', 0.0921910210253865),
 ('arms', 0.09219102102538644),
 ('equal', 0.09219102102538643),
 ('pursuing',

## Training a ChatBot with ChatterBot

In [19]:
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer

chatbot = ChatBot("First Chatbot")

conversation = [
    "Hello",
    "Hi there!",
    "How are you doing?",
    "I'm doing great.",
    "That is good to hear",
    "Thank you.",
    "You're welcome."
]

trainer = ListTrainer(chatbot)
trainer.train(conversation)

List Trainer: [###                 ] 14%

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\bruno\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bruno\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


List Trainer: [####################] 100%


In [20]:
response = chatbot.get_response("Hi!")
print(response)

How are you doing?


In [21]:
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer
import os

bot= ChatBot('Bot')
trainer = ChatterBotCorpusTrainer(bot)

corpus_path = 'C:/Users/bruno/Downloads/chatterbot-corpus-chatterbot_corpus-data/english/'
    
for file in os.listdir(corpus_path):
    trainer.train(corpus_path + file)

while True:
    message = input('You:')
    print(message)
    if message.strip() == 'Bye':
        print('ChatBot: Bye')
        break
    else:
        reply = bot.get_response(message)
        print('ChatBot:', reply)

Training ai.yml: [                    ] 1%

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\bruno\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bruno\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training ai.yml: [####################] 100%
Training botprofile.yml: [####################] 100%
Training computers.yml: [####################] 100%
Training conversations.yml: [####################] 100%
Training emotion.yml: [####################] 100%
Training food.yml: [####################] 100%
Training gossip.yml: [####################] 100%
Training greetings.yml: [####################] 100%
Training health.yml: [####################] 100%
Training history.yml: [####################] 100%
Training humor.yml: [####################] 100%
Training literature.yml: [####################] 100%
Training money.yml: [####################] 100%
Training movies.yml: [####################] 100%
Training politics.yml: [####################] 100%
Training psychology.yml: [####################] 100%
Training science.yml: [####################] 100%
Training sports.yml: [####################] 100%
Training trivia.yml: [####################] 100%


You: Hello


Hello
ChatBot: Hi there!


You: What is your favorite sport?


What is your favorite sport?
ChatBot: My favorite subjects include robotics, computer science, and natural language processing.


You: Do you prefer text classification or topic modeling?


Do you prefer text classification or topic modeling?
ChatBot: What can you eat


You: Fish and eggs


Fish and eggs
ChatBot: Complex is better than complicated.


You: Bye


Bye
ChatBot: Bye
