# Texas Classification for a simple ChatBot

## We will develop a simple chatbot by training it on Jane Austin's novel Persuasion. This corpus is quite small, so we shouldn't expect a great performance from the chatbot.

In [5]:
import nltk
import numpy as np
import pandas as pd
import random
import string
import en_core_web_sm
nlp = en_core_web_sm.load()
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import gutenberg
import re
import spacy
import warnings
# Import libraries
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer, ChatterBotCorpusTrainer
from chatterbot.conversation import Statement
warnings.filterwarnings("ignore")
nltk.download('gutenberg')
!python -m spacy download en

[nltk_data] Downloading package gutenberg to
[nltk_data]     C:\Users\00233270\AppData\Roaming\nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


Collecting en_core_web_sm==2.3.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz (12.0 MB)
symbolic link created for C:\Users\00233270\Anaconda3\envs\py37\lib\site-packages\spacy\data\en <<===>> C:\Users\00233270\Anaconda3\envs\py37\lib\site-packages\en_core_web_sm
[+] Download and installation successful
You can now load the model via spacy.load('en_core_web_sm')
[+] Linking successful
C:\Users\00233270\Anaconda3\envs\py37\lib\site-packages\en_core_web_sm -->
C:\Users\00233270\Anaconda3\envs\py37\lib\site-packages\spacy\data\en
You can now load the model via spacy.load('en')


In [6]:
# Utility function for standard text cleaning
def text_cleaner(text):
    # Visual inspection identifies a form of punctuation that spaCy does not
    # recognize: the double dash --.  Better get rid of it now!
    text = re.sub(r'--',' ',text)
    text = re.sub("[\[].*?[\]]", "", text)
    text = re.sub(r"(\b|\s+\-?|^\-?)(\d+|\d*\.\d+)\b", " ", text)
    text = ' '.join(text.split())
    return text

In [7]:
# Load and clean the data
persuasion = gutenberg.raw('austen-persuasion.txt')

# The chapter indicator is idiosyncratic
persuasion = re.sub(r'Chapter \d+', '', persuasion)
    
persuasion = text_cleaner(persuasion)

In [8]:
# Parse the cleaned novels. This can take some time.
#nlp = spacy.load('en')
persuasion_doc = nlp(persuasion)

In [15]:
# Group into sentences
# Use the sentences that have more than one character
persuasion_sents = [sent.text for sent in persuasion_doc.sents if len(sent.text) > 1]
persuasion_sents[:2]

['Sir Walter Elliot, of Kellynch Hall, in Somersetshire, was a man who, for his own amusement, never took up any book but the Baronetage; there he found occupation for an idle hour, and consolation in a distressed one; there his faculties were roused into admiration and respect, by contemplating the limited remnant of the earliest patents; there any unwelcome sensations, arising from domestic affairs changed naturally into pity and contempt as he turned over the almost endless creations of the last century; and there, if every other leaf were powerless, he could read his own history with an interest which never failed.',
 'This was the page at which the favourite volume always opened: "ELLIOT OF KELLYNCH HALL. "']

In [16]:
GREETING_INPUTS = ["hello", "hi", "greetings", "what's up","hey"]
GREETING_RESPONSES = ["hello", "hi", "hey", "hi there"]
def greeting(sentence):
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)

## Now we can create our own chatbot and train it using Persuasion:

In [17]:
# Create a chatbot
chatbot = ChatBot('Persuasion')
# This is to remove the accumulated knowledge base
chatbot.storage.drop()

# Create a new trainer for the chatbot
trainer = ListTrainer(chatbot)

# Train the chatbot based on Persuasion
trainer.train(persuasion_sents)

List Trainer: [####################] 100%


## Next, run the chatbot:

In [18]:
print("Persuasion: I will try to respond to you reasonably. If you want to exit, type bye.")

# Below is the chatting
while True:
    
    user_input = input("User: ")
    user_input=user_input.lower()
    
    if(user_input!='bye'):
        if(user_input == 'thanks' or user_input == 'thank you'):
            break
            print("Persuasion: You're welcome.")
        else:
            if(greeting(user_input) != None):
                print("Persuasion: " + greeting(user_input))
            else:
                print("Persuasion: ", end = "")
                print(chatbot.get_response(user_input))
    else:
        print("Persuasion: Bye! It was a great chat.")
        break

Persuasion: I will try to respond to you reasonably. If you want to exit, type bye.


User:  Hello


Persuasion: hi there


User:  how are you


Persuasion: `Sixty,' said I, `or perhaps sixty-two.'


User:  where


Persuasion: Oh!


User:  how


Persuasion: This is always my luck.


User:  why


Persuasion: Let me plead for my present friend I cannot call him, but for my former friend.


User:  end


Persuasion: He had been remarkably handsome in his youth; and, at fifty-four, was still a very fine man.


User:  


No value for search_text was available on the provided input


Persuasion: Sir Walter Elliot, of Kellynch Hall, in Somersetshire, was a man who, for his own amusement, never took up any book but the Baronetage; there he found occupation for an idle hour, and consolation in a distressed one; there his faculties were roused into admiration and respect, by contemplating the limited remnant of the earliest patents; there any unwelcome sensations, arising from domestic affairs changed naturally into pity and contempt as he turned over the almost endless creations of the last century; and there, if every other leaf were powerless, he could read his own history with an interest which never failed.


User:  


No value for search_text was available on the provided input


Persuasion: Sir Walter Elliot, of Kellynch Hall, in Somersetshire, was a man who, for his own amusement, never took up any book but the Baronetage; there he found occupation for an idle hour, and consolation in a distressed one; there his faculties were roused into admiration and respect, by contemplating the limited remnant of the earliest patents; there any unwelcome sensations, arising from domestic affairs changed naturally into pity and contempt as he turned over the almost endless creations of the last century; and there, if every other leaf were powerless, he could read his own history with an interest which never failed.


User:  bye


Persuasion: Bye! It was a great chat.
