In [1]:
import difflib
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
from rake_nltk import Rake

In [2]:
def get_keywords(text):
    """Extract ranked key phrases from ``text`` using RAKE.

    A fresh ``Rake`` extractor is built on every call, fed ``text``, and its
    ``get_ranked_phrases_with_scores()`` result is returned unchanged.
    Callers in this notebook read each item as a ``(score, phrase)`` pair.
    """
    extractor = Rake()
    extractor.extract_keywords_from_text(text)
    ranked_phrases = extractor.get_ranked_phrases_with_scores()
    return ranked_phrases

In [3]:
# Create a DataFrame from the source .csv file (the path is relative to the
# notebook's working directory).
# NOTE(review): the preview output shows an "Unnamed: 0" column, which looks
# like a row index that was saved into the CSV -- consider index_col=0 after
# confirming against the file.
df = pd.read_csv('data/sodata.csv')
# Show the first three rows as a quick sanity check of the columns.
df.head(3)

Unnamed: 0,title,accepted_answer_id,tags,answer_count,title_keywords
0,How to automate download of weekly export serv...,10182845,salesforce,7,"[(16.0, 'weekly export service files'), (4.0, ..."
1,java local timeformat without year,9720812,java|localization|date-format,8,"[(25.0, 'java local timeformat without year')]"
2,How do you enforce foreign key constraints in ...,9835781,java|sqlite|jdbc|foreign-keys,8,"[(16.0, 'enforce foreign key constraints'), (1..."


In [4]:
# Accessor for the question dataset.
# It hands back the DataFrame already loaded in this notebook, so the CSV is
# not parsed again each time a question is asked.
def get_datasource():
    """Return the notebook-level ``df`` DataFrame (the same object, not a copy)."""
    return df

In [5]:
# Define a function to match a user input to a question in the dataset
def match_question(user_input, data, keyword_extractor=None):
    """Find the dataset question whose title best matches ``user_input``.

    The first three key phrases of the input pre-filter ``data`` by title.
    Each surviving title is scored with ``difflib.SequenceMatcher`` against
    the input, plus a 0.1 bonus for every key phrase the title contains.

    Args:
        user_input: Free-text question typed by the user.
        data: DataFrame with a ``title`` column.
        keyword_extractor: Optional callable that takes the input text and
            returns ``(score, phrase)`` pairs, best first. Defaults to the
            notebook's ``get_keywords``.

    Returns:
        The best-scoring row as a pandas Series, or an empty string when no
        key phrase could be extracted or no title matched.
    """
    if keyword_extractor is None:
        keyword_extractor = get_keywords

    # Keep the three best phrases. Blank phrases are dropped because an empty
    # alternative in the pattern below would match every title.
    ranked = keyword_extractor(user_input)
    keywords = [phrase.lower() for _, phrase in ranked if phrase.strip()][:3]

    # Callers test the "nothing found" result with len(...) == 0, so the
    # empty-string sentinel is kept rather than switching to None.
    best_match = ""
    if not keywords:
        return best_match

    # Escape each phrase so characters such as "+", "(" or "?" in the user's
    # text are matched literally instead of being parsed as regex syntax.
    # case=False lets lower-cased phrases hit capitalised titles, and
    # na=False turns missing titles into "no match" instead of a mask error.
    pattern = "|".join(re.escape(keyword) for keyword in keywords)
    title_hits = data["title"].str.contains(pattern, case=False, regex=True, na=False)
    candidates = data[title_hits]

    # SequenceMatcher caches its analysis of the second sequence, so the user
    # input is set once and only the title changes per row.
    matcher = difflib.SequenceMatcher(None, "", user_input.lower())
    best_score = 0

    for _, row in candidates.iterrows():
        title = row["title"].lower()

        matcher.set_seq1(title)
        score = matcher.ratio()

        # Reward titles that literally contain the extracted key phrases.
        for keyword in keywords:
            if keyword in title:
                score += 0.1

        # Compare once per row, after the full score is known.
        if score > best_score:
            best_match = row
            best_score = score

    return best_match

In [6]:
# Define a function to retrieve the answer text for a question
# Retrieve the answer text from the Stack Overflow website using the accepted_answer_id
def get_answer_text(query):
    """Fetch and print the question summary and accepted answer for ``query``.

    Args:
        query: Mapping-like record (a DataFrame row in this notebook) that
            provides an ``accepted_answer_id`` entry.

    Returns:
        None. All results and error messages are reported through ``print``.
    """
    url = f'https://stackoverflow.com/a/{query["accepted_answer_id"]}'

    # The timeout keeps the chat loop from hanging on a stalled connection.
    # Network and HTTP failures are reported to the user instead of raising.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f"Sorry, I could not reach Stack Overflow ({error}). "
              "Please try again later or key in 'quit' or 'exit' to close me.\n")
        return

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # The first post body on the page is treated as the question. It can be
    # missing (for example on an unexpected page layout), so guard against
    # None before reading its paragraphs.
    post_body = soup.find('div', class_='s-prose js-post-body')
    question = post_body.find_all('p') if post_body is not None else []

    # NOTE(review): the summary is only shown when the question has at least
    # two paragraphs although only the first one is printed -- confirm whether
    # single-paragraph questions should be shown as well.
    if len(question) >= 2:
        print("Is this question similar to what you are looking for?:\n")
        print(question[0].text.strip(), "\n")

    answer_body = soup.find('div', class_='answer js-answer accepted-answer js-accepted-answer')

    # Adjacent string literals are used for the long messages below: a
    # backslash continuation inside a literal would embed the source
    # indentation in the printed text.
    if answer_body is None:
        print("Sorry I could not find matching responses to your question, "
              "provide more details to your question. "
              "This can include errors that you are facing. "
              "Else key in 'quit' or 'exit' to close me.\n ")
        return

    answers = answer_body.find_all('p')
    if answers:
        print("Here are some answers related to this question: \n")
        for answer in answers:
            print(answer.text, '\n')

    print("\nIf you are not satisfied with this answer, "
          "please provide more details to your question. "
          "This can include errors that you are facing. "
          "Else key in 'quit' or 'exit' to close me.\n")

In [7]:
# Define the chatbot function
def chatbot():
    """Run the interactive question-answering loop until the user quits.

    Each line read from ``input`` is matched against the dataset with
    ``match_question``; a match is passed to ``get_answer_text``, otherwise a
    "no relevant answer" message is printed. Typing ``quit`` or ``exit`` (any
    case), a closed input stream, or an interrupt ends the loop.

    Returns:
        None.
    """
    # Built from adjacent literals so the source indentation does not end up
    # inside the printed message.
    no_match_message = (
        'Sorry, I could not find a relevant answer to your question. Please rephrase your '
        'question and include specific details e.g errors or key words. \n'
    )

    print('Welcome to team CALIBAN AI chatbot! How can I help you?\n')
    while True:
        try:
            user_input = input('> ').strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the cell was interrupted: leave cleanly
            # instead of surfacing a traceback.
            print('Goodbye!')
            break

        if user_input.lower() in ('quit', 'exit'):
            print('Goodbye!')
            break

        query = match_question(user_input, get_datasource())

        # "Nothing found" may arrive as None or as an empty value.
        if query is None or len(query) == 0:
            print(no_match_message)
        else:
            get_answer_text(query)

In [8]:
# Test the chatbot.
# This will prompt you to key in your question.
# To exit the program, type "quit" or "exit" at the prompt.
chatbot()

Welcome to team CALIBAN AI chatbot! How can I help you?

> Error establishing a database connection
Is this question similar to what you are looking for?:

I have a wordpress site on a VPS running Debian 7 that was working fine when I last visited it and I've made no changes since, but now when I visit the site it says: Error establishing a database connection. It also gives the same error in the admin portal. 

Here are some answers related to this question: 

Okay, after troubleshooting a while, it turned out that the entire file system is read-only. I tried making new files and in each case it says it can't cause "Read-only filesystem" 

It seems to be a bad image or some sort of change my VPS provider made. So I'll be contacting them. 


If you are not satisfied with this answer,         please provide more details to your question. This can include errors that you are facing.        Else key in 'quit' or 'exit' to close me.

> quit
Goodbye!
