In [3]:
import re
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Landing page of the site the chatbot helps the user navigate.
URL = "https://www.csbsju.edu/computer-science"

# Fetch and parse the landing page.
# A timeout is set so a stalled connection raises instead of hanging the
# script forever at start-up (requests applies no timeout by default).
# NOTE(review): `soup` is not used anywhere in the visible code (the link
# table is hard-coded) -- presumably kept for future link scraping.
response = requests.get(URL, timeout=10)
soup = BeautifulSoup(response.text, "html.parser")


# Maps the visible text of a link to its target.
# Targets are site-relative paths WITHOUT a leading slash, because the caller
# prepends "https://www.csbsju.edu/" (which already ends in "/"); a leading
# slash would produce "https://www.csbsju.edu//...".
links = {
    'Admission & Aid': 'admission',
    'Academics': 'academics',
    # Same page as 'Apply to CSB+SJU'; previously written with a leading slash.
    'Apply Now': 'admission/apply',
    # NOTE(review): absolute external URL, unlike every other entry -- the
    # caller must not blindly prefix it with the site root.
    'life': 'https://www.instagram.com/csbsju/',
    'Curriculum': 'computer-science/curriculum',
    'Learning Outcomes': 'computer-science/learning-outcomes',
    'Faculty and Staff': 'computer-science/faculty',
    'Student Resources': 'computer-science/student-resources',
    'Frequently Asked Questions': 'computer-science/frequently-asked-questions',
    'Apply to CSB+SJU': 'admission/apply',
    'Career Paths': 'computer-science/career-paths',
    'Imad Rahal': 'computer-science/faculty/imad-rahal',
}

# Alternative phrasings a user might type for some of the link labels.
# Each key is a label from the link table; each value lists the label itself
# followed by its synonyms.
variations = {
    'Admission & Aid': [
        'Admission & Aid',
        'Admissions',
        'Admission and Aid',
        'Financial Aid',
    ],
    'Curriculum': [
        'Curriculum',
        'Class',
        'Classes',
        'Courses',
    ],
    'Faculty and Staff': [
        'Faculty and Staff',
        'Prof',
        'Profs',
        'Teacher',
        'Teachers',
        'Professor',
        'Professors',
    ],
    'Frequently Asked Questions': [
        'Frequently Asked Questions',
        'FAQ',
        'Question',
        'Questions',
    ],
    'Imad Rahal': [
        'Imad Rahal',
        'Chair',
        'Department Chair',
        'Leader',
        'Head',
    ],
}

# Build training phrases that pair every synonym with its link label, in both
# word orders ("<synonym> <label>" first, then "<label> <synonym>"), so each
# phrase still contains the label it belongs to.
input_combinations = []
for label, synonyms in variations.items():
    input_combinations.extend(f"{synonym} {label}" for synonym in synonyms)
    input_combinations.extend(f"{label} {synonym}" for synonym in synonyms)

# Comparison corpus: all generated phrases followed by the plain link labels.
corpus = [*input_combinations, *links]

# Bag-of-words model fitted on the corpus; X holds one count vector per corpus
# entry, so row i of X corresponds to corpus[i].
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

# Reply returned when a request cannot be mapped to any link.
sorry = "Sorry, I'm not sure where you want to go. Can you please try again?"

# Define a function to handle user requests
def handle_request(request):
    """Map a free-text user request to a link target.

    Matching happens in two stages:

    1. Literal match: if any link label occurs (case-insensitively) inside
       the request, that label's target is returned. All labels are tried
       before falling back, so a literal hit always wins.
    2. Fuzzy match: the request is vectorized and compared with every row of
       ``X`` by cosine similarity. If the best score exceeds 0.1, the
       matching corpus entry is mapped back to the link label it contains.

    Args:
        request: The text typed by the user.

    Returns:
        The target stored in ``links`` for the matched label, or the
        ``sorry`` message when nothing matches.
    """
    def find_label(text):
        # Labels are plain text, not patterns: escape them so characters
        # such as "+" in "Apply to CSB+SJU" are matched literally.
        for key in links:
            if re.search(re.escape(key), text, re.IGNORECASE):
                return key
        return None

    # Stage 1: literal label contained in the request.
    key = find_label(request)
    if key is not None:
        return links[key]

    # Stage 2: closest corpus entry by cosine similarity (computed once).
    request_vec = vectorizer.transform([request])
    similarities = cosine_similarity(request_vec, X)
    index = similarities.argmax()
    if similarities[0][index] > 0.1:
        # Every corpus entry embeds the label it was generated from, so the
        # same literal search recovers the link.
        key = find_label(corpus[index])
        if key is not None:
            return links[key]

    # If no match was found, prompt the user to try again
    return sorry

# Define a main function to handle user input and generate responses
def main():
    """Run the interactive chatbot loop on the console.

    Repeatedly prompts for a request, resolves it with ``handle_request`` and
    prints either the resulting absolute URL or the ``sorry`` message. The
    loop ends when the user types "exit" (case-insensitive, surrounding
    whitespace ignored).
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    site_root = "https://www.csbsju.edu/"

    print("Hi, I'm a chatbot that can help you navigate " + URL)
    while True:
        request = input("What can I help you with? (Write exit to exit)")
        if request.strip().lower() == "exit":
            break

        response = handle_request(request)
        if response == sorry:
            print(response)
        else:
            # urljoin leaves absolute targets (e.g. the Instagram link)
            # untouched and avoids a double slash for targets that start
            # with "/", which plain string concatenation got wrong.
            print(urljoin(site_root, response))

# Start the interactive chatbot only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Hi, I'm a chatbot that can help you navigate https://www.csbsju.edu/computer-science
What can I help you with? (Write exit to exit)computer science
Sorry, I'm not sure where you want to go. Can you please try again?
What can I help you with? (Write exit to exit)curriculum
https://www.csbsju.edu/computer-science/curriculum
What can I help you with? (Write exit to exit)classes
https://www.csbsju.edu/computer-science/curriculum
What can I help you with? (Write exit to exit)professors
https://www.csbsju.edu/computer-science/faculty
What can I help you with? (Write exit to exit)admission
https://www.csbsju.edu/admission
What can I help you with? (Write exit to exit)aid
https://www.csbsju.edu/admission
What can I help you with? (Write exit to exit)help
Sorry, I'm not sure where you want to go. Can you please try again?
What can I help you with? (Write exit to exit)exit


## TODO: Make the chatbot return a valid URL, e.g. https://www.csbsju.edu/computer-science