In [None]:
!pip install openai



In [None]:
import numpy as np
from typing import Optional, List, Tuple, Any
import spacy
import xml.etree.ElementTree as ET
import os
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import TfidfVectorizer
import openai
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import sqlite3
from scipy.spatial.distance import cosine

%env OPENAI_API_KEY="sk-proj-"


In [None]:
class ChatbotDatabase:
    """SQLite-backed store of terms, one embedding per term, and facts per term.

    The connection is opened in ``__init__`` and stays open until ``close()``
    is called (or the instance is used as a context manager).
    """

    def __init__(self, db_path: str):
        """Open (or create) the database at ``db_path`` and ensure the tables exist."""
        self.connection = sqlite3.connect(db_path)
        self.cursor = self.connection.cursor()
        self.setup_database()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying SQLite connection."""
        self.connection.close()

    def setup_database(self):
        """Initialize the database tables with adjusted schema for unique embeddings."""
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS Terms (
                term_id INTEGER PRIMARY KEY,
                term TEXT UNIQUE NOT NULL
            );
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS Embeddings (
                term_id INTEGER UNIQUE NOT NULL,
                embedding BLOB UNIQUE NOT NULL,
                FOREIGN KEY (term_id) REFERENCES Terms(term_id)
            );
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS Facts (
                fact_id INTEGER PRIMARY KEY,
                term_id INTEGER NOT NULL,
                fact TEXT NOT NULL,
                FOREIGN KEY (term_id) REFERENCES Terms(term_id)
            );
        ''')
        self.connection.commit()

    def add_term_with_embedding(self, term: str, embedding: bytes):
        """Insert ``term`` if it is new and store (or replace) its embedding.

        Parameters:
            term (str): The term text; unique in the Terms table.
            embedding (bytes): Raw embedding bytes (any buffer SQLite accepts).

        Raises:
            sqlite3.IntegrityError: If the term cannot be stored or the
                embedding violates a UNIQUE constraint. Pending changes are
                rolled back before the error propagates.
        """
        try:
            self.cursor.execute('INSERT OR IGNORE INTO Terms (term) VALUES (?)', (term,))
            # Look the id up explicitly: lastrowid is not reset when the
            # INSERT is ignored, so it cannot tell "new" from "existing".
            self.cursor.execute('SELECT term_id FROM Terms WHERE term = ?', (term,))
            row = self.cursor.fetchone()
            if row is None:
                raise sqlite3.IntegrityError(f"Term {term!r} could not be stored.")
            term_id = row[0]

            self.cursor.execute(
                'UPDATE Embeddings SET embedding = ? WHERE term_id = ?',
                (embedding, term_id),
            )
            if self.cursor.rowcount == 0:
                # No embedding row yet for this term: create it.
                self.cursor.execute(
                    'INSERT INTO Embeddings (term_id, embedding) VALUES (?, ?)',
                    (term_id, embedding),
                )
        except sqlite3.Error:
            # Do not leave a half-written term behind for the next commit.
            self.connection.rollback()
            raise
        self.connection.commit()

    def add_fact(self, term: str, fact: str):
        """Add a fact associated with a term, identified by the term text.

        Raises:
            sqlite3.IntegrityError: If ``term`` is not present in Terms (the
                sub-select yields NULL, which violates the NOT NULL column).
        """
        self.cursor.execute('''
            INSERT INTO Facts (term_id, fact)
            VALUES ((SELECT term_id FROM Terms WHERE term = ?), ?)
        ''', (term, fact))
        self.connection.commit()

    def retrieve_facts(self, term: str) -> List[str]:
        """Retrieve facts for a given term (empty list when there are none)."""
        self.cursor.execute('''
            SELECT fact FROM Facts
            INNER JOIN Terms ON Facts.term_id = Terms.term_id
            WHERE term = ?
        ''', (term,))
        return [row[0] for row in self.cursor.fetchall()]

    def execute_query(self, query: str, params: Tuple[Any, ...] = ()) -> List[Tuple]:
        """Execute an arbitrary query and return all rows; does not commit."""
        self.cursor.execute(query, params)
        return self.cursor.fetchall()

    def retrieve_facts_by_embedding(self, input_embedding: np.ndarray, top_k: int = 5) -> List[Tuple[str, float]]:
        """Return facts of the terms whose embeddings are closest to the input.

        Parameters:
            input_embedding (np.ndarray): Query vector; must have the same
                length as the stored vectors.
            top_k (int): Number of best-matching terms whose facts are returned.

        Returns:
            List[Tuple[str, float]]: ``(fact, cosine_similarity)`` pairs,
            ordered by descending similarity of their term.
        """
        self.cursor.execute('SELECT term_id, embedding FROM Embeddings')

        similarities = []
        for term_id, stored_embedding in self.cursor.fetchall():
            # Stored blobs are decoded as float64, so writers must use that dtype.
            stored_embedding_arr = np.frombuffer(stored_embedding, dtype=np.float64)
            similarity = 1 - cosine(input_embedding, stored_embedding_arr)
            similarities.append((similarity, term_id))

        similarities.sort(reverse=True, key=lambda pair: pair[0])

        facts_with_scores = []
        for similarity, term_id in similarities[:max(top_k, 0)]:
            self.cursor.execute('SELECT fact FROM Facts WHERE term_id = ?', (term_id,))
            facts_with_scores.extend((row[0], similarity) for row in self.cursor.fetchall())

        return facts_with_scores

    def retrive_term_by_term_id(self, term_id: int) -> str:
        """Retrieve the term associated with a given term_id.

        Raises:
            TypeError: If no term has this id (``fetchone()`` returns None).
        """
        self.cursor.execute('SELECT term FROM Terms WHERE term_id = ?', (term_id,))
        return self.cursor.fetchone()[0]

    # Correctly spelled alias; the misspelled name is kept for existing callers.
    retrieve_term_by_term_id = retrive_term_by_term_id

In [None]:
class PresidentsQA:
    """
    A class to generate questions and answers about U.S. Presidents using the OpenAI API, allowing for a specified number of question-answer pairs.

    Methods:
    generateQA(num_pairs: int) -> dict: Generates a specified number of general question-answer pairs about U.S. Presidents.
    generateQA_withTerm(term: str, num_pairs: int) -> dict: Generates a specified number of question-answer pairs about U.S. Presidents, incorporating a given term.

    Both generators always return a dict; it is empty when the input is
    invalid, the API call fails, or the response cannot be parsed.
    """

    _SYSTEM_PROMPT = "As a knowledgeable AI you provide a series of well-structured question-and-answer pairs related to U.S. presidents. Your responses should adhere to the following format:\n Q: [Clearly stated question about a U.S. president] \n A: [Concise and accurate answer to the question] \n Q: Which U.S. President signed the Emancipation Proclamation during the Civil War? \n A: Abraham Lincoln signed the Emancipation Proclamation on January 1, 1863, freeing enslaved individuals in the Confederate states."

    # Tolerates leading/trailing whitespace around "Q:" / "A:" (the layout the
    # prompts themselves request) and an optional "1." / "2)" numbering before
    # the next question. A new pair only starts at the beginning of a line.
    _QA_PATTERN = re.compile(
        r"Q:\s*(.*?)\s*\n\s*A:\s*(.*?)\s*(?=\n\s*(?:\d+[.)]\s*)?Q:|\Z)",
        re.DOTALL,
    )

    def __init__(self, model="gpt-3.5-turbo"):
        """
        Initializes the PresidentsQA class by setting up the OpenAI client with an API key.

        Parameters:
        model (str): Chat model used for generation.

        A failed setup is reported on stdout and leaves ``self.client`` as
        None; later API calls then fail softly and return an empty string.
        """
        self.model = model
        self.client = None
        try:
            api_key = os.getenv("OPENAI_API_KEY")
            if not api_key:
                raise ValueError("OPENAI_API_KEY is not set in environment variables.")
            self.client = openai.OpenAI(api_key=api_key)
        except Exception as e:
            print(f"Failed to initialize OpenAI client: {e}")

    def _api_call(self, prompt):
        """
        Internal method to make a chat completion API call to OpenAI.

        Parameters:
        prompt (str): The prompt to send to the API.

        Returns:
        str: The API's response text, or "" if the call could not be made.
        """
        if self.client is None:
            print("API call failed: OpenAI client is not initialized.")
            return ""
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": prompt}
                ]
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"API call failed: {e}")
            return ""

    def _parse_qa_pairs(self, text):
        """
        Extracts question/answer pairs from a "Q: ... / A: ..." formatted text.

        Parameters:
        text (str): Raw model output.

        Returns:
        dict: Maps each question to its answer; a repeated question keeps
        the last answer. Empty when nothing matches.
        """
        matches = self._QA_PATTERN.findall(text or "")
        return {question.strip(): answer.strip() for question, answer in matches}

    def _generate(self, prompt, failure_message):
        """Call the API with ``prompt``, print and return the parsed pairs ({} on failure)."""
        qa_pairs = self._parse_qa_pairs(self._api_call(prompt))
        if not qa_pairs:
            print(failure_message)
            return {}

        for q, a in qa_pairs.items():
            print(f"Q: {q}\nA: {a}\n")
        return qa_pairs

    def generateQA(self, num_pairs):
        """
        Generates a specified number of general question-answer pairs about U.S. Presidents.

        Parameters:
        num_pairs (int): The number of question-answer pairs to generate.

        Returns:
        dict: Question -> answer; empty on invalid input or failure.
        """
        if num_pairs < 1:
            print("Number of pairs must be at least 1.")
            return {}

        prompt = f"Generate {num_pairs} question(s) and answer(s) about U.S. Presidents. Your output should strictly follow the following format:\n Q: Which U.S. President served as the 16th President of the United States and led the country during the American Civil War? \n A: Abraham Lincoln.\n Q: Which U.S. President signed the Emancipation Proclamation during the Civil War? \n A: Abraham Lincoln signed the Emancipation Proclamation on January 1, 1863, freeing enslaved individuals in the Confederate states."
        return self._generate(prompt, "Failed to generate question and answer pairs.")

    def generateQA_withTerm(self, term, num_pairs):
        """
        Generates a specified number of question-answer pairs about U.S. Presidents, incorporating a given term.

        Parameters:
        term (str): The term to include in the question or answer.
        num_pairs (int): The number of question-answer pairs to generate.

        Returns:
        dict: Question -> answer; empty on invalid input or failure.
        """
        if num_pairs < 1:
            print("Number of pairs must be at least 1.")
            return {}

        if not term:
            print("Term is required.")
            return {}

        prompt = f"Generate {num_pairs} question(s) and answer(s) about U.S. Presidents that include the term '{term}' in the question. Ensure the term '{term}' is included in the question  "
        return self._generate(prompt, "Failed to generate question and answer pairs with specified term.")


In [None]:
class OpenAIEmbedder:
    """
    A class to interact with the OpenAI API to obtain and process text embeddings.

    This class fetches embeddings from OpenAI's API, truncates them to a fixed
    number of leading dimensions (256 by default), and applies L2 normalization.
    """

    def __init__(self, model: str = "text-embedding-3-small", dimensions: int = 256):
        """
        Initializes the OpenAIEmbedder with a specific model.

        Parameters:
            model (str): The model to be used for text embeddings.
            dimensions (int): Number of leading embedding components to keep;
                None keeps the full vector.
        """
        self.client = openai.OpenAI()
        self.model = model
        self.dimensions = dimensions

    @staticmethod
    def normalize_l2(x: np.ndarray) -> np.ndarray:
        """
        Applies L2 normalization to an embedding.

        Parameters:
            x (np.ndarray): The embedding to normalize.

        Returns:
            np.ndarray: The L2 normalized embedding; a zero vector is
            returned unchanged to avoid dividing by zero.
        """
        norm = np.linalg.norm(x)
        return x / norm if norm > 0 else x

    def get_embedding(self, text: str) -> Optional[np.ndarray]:
        """
        Obtains and processes the embedding for a given text.

        Newlines in ``text`` are replaced by spaces before the request. The
        returned vector is truncated to ``self.dimensions`` components and
        L2-normalized.

        Parameters:
            text (str): The text to get the embedding for.

        Returns:
            Optional[np.ndarray]: The processed float64 embedding, or None if
            an error occurs (the error is printed).
        """
        processed_text = text.replace("\n", " ")
        try:
            response = self.client.embeddings.create(
                input=[processed_text], model=self.model, encoding_format="float"
            )
            embedding = np.array(
                response.data[0].embedding[:self.dimensions], dtype=np.float64
            )
            return self.normalize_l2(embedding)
        # The v1 client (openai.OpenAI) exposes its exceptions at package
        # level; the former ``openai.error`` module no longer exists.
        except openai.RateLimitError:
            print("Rate limit exceeded. Please try again later.")
        except openai.BadRequestError as e:
            print(f"Invalid request: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
        return None

In [None]:
def fetch_page_content(url, timeout=10):
    """
    Fetches the content of a webpage.

    Args:
        url (str): The URL of the webpage to fetch.
        timeout (float): Seconds to wait for the server before giving up, so
            an unresponsive host cannot block the caller indefinitely.

    Returns:
        str or None: The HTML content of the page, or None when the request
        fails (connection error, timeout, or non-2xx status). The error is
        printed rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the page content: {e}")
        return None
    return response.text

def parse_html(html_content):
    """
    Turns raw HTML into a navigable BeautifulSoup tree.

    Args:
        html_content (str): Markup to parse.

    Returns:
        BeautifulSoup: Tree built with Python's built-in 'html.parser'.
    """
    parser_name = 'html.parser'
    soup = BeautifulSoup(html_content, parser_name)
    return soup

def extract_links(soup, base_url):
    """
    Extracts the links found in table cells that carry a ``data-sort-value`` attribute.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object containing the parsed HTML content.
        base_url (str): The base URL used to resolve relative links.

    Returns:
        list of str: Absolute URLs, one per qualifying cell, in document
        order. Cells with an empty ``data-sort-value`` or without an
        ``<a href=...>`` are skipped.
    """
    from urllib.parse import urljoin

    links = []
    for td in soup.find_all('td', {'data-sort-value': True}):
        if not td['data-sort-value']:
            continue
        link_tag = td.find('a', href=True)
        if link_tag is None:
            continue
        # urljoin copes with absolute hrefs, hrefs lacking a leading slash
        # and a base URL ending in "/", where plain concatenation would not.
        links.append(urljoin(base_url, link_tag['href']))
    return links

def display_links(links):
    """
    Writes every link to standard output, one per line.

    Args:
        links (list of str): Links to show; nothing is printed when empty.
    """
    entries = list(links)
    if entries:
        print(*entries, sep="\n")

def main():
    """
    Scrapes the Wikipedia list of U.S. presidents and prints the article links.

    Returns:
        list of str: The extracted links; an empty list when the page could
        not be fetched, so callers can always iterate over the result.
    """
    url = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"
    base_url = "https://en.wikipedia.org"

    html_content = fetch_page_content(url)
    if not html_content:
        print("Failed to fetch or parse page content.")
        return []

    soup = parse_html(html_content)
    links = extract_links(soup, base_url)
    display_links(links)
    return links

In [None]:
# Scrape the president article URLs once; the crawl cell below reuses `links`.
links = main()

https://en.wikipedia.org/wiki/George_Washington
https://en.wikipedia.org/wiki/John_Adams
https://en.wikipedia.org/wiki/Thomas_Jefferson
https://en.wikipedia.org/wiki/James_Madison
https://en.wikipedia.org/wiki/James_Monroe
https://en.wikipedia.org/wiki/John_Quincy_Adams
https://en.wikipedia.org/wiki/Andrew_Jackson
https://en.wikipedia.org/wiki/Martin_Van_Buren
https://en.wikipedia.org/wiki/William_Henry_Harrison
https://en.wikipedia.org/wiki/John_Tyler
https://en.wikipedia.org/wiki/James_K._Polk
https://en.wikipedia.org/wiki/Zachary_Taylor
https://en.wikipedia.org/wiki/Millard_Fillmore
https://en.wikipedia.org/wiki/Franklin_Pierce
https://en.wikipedia.org/wiki/James_Buchanan
https://en.wikipedia.org/wiki/Abraham_Lincoln
https://en.wikipedia.org/wiki/Andrew_Johnson
https://en.wikipedia.org/wiki/Ulysses_S._Grant
https://en.wikipedia.org/wiki/Rutherford_B._Hayes
https://en.wikipedia.org/wiki/James_A._Garfield
https://en.wikipedia.org/wiki/Chester_A._Arthur
https://en.wikipedia.org/wiki/Gr

In [None]:
class WebCrawler:
    """Downloads a list of pages and saves each page's visible text to a .txt file."""

    def __init__(self, urls, timeout=10):
        """
        Parameters:
            urls: Iterable of URLs to crawl. None is treated as "nothing to
                crawl" and a single string as one URL.
            timeout (float): Seconds to wait for each HTTP response.
        """
        print(f"URLs provided: {urls}")
        if urls is None:
            urls = []
        elif isinstance(urls, str):
            # Iterating a bare string would "crawl" it character by character.
            urls = [urls]
        self.urls = list(urls)
        self.timeout = timeout

    def crawl(self):
        """Fetch, extract and save every URL; a failure on one URL is printed and skipped."""
        for url in self.urls:
            try:
                response = self.fetch_content(url)
                text = self.extract_text(response)
                self.save_to_file(url, text)
            except Exception as e:
                # Deliberately broad: one bad page must not abort the whole crawl.
                print(f"An error occurred while processing {url}: {e}")

    def fetch_content(self, url):
        """Return the HTTP response for ``url``; raises on network errors or non-2xx status."""
        headers = {'User-Agent': 'Custom Web Crawler'}
        response = requests.get(url, headers=headers, timeout=self.timeout)
        response.raise_for_status()
        return response

    def extract_text(self, response):
        """Return the text content of the response's HTML with all markup removed."""
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text()

    def save_to_file(self, url, text):
        """Write ``text`` as UTF-8 to a file in the working directory named after ``url``."""
        filename = self.create_filename(url)
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(text)

    @staticmethod
    def create_filename(url):
        """Derive a file name from ``url``: drop the scheme, turn "/" into "_", append ".txt"."""
        return f"{url.replace('http://', '').replace('https://', '').replace('/', '_')}.txt"


In [None]:
# Notebook sanity check: inspect what main() returned before crawling.
type(links)

list

In [None]:
# Download every scraped page; WebCrawler.save_to_file writes one .txt file
# per URL into the current working directory.
urls = links
crawler = WebCrawler(urls)
crawler.crawl()

URLs provided: ['https://en.wikipedia.org/wiki/George_Washington', 'https://en.wikipedia.org/wiki/John_Adams', 'https://en.wikipedia.org/wiki/Thomas_Jefferson', 'https://en.wikipedia.org/wiki/James_Madison', 'https://en.wikipedia.org/wiki/James_Monroe', 'https://en.wikipedia.org/wiki/John_Quincy_Adams', 'https://en.wikipedia.org/wiki/Andrew_Jackson', 'https://en.wikipedia.org/wiki/Martin_Van_Buren', 'https://en.wikipedia.org/wiki/William_Henry_Harrison', 'https://en.wikipedia.org/wiki/John_Tyler', 'https://en.wikipedia.org/wiki/James_K._Polk', 'https://en.wikipedia.org/wiki/Zachary_Taylor', 'https://en.wikipedia.org/wiki/Millard_Fillmore', 'https://en.wikipedia.org/wiki/Franklin_Pierce', 'https://en.wikipedia.org/wiki/James_Buchanan', 'https://en.wikipedia.org/wiki/Abraham_Lincoln', 'https://en.wikipedia.org/wiki/Andrew_Johnson', 'https://en.wikipedia.org/wiki/Ulysses_S._Grant', 'https://en.wikipedia.org/wiki/Rutherford_B._Hayes', 'https://en.wikipedia.org/wiki/James_A._Garfield', 'htt

In [None]:
# Loaded spaCy pipelines keyed by model name, so the model is read from disk
# once per session instead of once per cleaned file.
_SPACY_MODELS = {}


def _get_spacy_model(name="en_core_web_sm"):
    """Return the spaCy pipeline ``name``, loading it only on first use."""
    if name not in _SPACY_MODELS:
        _SPACY_MODELS[name] = spacy.load(name)
    return _SPACY_MODELS[name]


def clean_text_file(input_dir, output_dir, filename, nlp=None):
    """Cleans the content of a given file using NLP techniques.

    The text is lower-cased, tokenized, stripped of stop words, punctuation
    and whitespace tokens, and the remaining lemmas are written, separated by
    single spaces, to a file of the same name in ``output_dir``.

    Args:
        input_dir (str): Directory containing the raw file.
        output_dir (str): Existing directory that receives the cleaned file.
        filename (str): Name of the file inside ``input_dir``.
        nlp: Optional spaCy pipeline to use; defaults to a cached
            "en_core_web_sm" pipeline.
    """
    if nlp is None:
        nlp = _get_spacy_model()

    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename)

    with open(input_path, 'r', encoding='utf-8') as file:
        raw_text = file.read()

    doc = nlp(raw_text.lower())
    cleaned_text = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(" ".join(cleaned_text))


def process_directory(input_dir, output_dir):
    """Runs the cleaning step on every '.txt' file found directly in input_dir.

    The output directory is created first when it does not exist yet, and a
    "Processed <name>" line is printed after each cleaned file.
    """
    output_missing = not os.path.exists(output_dir)
    if output_missing:
        os.makedirs(output_dir)

    text_files = [entry for entry in os.listdir(input_dir) if entry.endswith('.txt')]
    for text_file in text_files:
        clean_text_file(input_dir, output_dir, text_file)
        print(f"Processed {text_file}")



def extract_important_terms(directory, max_features=40):
    """Extracts important terms from cleaned text files in a directory using TF-IDF.

    Args:
        directory (str): Directory whose regular files are read as UTF-8 text;
            sub-directories are skipped.
        max_features (int): Maximum vocabulary size kept by the vectorizer.

    Returns:
        list of (term, score): Terms with their mean TF-IDF score across all
        documents, highest score first. Empty when the directory holds no files.
    """
    texts = []
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        if not os.path.isfile(file_path):
            # e.g. notebook checkpoint folders; open() would fail on them.
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            texts.append(file.read().lower())  # Ensure text is in lowercase

    if not texts:
        # The vectorizer cannot be fitted on an empty corpus.
        return []

    # Calculate TF-IDF
    vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
    tfidf_matrix = vectorizer.fit_transform(texts)

    # Mean tf-idf score for each term across all documents
    feature_names = vectorizer.get_feature_names_out()
    scores = np.asarray(tfidf_matrix.mean(axis=0)).ravel().tolist()

    return sorted(zip(feature_names, scores), key=lambda x: x[1], reverse=True)

In [None]:
# Clean every crawled .txt file found directly in /content/ and write the
# results into ./cleaned_content (a path relative to the working directory).
input_dir = "/content/"
output_dir = "cleaned_content"
# process_directory performs the same existence check before it starts.
if not os.path.exists(output_dir):
  os.makedirs(output_dir)
process_directory(input_dir, output_dir)

Processed en.wikipedia.org_wiki_James_A._Garfield.txt
Processed en.wikipedia.org_wiki_Abraham_Lincoln.txt
Processed en.wikipedia.org_wiki_John_F._Kennedy.txt
Processed en.wikipedia.org_wiki_Andrew_Johnson.txt
Processed en.wikipedia.org_wiki_John_Quincy_Adams.txt
Processed en.wikipedia.org_wiki_James_K._Polk.txt
Processed en.wikipedia.org_wiki_Herbert_Hoover.txt
Processed en.wikipedia.org_wiki_Harry_S._Truman.txt
Processed en.wikipedia.org_wiki_George_Washington.txt
Processed en.wikipedia.org_wiki_Jimmy_Carter.txt
Processed en.wikipedia.org_wiki_Zachary_Taylor.txt
Processed en.wikipedia.org_wiki_Ronald_Reagan.txt
Processed en.wikipedia.org_wiki_Dwight_D._Eisenhower.txt
Processed en.wikipedia.org_wiki_Franklin_Pierce.txt
Processed en.wikipedia.org_wiki_James_Buchanan.txt
Processed en.wikipedia.org_wiki_Lyndon_B._Johnson.txt
Processed en.wikipedia.org_wiki_William_Henry_Harrison.txt
Processed en.wikipedia.org_wiki_Woodrow_Wilson.txt
Processed en.wikipedia.org_wiki_John_Tyler.txt
Processed

In [None]:
# Rank the cleaned corpus by mean TF-IDF and list each term with its score.
directory = "/content/cleaned_content"
# important_terms is a list of (term, score) pairs, highest score first.
important_terms = extract_important_terms(directory)
for term, score in important_terms:
  print(f"{term}: {score}")

pp: 0.3051555291061783
president: 0.2903299944865041
new: 0.20092786762125509
united: 0.19593249952838646
retrieve: 0.18270913737991165
war: 0.17005405353192107
states: 0.1686455338039934
american: 0.16424519437704616
john: 0.15817821914786767
party: 0.15589151694145992
state: 0.14865087711222255
election: 0.12455077729661221
house: 0.11308364154442556
archive: 0.11285098859956856
presidential: 0.11076310615112814
william: 0.10922090032882903
york: 0.10679949442953898
james: 0.10561738405970919
original: 0.10002566339379568
isbn: 0.0984507124317899
national: 0.09669474622225474
republican: 0.09552153900996377
washington: 0.08929812357287535
978: 0.0852724743374781
roosevelt: 0.08511715130243766
act: 0.08399491103154239
johnson: 0.08251232151290039
presidency: 0.07960920975284719
george: 0.07529541331922283
campaign: 0.07378120695930578
2017: 0.07303790319617184
march: 0.0681605999786964
bush: 0.0669952863651995
january: 0.06403223231440514
december: 0.062293151633934424
2021: 0.0607360

In [None]:
SQLite_database_path = '/content/chatbot_database.db'


def GenerateData(number_of_question_at_each_iter, number_of_iter, term=None, SQLite_database=SQLite_database_path):
    """Generate Q/A pairs and store each question, its embedding and its answer.

    Parameters:
        number_of_question_at_each_iter (int): Pairs requested per API call.
        number_of_iter (int): Number of API calls to make.
        term (str, optional): When given, questions must contain this term.
        SQLite_database (str): Path of the SQLite file to write to.

    Each question is stored as a term, its embedding as float64 bytes, and the
    answer as a fact of that term. Failures for a single pair are printed and
    skipped; the database connection is closed when the function returns.
    """
    presidentsQA = PresidentsQA()
    db = ChatbotDatabase(SQLite_database)
    embedder = OpenAIEmbedder()

    try:
        for _ in range(number_of_iter):
            if term:
                qa_pairs = presidentsQA.generateQA_withTerm(term, number_of_question_at_each_iter)
            else:
                qa_pairs = presidentsQA.generateQA(number_of_question_at_each_iter)

            # The generators may yield nothing when the API call or parsing fails.
            for question, answer in (qa_pairs or {}).items():
                try:
                    embedding = embedder.get_embedding(question)
                    if embedding is None:
                        print(f"Skipped adding {question}: no embedding was returned.")
                        continue
                    # The database decodes blobs as float64, so pin the dtype here.
                    embedding_blob = sqlite3.Binary(
                        np.asarray(embedding, dtype=np.float64).tobytes()
                    )
                    db.add_term_with_embedding(question, embedding_blob)
                    db.add_fact(question, answer)
                except sqlite3.IntegrityError as e:
                    # Drop whatever part of this pair was written before the failure.
                    db.connection.rollback()
                    print(f"Skipped adding {question} due to IntegrityError: {e}")
                # openai>=1.0 exposes exceptions at package level (no openai.error).
                except openai.OpenAIError as e:
                    print(f"An OpenAI API error occurred: {e}")
                except Exception as e:
                    print(f"An unexpected error occurred: {e}")
    finally:
        db.connection.close()

In [None]:
# important_terms holds (term, score) pairs; only the term text belongs in the
# prompt, otherwise the whole tuple is interpolated into it.
for term, _score in important_terms:
    print(f"Generating data for term: {term}")
    try:
        GenerateData(number_of_question_at_each_iter=20, number_of_iter=1, term=term)
    except Exception as e:
        print(f"An error occurred while generating data for term '{term}': {e}")


Generating data for term: ('pp', 0.3051555291061783)
API call failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: "sk-proj**********************************************************************************************************************************************************5cA". You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Failed to generate question and answer pairs with specified term.
An error occurred while generating data for term '('pp', 0.3051555291061783)': 'NoneType' object has no attribute 'items'
Generating data for term: ('president', 0.2903299944865041)
API call failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: "sk-proj**********************************************************************************************************************************************************5cA". You can find your API key at https://p

In [None]:
# Shared spaCy pipeline used by ChatBot for entity and sentence detection.
nlp = spacy.load("en_core_web_sm")
# SQLite file the chatbot reads its terms, embeddings and facts from.
DATABASE_PATH = '/content/chatbot_database.db'
class ChatBot:
    """Console chatbot that answers questions about U.S. Presidents.

    User questions are embedded and compared with the question embeddings
    stored in the database; the fact of the closest stored question is shown.
    """

    def __init__(self):
        self.greetings = ["Hello! I'm here to help you learn about U.S. Presidents.",
                          "Hi there! Ask me anything about U.S. Presidents."]
        self.user_name = ""
        self.favorite_president = ""
        self.embedder = OpenAIEmbedder()
        self.database = ChatbotDatabase(DATABASE_PATH)
        # Minimum cosine similarity for a stored question to count as an answer.
        self.similarity_threshold = 0.5

    def introduce_and_ask_info(self):
        """Greet the user, ask for their name and a president, and save both."""
        print(np.random.choice(self.greetings))
        user_input = input("What's your name? ")
        self.user_name = self.extract_name(user_input)
        print(f"Nice to meet you, {self.user_name}! Which U.S. President would you like to learn about?")
        pres_input = input()
        self.favorite_president = self.extract_president(pres_input)
        self.save_preference()

    @staticmethod
    def _first_person(text: str, default: str) -> str:
        """Return the first PERSON entity spaCy finds in ``text``, else ``default``."""
        for ent in nlp(text).ents:
            if ent.label_ == "PERSON":
                return ent.text
        return default

    def extract_name(self, text: str) -> str:
        """Return the user's name found in ``text``, or "there" if none is detected."""
        return self._first_person(text, "there")

    def extract_president(self, text: str) -> str:
        """Return the person named in ``text``, or "Unknown" if none is detected."""
        return self._first_person(text, "Unknown")

    def extract_questions(self, text: str) -> List[str]:
        """
        Extracts questions from the given text.

        Parameters:
        - text (str): The input text from which to extract questions.

        Returns:
        - List[str]: The sentences of the input that end with a question mark.
        """
        sentences = (sent.text.strip() for sent in nlp(text).sents)
        return [sentence for sentence in sentences if sentence.endswith('?')]

    def save_preference(self):
        """Write the user's name and chosen president to user_preferences.xml."""
        root = ET.Element("UserPreferences")
        ET.SubElement(root, "Name").text = self.user_name
        ET.SubElement(root, "FavoritePresident").text = self.favorite_president
        tree = ET.ElementTree(root)
        tree.write("user_preferences.xml")

    def _questions_for_fact(self, fact: str) -> List[str]:
        """Return the stored questions (terms) that the given fact belongs to."""
        rows = self.database.execute_query(
            '''
            SELECT Terms.term FROM Terms
            INNER JOIN Facts ON Facts.term_id = Terms.term_id
            WHERE Facts.fact = ?
            ''',
            (fact,),
        )
        return [row[0] for row in rows]

    def handle_user_query(self, query: str):
        """Answer every question found in ``query``.

        If no sentence ends with "?", the whole input is treated as one
        question. For each question the best matching fact is printed when its
        similarity reaches ``similarity_threshold``; stored questions whose
        similarity is within 0.2 below the threshold are suggested as well.
        """
        extracted_questions = self.extract_questions(query) or [query]

        for question in extracted_questions:
            question_embedding = self.embedder.get_embedding(question)
            if question_embedding is None:
                print("I'm sorry, I couldn't process your request. Please try rephrasing.")
                continue

            # The database returns (fact, similarity) pairs, best match first.
            retrieved_facts = self.database.retrieve_facts_by_embedding(question_embedding)
            has_answer = bool(retrieved_facts) and retrieved_facts[0][1] >= self.similarity_threshold
            if not has_answer:
                print(f"For your question: \"{question}\", I couldn't find enough information.")
                continue

            print(f"For your question: \"{question}\"")
            print(f"Interesting Fact: {retrieved_facts[0][0]}")

            # Collect related stored questions once each, in ranking order.
            related_questions = []
            for fact, similarity in retrieved_facts[1:]:
                if similarity >= self.similarity_threshold - 0.2:
                    for related in self._questions_for_fact(fact):
                        if related not in related_questions:
                            related_questions.append(related)

            if related_questions:
                print("You might also find these questions intriguing:")
                for related in related_questions:
                    print(f"- {related}")
                    print("\n")



# Interactive entry point: introduce the bot, then answer queries until the
# user types "exit".
if __name__ == "__main__":
    chatbot = ChatBot()
    chatbot.introduce_and_ask_info()
    while True:
        user_query = input("What would you like to know? Please format your input like a question and make sure it has ? at the end of that. ")
        # Case-insensitive match; surrounding whitespace is not stripped.
        if user_query.lower() == "exit":
            print("Goodbye!")
            break
        chatbot.handle_user_query(user_query)


Hello! I'm here to help you learn about U.S. Presidents.
