In [159]:
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
    Type,
)

In [160]:
from qvdb import VectorDB

qvdb = VectorDB(is_persistent=False)

In [161]:
import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter

def extract_url_content(url: str) -> List[str]:
        """
        Extracts the content from the given URL.

        Args:
            url (str): The URL to extract content from.

        Returns:
            str: The extracted content.
        """
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        text = ' '.join([p.text for p in soup.find_all('p')])  # Extract text from <p> tags

        # We need to split it up into smaller pieces
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        text_splits = text_splitter.split_text(text)

        return text_splits

In [162]:
from googlesearch import search

def search_and_store(qvdb: VectorDB, query: str, num_results: int = 5) -> None:
        """
        Performs a Google Search for documents related to the given query,
        extracts their content, and stores them in the vector store.

        Args:
            query (str): The query to search for documents.
            num_results (int): The number of URLs to retrieve. Defaults to 5.

        Returns:
            None
        """
        for index, url in enumerate(search(query, num_results)):
            print(f"<info> ({index}) Extracting content from: {url}")
            text_splits = extract_url_content(url=url)
            for jindex, text in enumerate(text_splits):
                qvdb.add(
                    documents=[text],
                    metadatas=[{"source": url}],
                    ids=[f"id{index}.{jindex}"]
                )

In [163]:
#model = 'mistral'
model= 'llama3'
system="You are a helpful AI assistant that helps answer questions based on documents and sources."
template = """
Your goal is to answer the provided question based on the provided documents and sources.
You should produce a text answer which ends with the sources you used to answer the question.
The text answer should have a maximum column width of 72 characters.
Do not add any extra text or comment that is not in the documents!

Example of the documents:

SOURCE: https://www.example.com
DOCUMENT: This is an example document.

Here is the question: {question}

Here are the documents: {documents}

"""

In [164]:
#question = "What makes Virus different from Bacteria?"
#question = "How do I print just a few lines of a file using the sed command?"
question = "Elaborate on who came up with the Periodical System?"

In [165]:
search_and_store(qvdb=qvdb, query=question)
results = qvdb.query(question, num_results=3)
print(results)



<info> (0) Extracting content from: https://www.asbmb.org/asbmb-today/science/020721/a-brief-history-of-the-periodic-table
<info> (1) Extracting content from: https://en.wikipedia.org/wiki/History_of_the_periodic_table
<info> (2) Extracting content from: https://www.rsc.org/periodic-table/history/about
<info> (3) Extracting content from: https://academic.oup.com/book/40719/chapter/348474886
<info> (4) Extracting content from: https://www.sciencehistory.org/stories/magazine/mendeleevs-legacy-the-periodic-system/
{'ids': [['id1.45', 'id4.38', 'id2.11']], 'distances': [[0.43994587659835815, 0.4471851587295532, 0.46915537118911743]], 'metadatas': [[{'source': 'https://en.wikipedia.org/wiki/History_of_the_periodic_table'}, {'source': 'https://www.sciencehistory.org/stories/magazine/mendeleevs-legacy-the-periodic-system/'}, {'source': 'https://www.rsc.org/periodic-table/history/about'}]], 'embeddings': None, 'documents': [['Eventually, the periodic table was appreciated for its descriptive p

In [166]:
docs=[]
for d, s in zip(results['documents'][0], results['metadatas'][0]):
    docs.append(f"SOURCE: {s['source']}\nDOCUMENT: {d}\n")
documents = ' '.join(docs)

In [167]:
print(documents)

SOURCE: https://en.wikipedia.org/wiki/History_of_the_periodic_table
DOCUMENT: Eventually, the periodic table was appreciated for its descriptive power and for finally systematizing the relationship between the elements,[50] although such appreciation was not universal.[51] In 1881, Mendeleev and Meyer had an argument via an exchange of articles in British journal Chemical News over priority of the periodic table, which included an article from Mendeleev, one from Meyer, one of critique of the notion of periodicity, and many more.[52] In 1882, the Royal Society in London
 SOURCE: https://www.sciencehistory.org/stories/magazine/mendeleevs-legacy-the-periodic-system/
DOCUMENT: out Dmitri Ivanovich Mendeleev, and for the very legacy of periodicity, we are indebted to him. Eric Scerriis a lecturer in the Department of Chemistry and Biochemistry at the University of California, Los Angeles. He is the author of The Periodic Table: Its Story and Significance (Oxford University Press, 2006), fr

In [168]:
import ollama

prompt = template.format(question=question, documents=documents)
output = ollama.generate(model=model, system=system, prompt=prompt, stream=False)
response = output['response'].strip()

In [169]:
print(response)

According to historical records, Dmitri Ivanovich Mendeleev came up with the Periodic System. He discovered it while attempting to organize elements in 1869.

Sources:
https://en.wikipedia.org/wiki/History_of_the_periodic_table
https://www.sciencehistory.org/stories/magazine/mendeleevs-legacy-the-periodic-system/
https://www.rsc.org/periodic-table/history/about


In [170]:
import textwrap

# Assuming response is your text
paragraphs = response.split("\n\n")

# Wrap the first paragraph to 72 characters wide
wrapped_paragraph = textwrap.fill(paragraphs[0], width=72)



In [171]:
print(wrapped_paragraph)
print("")
if len(paragraphs) > 1:
    print(paragraphs[1])

According to historical records, Dmitri Ivanovich Mendeleev came up with
the Periodic System. He discovered it while attempting to organize
elements in 1869.

Sources:
https://en.wikipedia.org/wiki/History_of_the_periodic_table
https://www.sciencehistory.org/stories/magazine/mendeleevs-legacy-the-periodic-system/
https://www.rsc.org/periodic-table/history/about


In [172]:
qvdb.reset()