In [None]:
# An experiment rewriting the ChatBot as the executor of a SimpleBot instead.
# Building an organism here...????
%load_ext autoreload
%autoreload 2

In [None]:
from llamabot import ChatBot


bot = ChatBot("You are a chatbot.", session_name="chatbot")
bot("hey there")

In [None]:
str(bot)

In [None]:
from llamabot import SimpleBot
from llamabot.components.history import History

In [None]:
# SimpleBot rewrite using the OpenAI API and Ollama API
model_name = "mistral:7b"
from openai import OpenAI
from llamabot.bot.model_dispatcher import ollama_model_keywords
from dotenv import load_dotenv

load_dotenv()


def make_client(model_name):
    """Build an OpenAI-compatible client for ``model_name``.

    If the part of ``model_name`` before the first ``:`` is listed in
    ``ollama_model_keywords()``, the client targets the local
    OpenAI-compatible endpoint on port 8000 (e.g. a LiteLLM proxy, which
    has to be installed and running separately). Otherwise the default
    hosted OpenAI client is returned.

    :param model_name: Model identifier, e.g. ``"mistral:7b"``.
    :return: An ``OpenAI`` client instance.
    """
    model_family = model_name.split(":")[0]
    if model_family in ollama_model_keywords():
        # The client insists on a non-empty key, so a placeholder is passed.
        return OpenAI(base_url="http://0.0.0.0:8000", api_key="dummy")
    # Only construct the hosted client when it is actually needed, so that a
    # local-only setup does not require OpenAI credentials to be configured.
    return OpenAI()

In [None]:
from llamabot.components.messages import (
    HumanMessage,
    AIMessage,
)

In [None]:
from llamabot.config import default_language_model


from llamabot.bot.simplebot import SimpleBot


bot = SimpleBot(
    model_name="gpt-3.5-turbo",
    system_prompt="You are a helpful and humorous llama.",
    stream=True,
)

response = bot("Say yes or no.")

In [None]:
%load_ext autoreload
%autoreload 2

from llamabot.bot.chatbot import ChatBot
from llamabot.components.history import RAGHistory, History

bot = ChatBot(
    system_prompt="You are a very helpful Llama.",
    session_name="testing",
    chat_history_class=History,
    model_name="gpt-3.5-turbo",
    stream=True,
)

bot("Hey there!")

In [None]:
bot("How are you doing?")

In [None]:
bot("What's going on in the world today?")

In [None]:
bot("What are you doing right now?")

In [None]:
bot("What do you think about messages?")

In [None]:
bot("Make up a joke.")

## Now let's compose QueryBot

In [None]:
from llamabot.components.docstore import DocumentStore
from pathlib import Path
from llamabot.doc_processor import magic_load_doc, split_document
from llamabot.components.messages import (
    RetrievedMessage,
    retrieve_messages_up_to_budget,
)
from llamabot.bot.model_tokens import model_context_window_sizes, DEFAULT_TOKEN_BUDGET


class QueryBot:
    """QueryBot is a bot that uses simple RAG to answer questions about a document.

    Documents are split into text chunks and stored in a ``DocumentStore``.
    Each query retrieves chunks from that store and sends them, together with
    the system prompt and the query, to a ``SimpleBot``.
    """

    def __init__(
        self,
        system_prompt: str,
        document_paths: Path | list[Path],
        collection_name: str,
        temperature: float = 0.0,
        model_name: str = default_language_model(),
        stream=True,
    ):
        """Create the underlying bot and index the given documents.

        :param system_prompt: System prompt handed to the ``SimpleBot``.
        :param document_paths: One path, or a list of paths, to index.
        :param collection_name: Name of the ``DocumentStore`` collection.
        :param temperature: Sampling temperature for the ``SimpleBot``.
        :param model_name: Model used for generation and for looking up the
            context window size.
        :param stream: Passed through to ``SimpleBot``.
        """
        self.bot = SimpleBot(
            system_prompt=system_prompt,
            temperature=temperature,
            model_name=model_name,
            stream=stream,
        )
        # Configure the instance fully before any indexing work starts.
        self.model_name = model_name
        self.response_budget = 2_000
        self.document_store = DocumentStore(collection_name=collection_name)
        self.add_documents(document_paths=document_paths)

    def add_documents(
        self,
        document_paths: str | Path | list[Path],
        chunk_size: int = 2_000,
        chunk_overlap: int = 500,
    ):
        """Load, split and store one or more documents.

        :param document_paths: One path (``Path`` or ``str``), or a list of paths.
        :param chunk_size: Passed to ``split_document``.
        :param chunk_overlap: Passed to ``split_document``.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(document_paths, (str, Path)):
            document_paths = [Path(document_paths)]

        for document_path in document_paths:
            document = magic_load_doc(document_path)
            pieces = split_document(
                document, chunk_size=chunk_size, chunk_overlap=chunk_overlap
            )
            self.document_store.extend([piece.text for piece in pieces])

    def __call__(self, query, n_results: int = 10) -> AIMessage:
        """Answer ``query`` using chunks retrieved from the document store.

        :param query: The question to answer.
        :param n_results: Number of chunks requested from the document store.
        :return: The bot's answer as an ``AIMessage``.
        """
        # Lead with the system prompt; without it the model never sees the
        # instructions this bot was constructed with.
        messages = [self.bot.system_prompt]

        context_budget = model_context_window_sizes.get(
            self.model_name, DEFAULT_TOKEN_BUDGET
        )
        retrieved = retrieve_messages_up_to_budget(
            messages=[
                RetrievedMessage(content=chunk)
                for chunk in self.document_store.retrieve(query, n_results=n_results)
            ],
            # Leave room for the response inside the model's context window.
            character_budget=context_budget - self.response_budget,
        )
        messages.extend(retrieved)
        messages.append(HumanMessage(content=query))

        response = self.bot.generate_response(messages)
        # Other bots in this notebook treat the result of generate_response as
        # an AIMessage already; only wrap it when it is plain text.
        if isinstance(response, AIMessage):
            return response
        return AIMessage(content=response)

In [None]:
from openai import OpenAI

client = OpenAI()

response = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant designed to output JSON.",
        },
        {"role": "user", "content": "Who won the world series in 2020?"},
    ],
)
print(response.choices[0].message.content)

In [None]:
import json
from llamabot.config import default_language_model

from llamabot.prompt_manager import prompt
from llamabot.bot.simplebot import SimpleBot


# System prompt for knowledge-graph extraction. The docstring below IS the
# prompt text (it is consumed by llamabot's `@prompt` decorator), so edit it
# as runtime content, not as documentation.
@prompt
def kgbot_sysprompt() -> str:
    """You are an expert ontologist. You are tasked with taking in a chunk of text
    and extracting as many relationships as possible from that text
    without extrapolating any relationships that are not explicitly stated.
    If you encounter a bibliography entry, you should ignore it.

    For each relationship, return a JSON according to the following schema:

    {
        "subject": "string",
        "predicate": "string",
        "object": "string",
        "evidence": "quote from the text"
    }

    You should return it as a list of dictionaries, like so:

    [
        {"subject": "string", "predicate": "string", "object": "string", "evidence": "string"},
        {"subject": "string", "predicate": "string", "object": "string", "evidence": "string"},
        {"subject": "string", "predicate": "string", "object": "string", "evidence": "string"},
        ...
    ]

    If the entire chunk is comprised of bibliographic entries, then return an empty list.
    Do not justify your actions.
    Ensure that for the "evidence" field you are quoting the text verbatim.
    """


# json_cleaner = SimpleBot("You are a JSON cleaner. You will be provided with dirty JSON, and your task is to clean it up to be valid JSON.", model_name="mistral/mistral-tiny")


class KGBot:
    """KGBot is the Knowledge Graph bot.

    It takes in a chunk of text and returns the parsed JSON of triplets
    produced by the underlying ``SimpleBot``.
    """

    def __init__(
        self,
        system_prompt=kgbot_sysprompt(),
        temperature: float = 0.0,
        model_name: str = default_language_model(),
        stream: bool = True,
    ):
        """Create the underlying ``SimpleBot``.

        :param system_prompt: System prompt; defaults to ``kgbot_sysprompt()``.
        :param temperature: Sampling temperature for the ``SimpleBot``.
        :param model_name: Model used for generation.
        :param stream: Passed through to ``SimpleBot``.
        """
        self.bot = SimpleBot(
            system_prompt=system_prompt,
            temperature=temperature,
            model_name=model_name,
            stream=stream,
        )

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return ``text`` without a surrounding Markdown code fence, if any."""
        cleaned = text.strip()
        # str.strip("```json") would remove any of the characters `, j, s, o, n
        # from both ends rather than the literal prefix, and it gives up as
        # soon as it meets a newline, so the fence is removed explicitly.
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def __call__(self, query: str) -> list[dict]:
        """Call the bot with a query and return the parsed JSON of triplets.

        :param query: The chunk of text to extract relationships from.
        :return: Whatever the model's JSON decodes to; the system prompt asks
            for a list of dictionaries.
        :raises json.JSONDecodeError: If the response is not valid JSON.
        """
        response = self.bot(query)
        return json.loads(self._strip_code_fence(response.content))


kgbot = KGBot(model_name="mistral/mistral-tiny")
# kgbot("Alice is married to Bob. Alice lives in London. Bob lives in Paris. ")

In [None]:
from llamabot.doc_processor import magic_load_doc, split_document
from pyprojroot import here

document = magic_load_doc(here() / "data/codonbert.pdf")
chunks = split_document(document, chunk_size=1_024, chunk_overlap=256)

In [None]:
from llamabot.bot.qabot import DocQABot


qabot = DocQABot(collection_name="codonbert")
# qabot("How did the authors show that CodonBERT learns the genetic code?")

In [None]:
qabot("How did the authors show that CodonBERT learns the genetic code?")


In [None]:
from llamabot import SimpleBot
from llamabot.prompt_manager import prompt
import json


# System prompt for the question-generation bot. The docstring IS the prompt
# text. The schema example is kept as valid JSON (no trailing commas) and is
# described as an object, since callers read the "questions_and_answers" key
# from the parsed response.
@prompt
def jeopardy_bot_sysprompt():
    """
    You are an expert at taking texts and constructing questions from them.
    You will be given a text.
    Extract as many question-and-answer pairs as possible.
    Each answer may have multiple questions; be sure to cover as many as possible.
    Return a JSON object of the following schema:

    {
        "questions_and_answers": [
            {
                "question": "string",
                "answer": "string"
            },
            ...
        ]
    }
    """


jeopardy_bot = SimpleBot(
    system_prompt=jeopardy_bot_sysprompt(),
    # model_name="gpt-3.5-turbo-1106",
    # model_name="ollama/tinyllama",
    model_name="mistral/mistral-tiny",
    json_mode=False,
)

In [None]:
import litellm

litellm.set_verbose = False

In [None]:
jeopardy_bot("The capital of France is Paris.")

In [None]:
# json.loads(jeopardy_bot("The capital of France is Paris.").content)

We can incorporate Jeopardy Bot into the embedding process. This is how we will do it. Store the questions in the vector DB under the collection `{collection_name}_questions`, and within the metadata, store the hash of the document. Then, when we get asked a question, we do a vector similarity search against `{collection_name}_questions`, get the top 1 or 2 relevant results, and then use the hash to retrieve the original document.

In [None]:
question_store = DocumentStore(collection_name="codonbert_questions")
document_store = DocumentStore(collection_name="codonbert")
question_store.reset()
document_store.reset()

In [None]:
from llamabot.components.docstore import DocumentStore
from hashlib import sha256

# Index every chunk twice: the raw text goes into `document_store` under its
# SHA-256 digest, and each generated Q&A pair goes into `question_store`
# carrying that digest as `parent_doc`, so a question hit can be traced back to
# the chunk it came from.
for chunk in chunks:
    doc_id = sha256(chunk.text.encode()).hexdigest()
    document_store.append(chunk.text, metadata=dict(doc_id=doc_id))
    generated = json.loads(jeopardy_bot(chunk.text).content)
    # Use a distinct name for the loop variable so the parsed payload is not
    # rebound while it is being iterated.
    for pair in generated["questions_and_answers"]:
        q_a_concat = f"Q: {pair['question']} A: {pair['answer']}"
        question_store.append(q_a_concat, metadata=dict(parent_doc=doc_id))

In [None]:
q_and_as = question_store.retrieve("What is CodonBERT?")
q_and_as

In [None]:
query_text = "What papers did this paper cite?"
result = question_store.collection.query(query_texts=query_text, n_results=20)
result["metadatas"][0]

In [None]:
# Get the unique parent_doc IDs:
# preserve order
# dict.fromkeys drops duplicates while keeping first-seen order, without the
# repeated list scans of a manual `not in` check.
parent_doc_ids = list(
    dict.fromkeys(metadata["parent_doc"] for metadata in result["metadatas"][0])
)
# parent_doc_ids = set([metadata["parent_doc"] for metadata in result["metadatas"][0]])
parent_doc_ids

In [None]:
results = document_store.collection.query(
    query_texts=query_text, where={"doc_id": {"$in": list(parent_doc_ids)}}, n_results=3
)
len(results["documents"][0])
relevant_documents = results["documents"][0]
relevant_documents

In [None]:
# Now, compose the final prompt that includes the Q&A results and the retrieved documents


# Prompt template combining the Q&A hits, the retrieved documents and the
# user's query. The docstring is the template text; each `{{ name }}`
# placeholder matches a parameter name (presumably substituted by `@prompt` —
# confirm against llamabot.prompt_manager).
@prompt
def q_and_a_prompt(query, q_and_a_results, relevant_documents):
    """Q&A Results:

        {{ q_and_a_results }}

    Relevant documents:

        {{ relevant_documents }}

    Query:

        {{ query }}
    """


response_bot = SimpleBot(
    "Based on Q&A results and relevant documents, please answer the query."
)
response_bot(q_and_a_prompt(query_text, q_and_as, relevant_documents))

In [None]:
# ChatBot as a mixin
from llamabot.bot.simplebot import SimpleBot
from llamabot.components.history import History
from llamabot.components.messages import HumanMessage, AIMessage


class ChatBot(SimpleBot, History):
    """Chat bot that inherits generation from SimpleBot and memory from History."""

    def __init__(
        self,
        system_prompt: str,
        session_name: str,
        temperature=0.0,
        model_name="mistral/mistral-tiny",
        stream=True,
        response_budget=2_000,
    ):
        """Initialise both base classes and remember the session settings.

        :param system_prompt: System prompt forwarded to ``SimpleBot``.
        :param session_name: Session name forwarded to ``History``.
        :param temperature: Sampling temperature forwarded to ``SimpleBot``.
        :param model_name: Model name forwarded to ``SimpleBot``.
        :param stream: Streaming flag forwarded to ``SimpleBot``.
        :param response_budget: Character budget used when retrieving history.
        """
        # The two bases take different arguments, so each one is initialised
        # explicitly instead of through a cooperative super() chain.
        bot_settings = dict(
            system_prompt=system_prompt,
            temperature=temperature,
            model_name=model_name,
            stream=stream,
        )
        SimpleBot.__init__(self, **bot_settings)
        History.__init__(self, session_name=session_name)

        self.model_name = model_name
        self.response_budget = response_budget
        self.session_name = session_name

    def __call__(self, message: str) -> AIMessage:
        """Call the ChatBot.

        :param message: The human message to respond to.
        :return: The response to the human message, primed by the system prompt
            and the retrieved chat history.
        """
        user_turn = HumanMessage(content=message)
        past_turns = self.retrieve(
            query=user_turn, character_budget=self.response_budget
        )
        reply = self.generate_response(
            [self.system_prompt] + past_turns + [user_turn]
        )

        # Record the exchange only after a reply has been generated.
        for turn in (user_turn, reply):
            self.append(turn)
        return reply

In [None]:
chatbot = ChatBot("You are a non-chatty bot.", session_name="chat_session")
chatbot("Hello!")

In [None]:
# We need to make the document store now
from llamabot.components.docstore import DocumentStore

document_store = DocumentStore(collection_name="codonbert_documents")

In [None]:
kgbot(chunks[0].text)

In [None]:
triplets = []
for chunk in chunks:
    triplets.extend(kgbot(chunk.text))

In [None]:
# # Draw a sample
# rng = torch.Generator(device="cuda")
# rng.manual_seed(789001)

# sequence = generator("Give me a character description", rng=rng)
# print(sequence)
# # {
# #   "name": "clerame",
# #   "age": 7,
# #   "armor": "plate",
# #   "weapon": "mace",
# #   "strength": 4171
# # }

# sequence = generator("Give me an interesting character description", rng=rng)
# print(sequence)
# # {
# #   "name": "piggyback",
# #   "age": 23,
# #   "armor": "chainmail",
# #   "weapon": "sword",
# #   "strength": 0
# # }

In [None]:
from pyprojroot import here

qb = QueryBot(
    "You are an expert in answering questions about a paper that you will be provided.",
    collection_name="FOCA_paper",
    document_paths=here() / "data" / "JMLR-23-0380-1.pdf",
    model_name="mistral/mistral-medium",
)

In [None]:
qb("What is POF?")

In [None]:
from llamabot.components.messages import HumanMessage, SystemMessage
from unstructured.chunking.title import chunk_by_title

docstore = DocumentStore(collection_name="querybot_stuff")
history = History(session_name="querybot_stuff")

bot = SimpleBot(
    "You are an expert at reading papers.", model_name="mistral/mistral-medium"
)


doc_path = here() / "data" / "JMLR-23-0380-1.pdf"
document = magic_load_doc(doc_path)

In [None]:
docstore.append(document[0].text)

In [None]:
splitted_document = split_document(document[0], chunk_size=4_000, chunk_overlap=200)
len(splitted_document)

In [None]:
docstore.collection

In [None]:
for doc in splitted_document:
    docstore.append(doc.text)

In [None]:
# Summarize by doing a summary of each chunk
# splitted_document[0]

In [None]:
from llamabot.prompt_manager import prompt
from llamabot.prompt_library.zotero import docbot_sysprompt, paper_summary


# User-message template for the summarization bot. The docstring is the
# template text; `{{ text_to_summarize }}` matches the parameter name
# (presumably substituted by `@prompt` — confirm against llamabot.prompt_manager).
@prompt
def summarization_bot_prompt(text_to_summarize):
    """Here is the text to summarize:

    {{ text_to_summarize }}

    Your summary should not be a mere regurgitation of the abstract.
    Rather, your summary should highlight the key findings,
    methodology, and implications.
    """


import os

os.environ["TOKENIZERS_PARALLELISM"] = "true"

summarization_bot = SimpleBot(
    system_prompt=docbot_sysprompt(),
    model_name="mistral/mistral-medium",
)

new_summary = summarization_bot(summarization_bot_prompt(splitted_document[0].text))

In [None]:
# Help define certain statements


# Prompt template asking whether `text` defines `term`. The docstring IS the
# prompt text. The "not defined" example uses JSON `null` rather than Python's
# `None`, so that a response copying the example remains parseable JSON.
@prompt
def define(term, text):
    """Here is a text: {{ text }}

    Please help me see if `text` defines the term {{ term }}.

    Based on that information, fill out the following JSON for me:

    {
        "term": "term",
        "definition": "definition",
        "context": "exact quote from text",
        "source": "source"
    }

    If the term is not defined in the text, then return null as the values, as follows:

    {
        "term": "term",
        "definition": null,
        "context": null,
        "source": null
    }
    """


# Prompt template asking for every key term that `text` defines, as an array of
# JSON objects. The docstring is the template text; `{{ text }}` matches the
# parameter name (presumably substituted by `@prompt` — confirm against
# llamabot.prompt_manager).
@prompt
def key_terms(text):
    """
    Here is a text: {{ text }}

    Within the text, identify key terms that have definitions present in the text.

    Then, for each term, fill out the following JSON for me:

    {
        "term": "term",
        "definition": "definition",
        "context": "exact quote from text",
        "source": "source"
    }

    Return an array of JSONs.
    Ensure that each term is only defined once.
    """


definition_bot = SimpleBot(
    system_prompt="You are a bot that searches texts for definitions of terms.",
    model_name="mistral/mistral-tiny",
)


definition_bot(define("FOCA", splitted_document[0].text))

Build a knowledge graph of the document. Schema:

- (hash node)--is hash of--(text)
- (definition)--is defined in--(hash)
- (concept)--relates to--(definition)

In [None]:
import networkx as nx
from hashlib import sha256

G = nx.Graph()

for doc in splitted_document:
    # add node, node = hash, attribute text=doc.text
    G.add_node(
        sha256(doc.text.encode("utf-8")).hexdigest(), text=doc.text, node_type="text"
    )

In [None]:
paper_definitions = {}
import json

for doc in splitted_document:
    try:
        parsed_definitions = definition_bot(key_terms(doc.text)).content
        definitions = json.loads(parsed_definitions)
        for definition in definitions:
            paper_definitions[definition["term"]] = definition

    except Exception as e:
        print(e)

In [None]:
paper_definitions.keys()

In [None]:
from llamabot.components.retrieve_messages_up_to_budget import (
    retrieve_messages_up_to_budget,
)

query = "Based on the content below from a paper, please summarize the paper for me."
results = docstore.retrieve(query, n_results=50)
# results[0][3]

In [None]:
# results

The embedding that you use affects retrieval, but not synthesis.
So that means we can use an entirely locally hosted embedding model,
such as Sentence Transformers,
or we can use a remotely hosted embedding model,
such as OpenAI's embeddings API.

I am also going to see how much I can decouple from llama_index.

In [None]:
from unstructured.partition.auto import partition
from unstructured.cleaners.core import (
    clean,
    replace_unicode_quotes,
    group_broken_paragraphs,
)
from unstructured.chunking.title import chunk_by_title
from pyprojroot import here

In [None]:
from chromadb.utils import embedding_functions
import os

import chromadb

openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-ada-002"
)

# Up to this point `client` is the OpenAI client created in an earlier cell,
# which has no collection methods. Bind a Chroma client here so this cell and
# the `client.create_collection(...)` cell that follows work on a fresh kernel.
# NOTE(review): an in-memory client is used; switch to
# chromadb.PersistentClient(path=...) if the collection should survive restarts.
client = chromadb.Client()

# Start from a clean slate; a missing collection is not an error here.
try:
    client.delete_collection("dshiring")
except ValueError:
    pass

In [None]:
collection = client.create_collection("dshiring", get_or_create=True)

In [None]:
elements = partition(here() / "data" / "dshiring.pdf")
chunks = chunk_by_title(elements, new_after_n_chars=5_000)

In [None]:
def cleanup(elements: list, cleaning_funcs: list):
    """Run every element through the cleaning functions, in order.

    Each function receives the element itself when it is a string, and the
    element's ``.text`` attribute otherwise; its return value becomes the
    input to the next function.

    :param elements: Strings, or objects exposing a ``.text`` attribute.
    :param cleaning_funcs: Callables applied one after another.
    :return: A list with one cleaned result per input element. With no
        cleaning functions, the elements are returned unchanged.
    """

    def _run_pipeline(item):
        for func in cleaning_funcs:
            payload = item if isinstance(item, str) else item.text
            item = func(payload)
        return item

    return [_run_pipeline(item) for item in elements]


from functools import partial

clean = partial(
    clean, bullets=True, extra_whitespace=True, dashes=True, trailing_punctuation=True
)

In [None]:
cleaned_elements = cleanup(
    chunks, cleaning_funcs=[replace_unicode_quotes, clean, group_broken_paragraphs]
)

In [None]:
cleaned_elements[148]

In [None]:
# De-duplicate while keeping first-seen order. Iteration order of a set of
# strings can change between interpreter runs, which would make positional
# lookups such as chunk_texts[142] point at different chunks after a restart.
chunk_texts = list(dict.fromkeys(cleaned_elements))
# Content-addressed IDs: the SHA-256 hex digest of each chunk's text.
ids = [sha256(c.encode()).hexdigest() for c in chunk_texts]

In [None]:
len(chunk_texts)

In [None]:
chunk_texts[142]

In [None]:
keyword_bot = SimpleBot(
    "You are a knowledge assistant. I will give you a broad topic that I am interested in. You will return for me example (subject, predicate, object) keywords that are relevant for that field."
)
keyword_bot("Data science")

In [None]:
chunk_texts[145]

In [None]:
kg_bot = SimpleBot(
    "You are a knowledge parsing expert bot. "
    "You accept chunks of texts and return JSON-formatted property graph information. "
    "Do not lift from the text verbatim. "
    "Ensure that the top-level of the JSON is always an entity. "
    "Check your answers thrice before returning them, ensuring accuracy. "
    "Then, format the JSON into triplets.",
    model_name="gpt-4",
)
kg_bot(chunk_texts[145])

In [None]:
collection.add(documents=chunk_texts, ids=ids)

In [None]:
collection.get()["documents"][-10:]

In [None]:
results = collection.query(
    query_texts="What does Monica say we need to be prepared for data science?"
)

In [None]:
results["documents"][0]

In [None]:
for result in results["documents"][0]:
    print(len(result))

Design choices:

- There is only one ChromaDB database by default, unless the user specifies otherwise.
- Each collection of documents is its own collection within the ChromaDB database.
- User gets to name that collection of documents, so this is a required argument.
  - Doing so saves us a ton of complexity in inferring what constitutes a collection in the user's mind.
  - Also lets users have control over memorable names for collections of documents.
- Embedding model is specified per collection, defaults to Sentence Transformers, which seems to be good enough and, crucially, _free and local_.

RAGHistory can be built on top of this. Every chat session gets its own collection prefixed with `chat-<date>`, and when the user asks a question, we query that chat history collection for the most relevant context, and then stuff that context into the prompt for SimpleBot to generate a response.

In [None]:
from llamabot import QueryBot


class QueryBot:
    """Sketch of a QueryBot backed by a persistent ChromaDB collection.

    Holds a ``SimpleBot`` for generation, a ``History`` for the conversation,
    and a ChromaDB collection named ``collection_name``.
    """

    def __init__(
        self,
        system_prompt,
        collection_name: str,
        temperature=0.0,
        model_name=default_language_model(),
        streaming=True,
        response_budget=2_000,
        db_path: str = str(Path.home() / ".llamabot" / "chroma.db"),
    ):
        """Open (or create) the collection and build the underlying bot.

        :param system_prompt: System prompt handed to the ``SimpleBot``.
        :param collection_name: Name of the ChromaDB collection to get or create.
        :param temperature: Sampling temperature for the ``SimpleBot``.
        :param model_name: Model used for generation.
        :param streaming: Whether the ``SimpleBot`` should stream its output.
        :param response_budget: Character budget used when retrieving history.
        :param db_path: Location of the persistent ChromaDB database.
        """
        # Imported locally: the notebook only imports `chromadb.utils`, which
        # does not bind the name `chromadb` itself.
        import chromadb

        self.chroma_client = chromadb.PersistentClient(path=db_path)
        self.collection = self.chroma_client.create_collection(
            collection_name, get_or_create=True
        )

        self.bot = SimpleBot(
            system_prompt=system_prompt,
            temperature=temperature,
            model_name=model_name,
            # Every other SimpleBot construction in this notebook uses the
            # `stream` keyword; the public parameter name here is unchanged.
            stream=streaming,
        )
        self.model_name = model_name
        # NOTE(review): History is given a session name everywhere else in
        # this notebook; the collection name is reused for it — confirm.
        self.chat_history: History = History(session_name=collection_name)
        self.response_budget = response_budget

    def __call__(self, human_message: str) -> AIMessage:
        """Record the human message, generate a reply and record that too.

        :param human_message: The user's message text.
        :return: The response produced by the underlying bot.
        """
        question = HumanMessage(content=human_message)
        self.chat_history.append(question)
        # The question is already in the history at this point, so it is not
        # appended to `messages` a second time.
        history = self.chat_history.retrieve(
            query=question, character_budget=self.response_budget
        )
        messages = [self.bot.system_prompt] + history
        response = self.bot.generate_response(messages)
        self.chat_history.append(response)
        return response