### Environment and Global Configuration

In [None]:
import os

In [None]:
# Stop early with a clear message when no OpenAI API key is configured.
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("Please set OPENAI_API_KEY environment variable")

# Settings passed to ChatOpenAI when the chat model is created.
LLM_MODEL = "gpt-4o-mini"
LLM_TEMPERATURE = 0

## A simple ChatBot

In [None]:
from IPython.display import Markdown

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

Instantiating a model

In [None]:
# Plain chat model (no tools bound), configured from the notebook-level constants.
base_model = ChatOpenAI(
    temperature=LLM_TEMPERATURE,
    model=LLM_MODEL,
)

A System Prompt and a query

In [None]:
# System prompt for the plain chatbot: sets the analyst persona and asks for a short, clear answer.
# The text (including its leading/trailing newlines) is sent to the model verbatim.
BASE_PROMPT = """
You are a Financial Analyst. Do your best to help the client with their request based on your expertise. Give a succinct and clear response.
"""

In [None]:
# Client question used for the plain (no retrieval) chatbot call.
request = "I want to invest in the tech sector. What are the best options?"

# Send the system prompt followed by the client question to the base model.
response = base_model.invoke(
    [SystemMessage(content=BASE_PROMPT), HumanMessage(content=request)]
)

In [None]:
# Render the model's answer as Markdown instead of showing the raw string.
Markdown(response.content)

## Retrieval Augmented Generation (RAG)

In [None]:
import pickle

from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.messages import ToolMessage

In [None]:
# Embedding model name passed to OpenAIEmbeddings when the vector store is built.
EMBEDDING_MODEL = "text-embedding-3-small"
# Value used for the retriever's "k" search argument (documents returned per query).
RETRIEVAL_K = 3

### Vector Database and Retrieval

Some utility functions for document handling

In [None]:
def load_documents(pickle_filepath: str) -> list[Document]:
    """Read a pickle file and return the object stored in it.

    Args:
        pickle_filepath: Path of the pickle file to open in binary mode.

    Returns:
        Whatever object the file contains; callers expect a list of documents.
    """
    # SECURITY: unpickling can execute arbitrary code, so only point this at
    # files from a trusted source.
    with open(pickle_filepath, "rb") as pickle_file:
        loaded_documents = pickle.load(pickle_file)
    return loaded_documents


def initialize_vector_store(document_chunks: list[Document]) -> Chroma:
    """Reset the default Chroma collection, then build a vector store from the chunks.

    Args:
        document_chunks: Documents to embed and store.

    Returns:
        The Chroma vector store created by `Chroma.from_documents`.
    """
    # Reset the collection of a default-constructed Chroma store before indexing.
    # NOTE(review): presumably this keeps re-runs of the cell from accumulating
    # duplicate entries; it relies on the default store and `from_documents`
    # sharing the same collection — confirm.
    default_store = Chroma()
    default_store.reset_collection()

    return Chroma.from_documents(
        documents=document_chunks,
        embedding=OpenAIEmbeddings(model=EMBEDDING_MODEL),
    )

Load documents from pickle file

In [None]:
# Location of the pickled news documents, relative to the notebook.
data_dir = "../data/"
data_file = "bloomberg_financial_news_1k.pkl"

# Load the full document list from the pickle file.
documents = load_documents(pickle_filepath=os.path.join(data_dir, data_file))

An example document

In [None]:
# Metadata of the first loaded document.
documents[0].metadata

In [None]:
# Text of the first loaded document, rendered as Markdown.
Markdown(documents[0].page_content)

Initialize vector store and create a retriever.

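Use `.as_retriever` to wrap the vector store in a retriever; `search_kwargs={"k": RETRIEVAL_K}` sets how many documents each query returns.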

In [None]:
# Index at most the first 1000 loaded documents.
vector_store = initialize_vector_store(document_chunks=documents[:1000])

# Retriever over the vector store, limited to RETRIEVAL_K results per query.
retriever = vector_store.as_retriever(
    search_kwargs={"k": RETRIEVAL_K},
)

Retrieve documents

Call `.invoke()` on the retriever with a query string to get the matching documents.

In [None]:
# Example query; the retriever was configured with k=RETRIEVAL_K when it was created.
retriever.invoke("tech sector market trends")

### Augmented LLM

### Creating a tool

Create retrieval tool

In [None]:
from langchain_core.tools import tool


@tool
def retrieval(retrieval_query: str) -> list[Document]:
    """Retrieve documents based on a query."""
    # NOTE: the docstring above is intentionally left as-is. LangChain's @tool
    # uses it as the tool description given to the model, so rewording it
    # changes runtime behavior, not just documentation.
    matching_documents = retriever.invoke(retrieval_query)
    return matching_documents

Binding the tool to the model

In [None]:
# System prompt for the retrieval-augmented flow: tells the model it has a news
# database available and should answer from what it finds there.
# The text is sent to the model verbatim.
RAG_PROMPT = """
You are a Financial Analyst with access to a Bloomberg Financial News database.

Query the database to help the client with their request. Give a succinct and clear response based on the information you find.
"""

# Tools made available to the model.
tools = [retrieval]

# Lookup from tool name to tool object, used to dispatch the model's tool calls.
tools_by_name = {registered.name: registered for registered in tools}

# Copy of the base model that is allowed to request the tools above.
rag_model = base_model.bind_tools(tools)

In [None]:
# Client question for the retrieval-augmented call.
request = "I want to invest in the tech sector. What are the best options?"

# Ask the tool-enabled model; its reply may contain tool calls instead of an answer.
rag_response = rag_model.invoke(
    [SystemMessage(content=RAG_PROMPT), HumanMessage(content=request)]
)

In [None]:
# Text part of the tool-enabled model's reply.
# NOTE(review): presumably empty when the model only requested a tool call — check the output.
rag_response.content

In [None]:
# Tool calls requested by the model; the following cells read each call's "name", "args" and "id".
rag_response.tool_calls

Query the tool

In [None]:
if rag_response.tool_calls:
    tool_call = rag_response.tool_calls[0]
    tool = tools_by_name[tool_call["name"]]
    documents = tool.invoke(tool_call["args"])

In [None]:
# TODO: creatze a string for message for model with `ToolMessage` ....

documents_str = "\n\n".join(
    [f"{doc.metadata['Headline']}\n\n{doc.page_content}\n" for doc in documents]
)

In [None]:
# Preview the formatted retrieval results as Markdown.
Markdown(documents_str)

Invoke the base model (which has no tools bound) so that it answers from the retrieved documents instead of requesting another retrieval

In [None]:
# Replay the conversation to the base model: system prompt, client question,
# the model's own tool request, and the tool result tied to that request's id.
response = base_model.invoke(
    [
        SystemMessage(content=RAG_PROMPT),
        HumanMessage(content=request),
        rag_response,
        ToolMessage(tool_call_id=tool_call["id"], content=documents_str),
    ]
)

In [None]:
# Render the final answer (produced with the retrieved documents in context) as Markdown.
Markdown(response.content)