 # Cache Backed FAISS.

 https://python.langchain.com/docs/how_to/caching_embeddings/

In [1]:
from langsmith import traceable
from langchain_ollama import ChatOllama
from langchain_core.tools import tool
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import DirectoryLoader

#vector retriever tools
from langchain_community.vectorstores import FAISS
from langchain.embeddings import CacheBackedEmbeddings
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.tools.retriever import create_retriever_tool

#recursive why does that sound better?
#from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_text_splitters import CharacterTextSplitter

#from langgraph.checkpoint.memory import MemorySaver
#from langgraph.prebuilt import create_react_agent
from typing import List
#from langchain_core.tools import tool
from langchain_ollama import ChatOllama
import json






# Name of the sentence-transformers model. The same value is reused as the
# cache namespace so the two can never drift apart: keys in the store are
# prefixed with the namespace, which keeps vectors from different models apart.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Model that actually computes the embeddings.
underlying_embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# File-based byte store under ./cache/ used to hold the cached vectors.
store = LocalFileStore("./cache/")

# Embedder that wraps the model and uses `store` as its cache.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=EMBEDDING_MODEL_NAME
)


  underlying_embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [2]:
# Name of the input folder. Presumably where the documents to index live
# (the loader cell reads ./input/story.txt); not referenced elsewhere in this view.
source_folder = "input"

### Note the `yield_keys()` method, which lists the keys currently stored in the cache

In [3]:
# Materialize every key currently held in the cache store so the notebook displays them.
list(store.yield_keys())

['all-MiniLM-L6-v21e5c5f16-2189-5d11-8943-a605b5b02e01',
 'all-MiniLM-L6-v2319c63c0-1f35-51dc-9a04-e1fc4d7b1243',
 'all-MiniLM-L6-v2329ee360-1b33-5515-9b76-2403727f8cd8',
 'all-MiniLM-L6-v249ca3786-dc64-5098-bac6-a4cee17c85d5',
 'all-MiniLM-L6-v26c96f969-60ec-50d7-ab28-276115267a97',
 'all-MiniLM-L6-v28288888f-b8a7-5dac-8d9d-11c84fd302a9',
 'all-MiniLM-L6-v2a625e1dc-4c12-54cf-82da-d4c8ba83d14e',
 'all-MiniLM-L6-v2a7bc4680-996e-58fc-a9a0-e873c0702370',
 'all-MiniLM-L6-v2af30bf4c-82fb-544a-86b0-fdee8485a842',
 'all-MiniLM-L6-v2cf674ad3-5ef8-5f4c-a89a-97a284b71379',
 'all-MiniLM-L6-v2dee9e127-6122-5300-b1a6-0fee083f2982',
 'all-MiniLM-L6-v2eeb1f7b2-241a-55ff-828c-2d4b2dd4c26c',
 '.ipynb_checkpoints\\all-MiniLM-L6-v2eeb1f7b2-241a-55ff-828c-2d4b2dd4c26c-checkpoint']

In [4]:
raw_documents = TextLoader("./input/story.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

RuntimeError: Error loading ./input/story.txt

In [None]:
%%time
# Embed the document chunks through the cache-backed embedder and build a FAISS index.
db = FAISS.from_documents(documents, cached_embedder)

In [None]:
%%time
# Build a second index from the same chunks. Compare the %%time output with the
# first build: the embeddings are expected to come from the cache this time
# (assumes the first build populated it -- verify against the timings).
db2 = FAISS.from_documents(documents, cached_embedder)

In [None]:
# Show only the first five keys held in the cache store.
list(store.yield_keys())[:5]

In [None]:
# Wrap the FAISS index in a retriever, using as_retriever()'s default settings.
retriever = db.as_retriever()

In [None]:
# Run a retrieval for the query string and keep the matches in `result`.
result = retriever.invoke("Gwen Stills")

In [None]:
# Show which container type the retriever returned.
print(type(result))

In [None]:
# Print each retrieved item followed by a blank line.
for doc in result:
    print(doc, end="\n\n")

In [None]:
# NOTE: the docstring below doubles as the tool description shown to the model
# (per the LangChain @tool documentation), so its wording is runtime-relevant.
# The `str` annotation gives the generated argument schema a concrete type.
@tool
def query_documents(question: str) -> str:
    """
    Uses RAG to query documents for information to answer a question
    that requires specific context that could be found in documents

    Example call:

    query_documents("Who is Gwen Stills?")
    Args:
        question (str): The question the user asked that might be answerable from the searchable documents
    Returns:
        str: The list of texts (and their sources) that matched with the question the closest using RAG
    """
    # Three closest chunks from the notebook-level FAISS index `db`.
    similar_docs = db.similarity_search(question, k=3)

    # One "Source/Content" entry per match; 'NA' when no source is recorded.
    docs_formatted = [
        f"Source: {doc.metadata.get('source', 'NA')}\nContent: {doc.page_content}"
        for doc in similar_docs
    ]

    # The result is the string form of the list, so it can be sent as message content.
    return str(docs_formatted)

# Dispatch table: tool name as reported in a tool call -> the tool to run.
available_functions = dict(query_documents=query_documents)

def get_model(model: str = "llama3.1", temperature: float = 0):
    """Create the Ollama chat model used by this notebook.

    Args:
        model: Name of the Ollama model to use. Defaults to "llama3.1".
        temperature: Sampling temperature passed to ChatOllama. Defaults to 0.

    Returns:
        A ChatOllama instance configured with the given settings.
    """
    return ChatOllama(model=model, temperature=temperature)

# Instantiate the chat model, then derive a variant that has the
# query_documents tool bound to it so the model can request calls to it.
llm = get_model()
llm_with_tools = llm.bind_tools([query_documents])

Steps to invoke a function call using Chat Completions API:

Step 1: Prompt the model with content that may result in the model selecting a tool to use. The description of each tool, such as its function name and signature, is defined in the 'Tools' list and passed to the model in the API call. If a tool is selected, the function name and parameters are included in the response.

Step 2: Check programmatically whether the model wanted to call a function. If so, proceed to step 3.

Step 3: Extract the function name and parameters from response, call the function with parameters. Append the result to messages.

Step 4: Invoke the chat completions API with the message list to get the response.


In [None]:
# Step 1: open the conversation with a user question that the model may choose
# to answer by requesting a call to one of the tools bound to it.
messages = [{"role": "user", "content": "Who is Gwen Stills?"}]

In [None]:
# Send the conversation to the tool-aware model and keep its reply.
response_message = llm_with_tools.invoke(messages)

In [None]:
# Inspect which tool calls (if any) the model requested in its reply.
print(response_message.tool_calls)

In [None]:
# Steps 2-4: run every tool call the model requested, feed the results back to
# the model, and print its follow-up response.
tool_calls = response_message.tool_calls

if tool_calls:
    # A message with role "tool" must answer a preceding assistant message that
    # carries the matching tool_calls, so record the model's request first.
    messages.append(response_message)

    for tool_call in tool_calls:
        tool_call_id = tool_call["id"]
        tool_name = tool_call["name"]

        if tool_name in available_functions:
            # Hand the whole argument dict to the tool's invoke() so arguments
            # are matched by name instead of assuming a single "question" key.
            results = available_functions[tool_name].invoke(tool_call["args"])
        else:
            # Report the unknown tool both on screen and back to the model, so
            # every requested call still receives an answer.
            results = f"Error: function {tool_name} does not exist"
            print(results)

        messages.append({
            "role": "tool",
            "tool_call_id": tool_call_id,
            "name": tool_name,
            "content": results,
        })

    # Step 4: ask the model again now that it can see all the tool results.
    model_response_with_function_call = llm_with_tools.invoke(messages)

    print(model_response_with_function_call)
else:
    # Model did not identify a function to call, result can be returned to the user
    print(response_message.content)
    

In [None]:
# Display the content of the model's follow-up response; the variable is
# assigned in the tool-call handling cell, so that cell must have run first.
model_response_with_function_call.content

In [None]:
    # Step 2: determine if the response from the model includes a tool call.   
tool_calls = response_message.tool_calls
if tool_calls:
    # Each tool call on the response is a dict with "id", "name" and "args"
    # keys (already-parsed arguments), so no attribute access or json.loads is
    # needed. Only the first requested call is handled in this cell.
    first_call = tool_calls[0]
    tool_call_id = first_call["id"]
    tool_function_name = first_call["name"]
    tool_query_string = first_call["args"]["question"]

    # Step 3: Call the function and retrieve results. Append the results to the messages list.
    if tool_function_name == "query_documents":
        results = query_documents.invoke({"question": tool_query_string})

        # Messages with role 'tool' must be a response to a preceding message
        # with 'tool_calls', so record the model's request before the result.
        messages.append(response_message)
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call_id,
            "name": tool_function_name,
            "content": results,
        })

        # Step 4: Invoke the model again with the function response appended to
        # the messages list, so it can answer using the tool output.
        model_response_with_function_call = llm_with_tools.invoke(messages)

        print(model_response_with_function_call.content)
    else:
        print(f"Error: function {tool_function_name} does not exist")
else:
    # Model did not identify a function to call, result can be returned to the user
    print(response_message.content)