In [None]:
# Building vector db from sections

In [2]:
import streamlit as st

from langchain.callbacks import StreamlitCallbackHandler
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, AIMessage, HumanMessage
from langchain.prompts import MessagesPlaceholder
from langsmith import Client
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import os

In [3]:
# Switch read by configure_retriever() and reload_llm(): when True, the OpenAI
# clients are constructed without an explicit API key (presumably resolved from
# the environment -- TODO confirm); when False, the key is taken from
# st.secrets["openai_api_key"].
local = True

In [5]:
def configure_retriever(
    persist_directory="./PlanAndBuilding_chroma_db",
    source_directory="./sections",
):
    """Return a retriever over the Chroma store, building the store on first use.

    If ``persist_directory`` already exists, the store is opened from it.
    Otherwise the documents returned by ``DirectoryLoader(source_directory)``
    are embedded into a new store at ``persist_directory`` and persisted.

    Args:
        persist_directory: On-disk location of the Chroma store. Defaults to
            the path this notebook has always used.
        source_directory: Directory handed to ``DirectoryLoader`` when the
            store has to be built.

    Returns:
        The object returned by ``as_retriever()`` on the Chroma store.

    Raises:
        ValueError: If the store has to be built but the loader returned no
            documents.
    """
    # Both paths need the embedding client, so build it once. The module-level
    # `local` flag decides whether the key is passed explicitly from Streamlit
    # secrets or left for the client to resolve on its own.
    if local:
        embeddings = OpenAIEmbeddings()
    else:
        embeddings = OpenAIEmbeddings(openai_api_key=st.secrets["openai_api_key"])

    # Progress messages show only the directory name, which keeps the output
    # for the default path identical to what the notebook printed before.
    store_name = os.path.basename(os.path.normpath(persist_directory))

    if os.path.exists(persist_directory):
        print(f"loading from disk: {store_name}")
        docsearch = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
        return docsearch.as_retriever()

    docs = DirectoryLoader(source_directory).load()
    if not docs:
        # Fail before any store is created: the existence check above treats
        # any directory at persist_directory as a usable index, so an index
        # built from nothing could be silently reused on later calls.
        raise ValueError(f"No documents were loaded from {source_directory!r}")

    docsearch = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    print(f"Persisting to disk: {store_name}")
    docsearch.persist()
    return docsearch.as_retriever()

In [6]:
# Build the vector store on the first run (or open it if it already exists).
# As the cell's last expression, the returned retriever is displayed below.
configure_retriever()

Persisting to disk: PlanAndBuilding_chroma_db


VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], metadata=None, vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x00000202AA46FEE0>, search_type='similarity', search_kwargs={})

In [8]:
# System prompt for the agent; reload_llm() passes `system_message` to
# OpenAIFunctionsAgent.create_prompt().
SYSTEM_PROMPT_TEXT = (
    "You are a helpful chatbot who is tasked with answering questions about the contents of the Plan and Building Law. "
    "Unless otherwise explicitly stated, it is probably fair to assume that questions are about the Plan and Building Law. "
    "If there is any ambiguity, you probably assume they are about that."
)
system_message = SystemMessage(content=SYSTEM_PROMPT_TEXT)

In [10]:
def reload_llm(model_choice="gpt-4", temperature=0):
    """Create the chat model, the retrieval agent and a fresh token-buffer memory.

    Args:
        model_choice: Model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.

    Returns:
        A ``(agent_executor, memory, llm)`` tuple. The memory is created here
        but is not attached to the executor; it is handed back to the caller.
    """
    # Common model settings; the API key is added only when not running
    # locally, in which case it is read from Streamlit secrets.
    chat_kwargs = {"temperature": temperature, "streaming": True, "model": model_choice}
    if not local:
        chat_kwargs["openai_api_key"] = st.secrets["openai_api_key"]
    chat_model = ChatOpenAI(**chat_kwargs)

    # The prompt is the module-level system message plus a "history" placeholder.
    agent_prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
    )

    # The agent's single tool: retrieval over the vector store.
    search_tool = create_retriever_tool(
        configure_retriever(),
        "search_plan_and_building_law",
        "Search Plan and Building Law. This tool should be used when you want to get information from the Plan and Building Law."
    )
    toolbox = [search_tool]

    executor = AgentExecutor(
        agent=OpenAIFunctionsAgent(llm=chat_model, tools=toolbox, prompt=agent_prompt),
        tools=toolbox,
        verbose=True,
        return_intermediate_steps=True,
    )
    buffer_memory = AgentTokenBufferMemory(llm=chat_model)
    print("Reloaded LLM")
    return executor, buffer_memory, chat_model


In [11]:
# Build the agent with reload_llm()'s defaults (model "gpt-4", temperature 0).
# The memory is returned separately from the executor.
agent_executor, memory, llm = reload_llm()

loading from disk: PlanAndBuilding_chroma_db
Reloaded LLM
