In [77]:
import hashlib
import os

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Connection settings for Azure OpenAI and Azure AI Search. Every lookup goes
# through os.environ[...], so a missing variable raises KeyError in this cell.
azure_openai_endpoint, azure_openai_key, search_endpoint, search_key = (
    os.environ[variable]
    for variable in (
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_KEY",
        "AZURE_SEARCH_SERVICE_ENDPOINT",
        "AZURE_SEARCH_ADMIN_KEY",
    )
)

In [78]:
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI

from langchain.vectorstores.azuresearch import AzureSearch
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

## Set up your docs

In [79]:
from langchain_community.document_loaders import TextLoader

# Load every .txt file found (recursively) under the board-documents folder.
# The path is relative to the notebook's working directory.
loader = DirectoryLoader(
    "../data/documents_board",
    glob="**/*.txt",
)
documents = loader.load()

In [80]:
# Split each loaded document into overlapping chunks ready for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,    # upper bound on the size of a chunk
    chunk_overlap=500,  # amount shared between consecutive chunks
)
nodes = text_splitter.split_documents(documents)  # chunked documents

In [81]:
# Display the chunked documents produced by the splitter.
nodes

[Document(page_content='Board Members:\n\nMike CEO\n\nmike@mycompany.com\n\nCEO\n\nOlivia Johnson\n\nolivia@mycompany.com\n\nCFO\n\nAlex Rodriguez\n\nalex@mycompany.com\n\nCOO\n\nExecutives:\n\nEmily Smith\n\nemily@mycompany.com\n\nCTO\n\nVictoria Chen\n\nvictoria@mycompany.com\n\nCMO\n\nMichael Turner\n\nmichael@mycompany.com\n\nCIO\n\nJessica Baker\n\njessica@mycompany.com\n\nVP of Sales\n\nEthan White\n\nethan@mycompany.com\n\nVP of Marketing\n\nRyan Walker\n\nryan@mycompany.com\n\nVP of Operations\n\nAmanda Reed\n\namanda@mycompany.com\n\nChief Legal Officer', metadata={'source': '..\\data\\documents_board\\BoardmemberList.txt'}),
 Document(page_content='Neclist', metadata={'source': '..\\data\\documents_board\\NECList.txt'}),
 Document(page_content="scenario\n\nask: how do I create a board paper agent: what are agnedas?\n\nask: *performance *operation *profit agent: eamiling NEC(Board members). List below List: ceo xyz, coo xyz, etc\n\n[need to fix this part as we need to wait/cha

## Embedding/vectorisation

In [82]:
# Embedding client bound to the Azure OpenAI "text-embedding-ada-002" deployment.
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    api_key=azure_openai_key,
    azure_endpoint=azure_openai_endpoint,
    api_version="2023-09-01-preview",
    chunk_size=1,  # embedding batch size: one text per request
)

# Azure AI Search index ("boardai03") that stores the chunks and their vectors.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=search_endpoint,
    azure_search_key=search_key,
    index_name="boardai03",
    embedding_function=embeddings.embed_query,
)


def _chunk_key(chunk):
    """Return a stable index key built from a chunk's source path and text.

    The same chunk always maps to the same key, so uploading it again is meant
    to overwrite the existing index entry rather than add a second copy.
    """
    source = chunk.metadata.get("source", "")
    digest_input = f"{source}\n{chunk.page_content}".encode("utf-8")
    return hashlib.sha256(digest_input).hexdigest()


# Without explicit keys, each run of this cell uploads the chunks again under
# fresh random ids, so the index fills with duplicates and searches return the
# same passage several times. Content-derived keys make re-running safe.
# Entries uploaded earlier under random ids are not removed by this change.
# NOTE(review): relies on AzureSearch.add_texts honouring the `keys` keyword --
# confirm against the installed langchain version.
chunks_by_key = {_chunk_key(chunk): chunk for chunk in nodes}
vector_store.add_documents(
    documents=list(chunks_by_key.values()),
    keys=list(chunks_by_key),
)
print("done")

done


## Query (RAG search -> LLM -> final answer)

[rag]

In [83]:
# Question used for the direct search below and for the RAG chain later on.
# Alternative to try: "Give me names of board members?"
query = "Who should be involved in the creation of a board paper?"

# Direct hybrid search against the index, keeping the top 2 hits.
docs = vector_store.similarity_search(query=query, k=2, search_type="hybrid")
docs

[Document(page_content="How to create and circulate board paper\n\n1) Based on the approved agenda(Usually approved agendas are performance, operation, profit + anything we want to add this time. ), Secretary emails NEC. (assume NEC is same as board members for now). If you don't have it, ask. Normally fixed ones are: performance, operation, profit\n\n2) NEC writes paper.\n\n3) Secretary circulates and distributes the paper to board members.", metadata={'source': '..\\data\\documents_board\\Template.txt'}),
 Document(page_content="How to create and circulate board paper\n\n1) Based on the approved agenda(Usually approved agendas are performance, operation, profit + anything we want to add this time. ), Secretary emails NEC. (assume NEC is same as board members for now). If you don't have it, ask. Normally fixed ones are: performance, operation, profit\n\n2) NEC writes paper.\n\n3) Secretary circulates and distributes the paper to board members.", metadata={'source': '..\\data\\document

[llm]

In [84]:
# Chat model served by the Azure OpenAI deployment named "gpt-4".
llm = AzureChatOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version="2023-09-01-preview",
    deployment_name="gpt-4",
    # temperature=0  # uncomment to pin the sampling temperature
)

In [85]:
# Retriever over the Azure AI Search index, asking for hybrid search and 5 hits.
# The option is spelled `search_type`; `search_key` is not a retriever setting,
# so passing it leaves the hybrid request without effect.
# NOTE(review): "hybrid" needs a langchain release whose AzureSearch provides
# its own retriever; on releases that return the generic VectorStoreRetriever,
# or that expect `k` as a direct argument, build
# AzureSearchVectorStoreRetriever(vectorstore=vector_store,
# search_type="hybrid", k=5) instead -- confirm against the installed version.
retriever = vector_store.as_retriever(search_type="hybrid", search_kwargs={"k": 5})


# RAG prompt template pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")
def format_docs(docs):
    """Join the page_content of every document, separated by a blank line."""
    separator = "\n\n"
    page_texts = [item.page_content for item in docs]
    return separator.join(page_texts)

# RAG pipeline: the incoming question goes to the retriever (whose hits are
# flattened into one string by format_docs) and is also passed through
# unchanged; both values fill the prompt, the chat model answers, and the
# parser returns the reply as a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [86]:
# Run the retriever on its own to inspect the chunks it returns for the query.
# `invoke` is the Runnable entry point (the same interface `rag_chain` is
# called through) and supersedes the deprecated `get_relevant_documents`.
docs = retriever.invoke(query)
docs

[Document(page_content="How to create and circulate board paper\n\n1) Based on the approved agenda(Usually approved agendas are performance, operation, profit + anything we want to add this time. ), Secretary emails NEC. (assume NEC is same as board members for now). If you don't have it, ask. Normally fixed ones are: performance, operation, profit\n\n2) NEC writes paper.\n\n3) Secretary circulates and distributes the paper to board members.", metadata={'source': '..\\data\\documents_board\\Template.txt'}),
 Document(page_content="How to create and circulate board paper\n\n1) Based on the approved agenda(Usually approved agendas are performance, operation, profit + anything we want to add this time. ), Secretary emails NEC. (assume NEC is same as board members for now). If you don't have it, ask. Normally fixed ones are: performance, operation, profit\n\n2) NEC writes paper.\n\n3) Secretary circulates and distributes the paper to board members.", metadata={'source': '..\\data\\document

In [87]:
# Run the full RAG chain on the question and display the answer string.
res = rag_chain.invoke(query)
res

'The creation of a board paper should involve the Secretary and the NEC, which can be assumed to be the same as the board members. The Secretary is responsible for initiating the process based on the approved agenda, while the NEC is tasked with writing the paper.'

In [88]:
import textwrap

# Print the answer wrapped to at most 140 characters per line.
print(textwrap.fill(res, width=140))

The creation of a board paper should involve the Secretary and the NEC, which can be assumed to be the same as the board members. The
Secretary is responsible for initiating the process based on the approved agenda, while the NEC is tasked with writing the paper.
