In [1]:
# the purpose of this notebook is to try and create a vector DB on the contents of the AI Policy.

In [77]:
# use the beautiful soup loader
from langchain.document_loaders import BSHTMLLoader

In [78]:
# how is the doc being embedded
from langchain.embeddings import OpenAIEmbeddings

In [79]:
# how will text be split?
from langchain.text_splitter import CharacterTextSplitter

In [80]:
# now the database
from langchain.vectorstores import Chroma

In [81]:
from langchain.chat_models import ChatOpenAI

In [82]:
# first load the document
loader = BSHTMLLoader('./policy.html')
documents = loader.load()

In [83]:
# split the docs into chunks
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
split_docs = text_splitter.split_documents(documents)



In [84]:
# now connect to embedding function
embedding_function = OpenAIEmbeddings()

In [85]:
# load back the doc
db_new_connection = Chroma(
    persist_directory='.',
    embedding_function=embedding_function
)

In [86]:
# find similar text first
question = 'Who and which organizations helped create the policy?'
similar_docs = db_new_connection.similarity_search(new_query)

In [87]:
# we need a MultiQuery
from langchain.retrievers.multi_query import MultiQueryRetriever

In [88]:
# finally, connnect the llm and use it for query
llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=db_new_connection.as_retriever(),
    llm = llm
)

In [89]:
# get more logging in output
# logging behind scenes
import logging
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

In [90]:
# finally execute the chat query and print the result
unique_docs = retriever_from_llm.get_relevant_documents(question)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Which individuals and organizations played a role in the development of the policy?', '2. Can you provide information about the people and organizations involved in the creation of the policy?', '3. Who were the key contributors, both individuals and organizations, in the policy-making process?']


In [91]:
# finally, summarize the text that was retrieved
matching_docs = ''

for doc in unique_docs:
    matching_docs += doc.page_content

In [92]:
# now execute a new query to LLM
from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate

In [93]:
system_template = 'You are an expert and analyzing the given text input and extracting the relevant information. Answer the user question based only the text provided and no external data'
system_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [94]:
human_message = '''Please answer my {question}. Here is the relevant information below: 
```
{relevant_information}
```
'''
human_prompt = HumanMessagePromptTemplate.from_template(human_message)

In [95]:
chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
request = chat_prompt.format_prompt(question=question,relevant_information=matching_docs ).to_messages()
#print(request)

[SystemMessage(content='You are an expert and analyzing the given text input and extracting the relevant information. Answer the user question based only the text provided and no external data'), HumanMessage(content='Please answer my Who and which organizations helped create the policy?. Here is the relevant information below: \n```\nAdvance the responsible use of AI\xa0in healthcare and the development of affordable and life-saving drugs. The Department of Health and Human Services will also establish a safety program to receive reports of—and act to remedy – harms or unsafe healthcare practices involving AI.\xa0\nShape AI’s potential to transform education\xa0by creating resources to support educators deploying AI-enabled educational tools, such as personalized tutoring in schools.\n\nSupporting Workers\nAI is changing America’s jobs and workplaces, offering both the promise of improved productivity but also the dangers of increased workplace surveillance, bias, and job displacement

In [96]:
response = llm(request)

In [97]:
response

AIMessage(content='Based on the given information, the organizations that helped create the policy are not explicitly mentioned.')