***Parse PDF files***

In [None]:
%pip install -r ./requirements.txt

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

Unstructured  Partition PDF

In [2]:
from langchain_community.document_loaders import UnstructuredPDFLoader



In [3]:
loader = UnstructuredPDFLoader("pdf_files/mock_client_slide_deck_lat.pdf")
recomm_deck = loader.load()
print(recomm_deck)

[Document(page_content='Current Themes and Recommendations\n\nPreventative Care\n\nAccess to Care\n\nBehavioral Health\n\nThemes\n\nPreventative\n\nER utilization and urgent care visits are low • Convenience\n\nER utilization and urgent care visits are low • Convenience\n\ncare increased\n\nTelehealth\n\nutilized by 30% of members\n\n\n\n\n\n\n\n30% of members had a behavioral health conditions 35% of behavioral members are dependents 25% of prescriptions dispensed without PCP visit\n\nRecommendations HealthHive\n\nPreventive Care Kit Use HealthHive to schedule and track preventive care appointments.\n\nCareConnect Access Platform\n\nChronic Conditions\n\nObesity and hypertension were top chronic conditions, both are risk factors for Type 2 diabetes ChronicCare Companion Program\n\nObesity and hypertension were top chronic conditions, both are risk factors for Type 2 diabetes ChronicCare Companion Program\n\nPharmacy\n\nSpecialty drugs were a large part of expenses of high cost claims\

Add client_name & year to the Metadata

In [4]:
recomm_deck[0].metadata['client_name'] = 'mock_client'
recomm_deck[0].metadata['year'] = '2024'


In [18]:
print(recomm_deck[0].metadata)

{'source': 'pdf_files/mock_client_slide_deck_lat.pdf', 'client_name': 'mock_client', 'year': '2024'}


Embed & add documents to VectorStore

In [6]:
import chromadb

from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
new_client = chromadb.EphemeralClient()
vector_stor = Chroma.from_documents(recomm_deck, embeddings, client=new_client, collection_name = "openai_collection")


Self Querying Retriever

In [7]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

Create Retriever

In [26]:
metadata_field_info = [
    AttributeInfo(
        name = "source",
        description="Name of the document",
        type="string",
    ),
    AttributeInfo(
        name = "client_name",
        description="Name of the client",
        type="string",
    ),
    AttributeInfo(
        name = "year",
        description="Document created year",
        type="string",
    ),

]
document_content_desc = "mock client recommendations"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(llm,
                                        vector_stor,
                                        document_content_desc,
                                        metadata_field_info, search_kwargs={"k":1})


In [27]:
print(retriever.search_kwargs)

{'k': 1}


Relevant Documents

In [28]:
get_content = retriever.get_relevant_documents("what are curent themes for client mock_client?")

RAG prompt

In [29]:
from langchain import hub
rag_prompt = hub.pull("rlm/rag-prompt")
print(rag_prompt)

input_variables=['context', 'question'] metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [30]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4", temperature=0)

In [31]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate
from langchain_core.prompts import PromptTemplate

prompt = ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'],
    template = "You are an assistant for question answering tasks. Use the following pieces of retrieved context to answer the question. if you don't know the answer just say don't know. \nQuestion \nContext: {context} \nAnswer"))])
print(prompt)

input_variables=['context'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question answering tasks. Use the following pieces of retrieved context to answer the question. if you don't know the answer just say don't know. \nQuestion \nContext: {context} \nAnswer"))]


In [33]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
rag_chain = ({"context": retriever, "question": RunnablePassthrough()}
            | prompt
             | llm
             | StrOutputParser()
            )
rag_chain.invoke("What are the Preventive care recommendations suggested for mock_client in 2024?")

'The document discusses various themes and recommendations related to healthcare. Some of the themes include preventative care, access to care, and behavioral health. The document mentions that ER utilization and urgent care visits are low, and telehealth is utilized by 30% of members. It also states that 30% of members had a behavioral health condition. The top chronic conditions were obesity and hypertension, which are risk factors for Type 2 diabetes. The document recommends using HealthHive for scheduling and tracking preventive care appointments, CareConnect for finding nearby healthcare providers and services, and SpecialtyScript for finding nearby medical specialists. It also suggests using MindWave for accessing guided meditations and therapy sessions.'