In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain_community.document_loaders import PyPDFLoader
file_path = "label.pdf"
loader = PyPDFLoader(file_path)
documents = loader.load()
len(documents)  # Should return the number of pages in the PDF

24

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitted_documents = [] 
chunk_size = 1000
chunk_overlap = 200
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
sections = text_splitter.split_documents(documents)

splitted_documents.extend(sections)

len(splitted_documents)

93

In [5]:
splitted_documents[10]

Document(metadata={'producer': 'Adobe PDF Library 11.0', 'creator': 'Acrobat PDFMaker 11 for Word', 'creationdate': '2019-04-04T13:11:17-04:00', 'author': 'fda/cder', 'keywords': 'Ibrance (palbociclib) Capsules', 'moddate': '2024-06-17T21:57:57-04:00', 'subject': 'Ibrance (palbociclib) Capsules', 'title': 'label', 'source': 'label.pdf', 'total_pages': 24, 'page': 2, 'page_label': '3'}, page_content='If Grade 3 on Day 15, continue IBRANCE at current dose to complete \ncycle and repeat complete blood count on Day 22. \nIf Grade 4 on Day 22, see Grade 4 dose modification guidelines below. \nConsider dose reduction in cases of prolonged (>1 week) recovery from \nGrade 3 neutropenia or recurrent Grade 3 neutropenia on Day 1 of \nsubsequent cycles. \nGrade 3 \nneutropeniab with \nfever ≥38.5 ºC \nand/or infection \nAt any time: \nWithhold IBRANCE until recovery to Grade ≤2. \nResume at the next lower dose. \nGrade 4 At any time: \nWithhold IBRANCE until recovery to Grade ≤2. \nResume at the 

In [8]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

In [10]:
persist_directory = "embeddings_db"
embedding_function = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [11]:
vectordb = Chroma.from_documents(documents=splitted_documents, embedding=embedding_function, persist_directory=persist_directory)

In [13]:
import dspy
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
groq_api_key = os.getenv("GROQ_API_KEY")
lm = dspy.GROQ(model='moonshotai/kimi-k2-instruct', api_key=groq_api_key)
dspy.configure(lm=lm)

In [None]:
question = "What is capital of India?"
basic_chat = dspy.Predict("question -> response")

basic_chat(question=question)

Prediction(
    response='New Delhi'
)

In [27]:
retriever = vectordb.as_retriever()

def retrieve(inputs):
  return [doc.page_content for doc in retriever.invoke(inputs["question"])]

In [34]:
# Define our RAG module
class RAG(dspy.Module):
    def __init__(self):
        sign = dspy.Signature(
            "context, question -> response",
            instructions="Answer the question only based on the provided context. Do not fetch answers from external sources.",
        )
        self.response = dspy.Predict(sign)
        
    def forward(self, question):
        context = retrieve({"question": question})
        return self.response(context=context, question=question)

In [35]:
rag = RAG()

rag(question=question)

Prediction(
    response='The context provided does not contain any information about the capital of India.'
)

In [39]:
rag(question="Does Ibrance cause fatigue?")

Prediction(
    response='Yes, IBRANCE can cause fatigue.'
)

In [37]:
class COT_RAG(dspy.Module):
    def __init__(self):
        sign = dspy.Signature(
            "context, question -> response",
            instructions="Answer the question only based on the provided context. Do not fetch answers from external sources.",
        )
        self.respond = dspy.ChainOfThought(sign)
        
    def forward(self, question):
        context = retrieve({"question": question})
        return self.respond(context=context, question=question) 

In [38]:
cot_rag = COT_RAG()

cot_rag(question=question)

Prediction(
    rationale='Response: The context provided does not mention the capital of India.',
    response='Response: The context provided does not mention the capital of India.'
)

In [40]:
cot_rag(question='Does Ibrance cause fatigue?')

Prediction(
    rationale='Reasoning: Let\'s think step by step in order to determine whether IBRANCE causes fatigue. First, we need to look for any mention of fatigue or tiredness in the provided context. In [1], under the list of possible side effects, it explicitly states "tiredness" as a side effect. Similarly, in [2], under the common side effects, "tiredness" is again listed. Since "tiredness" is synonymous with fatigue, this confirms that IBRANCE can cause fatigue.',
    response='Yes, IBRANCE can cause fatigue (tiredness) as a side effect.'
)

In [44]:
cot_rag(question='How does it affect elderly patients?')

Prediction(
    rationale='Reasoning: Let\'s think step by step in order to determine how IBRANCE affects elderly patients. We need to look for any specific information in the context about elderly patients (those ≥65 years of age or ≥75 years of age) and how they respond to IBRANCE in terms of safety or effectiveness.\n\nLooking at the context, in section 8.5 "Geriatric Use," it states: "Of 444 patients who received IBRANCE in Study 1, 181 patients (41%) were ≥65 years of age and 48 patients (11%) were ≥75 years of age. Of 347 patients who received IBRANCE in Study 2, 86 patients (25%) were ≥65 years of age and',
    response='Response: No overall differences in safety or effectiveness of IBRANCE were observed between elderly patients (≥65 or ≥75 years of age) and younger patients.'
)

In [46]:
cot_rag(question='How does it affect pediatric patients?')

Prediction(
    rationale="Reasoning: Let's think step by step in order to determine how IBRANCE affects pediatric patients. The context mentions that toxicities in teeth independent of altered glucose metabolism were observed in rats, and it notes that other toxicities of potential concern to pediatric patients have not been evaluated in juvenile animals. Additionally, the pharmacokinetics of IBRANCE have not been evaluated in patients <18 years of age. Therefore, there is no specific information provided about how IBRANCE affects pediatric patients.",
    response='The pharmacokinetics of IBRANCE have not been evaluated in patients <18 years of age, and other toxicities of potential concern to pediatric patients have not been evaluated in juvenile animals.'
)

In [47]:
cot_rag(question='How does it affect cell cycle?')

Prediction(
    rationale="Reasoning: Let's think step by step in order to determine how palbociclib affects the cell cycle. The context states that palbociclib is an inhibitor of cyclin-dependent kinases (CDK) 4 and 6. These kinases, along with cyclin D1, are involved in signaling pathways that lead to cellular proliferation. Specifically, palbociclib reduces cellular proliferation of estrogen receptor (ER)-positive breast cancer cell lines by blocking progression of the cell from G1 into S phase of the cell cycle. This indicates that palbociclib halts the cell cycle at the G1 phase, preventing cells from entering the S phase where DNA replication occurs.",
    response='Palbociclib blocks'
)

In [50]:
cot_rag(question="Can it be passed with bodily fluids?")

Prediction(
    rationale="Let's think step by step in order to determine whether IBRANCE can be passed with bodily fluids. We need to look for any mention in the context about transmission through bodily fluids such as blood, semen, or breast milk.\n\n1. The context mentions that it is not known if IBRANCE passes into breast milk, and advises against breastfeeding during treatment and for 3 weeks after the last dose. This suggests some concern about transmission via breast milk, a bodily fluid.\n\n2. There is no explicit mention of transmission through other bodily fluids like blood or semen in the provided context.\n\n3. The context does not provide any information confirming or denying transmission through other bodily fluids.",
    response='The context does not provide enough information to confirm whether IBR'
)

In [41]:
class ReAct_RAG(dspy.Module):
    def __init__(self):
        sign = dspy.Signature(
            "context, question -> response",
            instructions="Answer the question only based on the provided context. Do not fetch answers from external sources.",
        )
        self.respond = dspy.ReAct(sign)
        
    def forward(self, question):
        context = retrieve({"question": question})
        return self.respond(context=context, question=question) 

In [42]:
react_rag = COT_RAG()

react_rag(question=question)

Prediction(
    rationale='Response: The context provided does not mention the capital of India.',
    response='Response: The context provided does not mention the capital of India.'
)

In [43]:
react_rag(question="How does Ibrance work?")

Prediction(
    rationale='Reasoning: Let\'s think step by step in order to determine how IBRANCE works. The context provided does not explicitly describe the mechanism of action of IBRANCE. However, it does state that "IBRANCE is a kinase inhibitor" [2]. While the context does not elaborate further on what this means or how it treats cancer, this is the only relevant information provided about how IBRANCE works.',
    response='IBRANCE is a kinase inhibitor.'
)

In [48]:
react_rag(question="Which medicines to avoid?")

Prediction(
    rationale="Reasoning: Let's think step by step in order to identify which medicines to avoid. The context mentions that IBRANCE and other medicines may affect each other, causing side effects. It also specifically advises avoiding grapefruit and grapefruit products during treatment with IBRANCE, as grapefruit may increase the amount of IBRANCE in the blood. Additionally, it instructs to tell the healthcare provider about all medicines taken, including prescription, over-the-counter medicines, vitamins, and herbal supplements, implying that some of these may interact with IBRANCE.",
    response='Avoid grapefruit and grapefruit products during treatment with IBRANCE. Also, inform your healthcare provider about all medicines you take, including prescription, over-the-counter medicines, vitamins, and herbal supplements, as some may'
)

In [49]:
react_rag(question="Can it be passed with bodily fluids?")

Prediction(
    rationale="Let's think step by step in order to determine whether IBRANCE can be passed with bodily fluids. We need to look for any mention in the context about transmission through bodily fluids such as blood, semen, or breast milk.\n\n1. The context mentions that it is not known if IBRANCE passes into breast milk, and advises against breastfeeding during treatment and for 3 weeks after the last dose. This suggests some concern about transmission via breast milk, a bodily fluid.\n\n2. There is no explicit mention of transmission through other bodily fluids like blood or semen in the provided context.\n\n3. The context does not provide any information confirming or denying transmission through other bodily fluids.",
    response='The context does not provide enough information to confirm whether IBR'
)