In [3]:
# Import libraries
import os 
import dspy
from dotenv import load_dotenv

In [4]:
# Load environment variables (including the OpenAI API key) from the local .env file
load_dotenv()

True

In [5]:
# Langchain text splitter 
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings

# text_splitter = SemanticChunker(OpenAIEmbeddings())

In [6]:
# Create a loader for the PDF documents stored under the relative "data/" directory
from langchain_community.document_loaders import PyPDFDirectoryLoader
loader = PyPDFDirectoryLoader("data/")

In [7]:
# Keep only the text of each loaded document; the text splitter is fed plain strings
docs = [loaded.page_content for loaded in loader.load()]

In [8]:
# Presumably the usage that went with the commented-out SemanticChunker splitter;
# kept for reference.
# docs = text_splitter.create_documents(docs)
# print(docs[0].page_content)

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split each text into chunks of at most 20,000 characters (measured with len),
# with a 20-character overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=20000,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

# This cell rebinds `docs` from plain strings to Document objects, so when the
# cell is executed a second time its input is already Documents. Normalise back
# to text first so a re-run splits again instead of raising.
page_texts = [getattr(item, "page_content", item) for item in docs]
docs = text_splitter.create_documents(page_texts)

# Fail with a clear message rather than an IndexError when nothing was loaded.
if not docs:
    raise ValueError(
        "No text chunks were produced; check that the PDF directory contains readable PDFs."
    )
print(docs[0])

page_content='PRU Health Critical Illness  
Extended Care III
Continuous cover for ongoing critical illness –  
with lump-sum financial support for cancer every year,  
up to a total of 660% cover while it persists
Critical Illness Protection 
Hong Kong Edition'


In [9]:
# Reduce the Document chunks produced by the text splitter to their plain text
docs_str = [chunk.page_content for chunk in docs]

In [10]:
# docs_str

In [11]:
# Define the FAISS retriever over the chunk texts
from dspy.retrieve.faiss_rm import FaissRM
frm = FaissRM(docs_str)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]



In [45]:

# Register DSPy's defaults: `turbo` as the generation model, `frm` as the retriever
# colbertv2_wiki17_abstracts = dspy.ColBERTv2(docs)
turbo = dspy.OpenAI(
    model="gpt-3.5-turbo",
    max_tokens=1000,
)
dspy.settings.configure(
    lm=turbo,
    rm=frm,
)

In [46]:
# dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)

In [47]:
# Define the inputs and outputs for RAG.
# NOTE(review): documented with comments rather than a docstring on purpose. DSPy
# is documented to use a Signature's docstring as the prompt instructions, so adding
# one would presumably change the prompt sent to the LM; confirm before adding one.
class ragSignature(dspy.Signature):
    # Retrieved passages; the desc text appears after "Context:" in the prompt template.
    context = dspy.InputField(desc="may contain relevant facts")
    # The user's question; no description is supplied.
    question = dspy.InputField()
    # Generated answer; the desc text appears after "Answer:" in the prompt template.
    # NOTE(review): "often between 1000 words" reads like an incomplete range. Confirm
    # the intended length hint, and that the LM's max_tokens=1000 leaves room for it.
    answer = dspy.OutputField(desc="often between 1000 words")

In [48]:
# Define RAG module
class RAG(dspy.Module):
    """Two-stage question answering: retrieve passages, then generate an answer.

    `num_passages` is forwarded to `dspy.Retrieve` as `k`. Answer generation is a
    `dspy.ChainOfThought` predictor built from `ragSignature`.
    """

    def __init__(self, num_passages=3):
        super().__init__()

        # No retriever is passed explicitly; presumably the one registered through
        # dspy.settings.configure is picked up. TODO confirm.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(ragSignature)

    def forward(self, question):
        """Answer `question`, returning a Prediction with `context` and `answer`."""
        retrieved = self.retrieve(question)
        passages = retrieved.passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [49]:
# Test the RAG pipeline without compiling
my_question = "what are the differences between Pru health critical illness extended care III and first protector II"
# `pred` is the uncompiled RAG module, set up to retrieve 10 passages per question.
pred = RAG(num_passages=10)
# Call the module itself instead of .forward(): DSPy documents calling the module as
# the way to run it, and the call dispatches to forward() with the same arguments.
prediction = pred(my_question)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [50]:
# Use print so the newlines in the generated answer render as line breaks
print(prediction.answer)

PRU Health Critical Illness Extended Care III and PRU Health Critical Illness First Protect II are both comprehensive critical illness insurance plans offered by PRUDENTIAL. However, there are several key differences between the two plans in terms of coverage, benefits, and premiums.

1. Disease Coverage:
- PRU Health Critical Illness Extended Care III covers 117 disease conditions, including cancer, heart attack, and stroke, providing up to 860% coverage against critical illnesses.
- PRU Health Critical Illness First Protect II covers 74 disease conditions, offering whole-life, lump-sum cover against the financial impact of these conditions.

2. Cancer Coverage:
- PRU Health Critical Illness Extended Care III provides extended coverage for cancer, including additional payments for new or continued cancer strikes and cancer treatment extra benefits.
- PRU Health Critical Illness First Protect II also covers cancer but may have different terms and conditions compared to Extended Care II

In [51]:
# Inspect the retrieved passages that were handed to the LM as context
prediction.context

['2Now, more than ever, people live with critical illnesses – including cancer. The continuous \nand extended treatment needed, particularly for cancer, can have a major impact \non your family’s finances. PRU Health Critical Illness Extended Care III offers ongoing \nprotection with limited years of premium payment. Even if you claim for a critical illness, \nit keeps protecting you with cover for cancer, heart attack and stroke. The plan also \naims to give you continuous annual lump-sum financial support throughout your cancer \ntreatment. It also pays a benefit if you have to stay in an intensive care unit (ICU) for \nhealth issues beyond critical illnesses, including infectious diseases and injuries. \nPlan highlightsPRU Health Critical Illness Extended Care III\nCovers 117 disease conditions\nUp to 860% coverage against critical illnesses \n•  Multiple coverage for Cancer,  \nHeart Attack and Stroke after a 100% \nMajor Disease Benefit claim\n•  Extra cover for new or continued  

In [52]:
# Show the prompt sent to the LM; n=1 presumably limits this to the latest call
turbo.inspect_history(n=1)





Given the fields `context`, `question`, produce the fields `answer`.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: often between 1000 words

---

Context:
[1] «2Now, more than ever, people live with critical illnesses – including cancer. The continuous 
and extended treatment needed, particularly for cancer, can have a major impact 
on your family’s finances. PRU Health Critical Illness Extended Care III offers ongoing 
protection with limited years of premium payment. Even if you claim for a critical illness, 
it keeps protecting you with cover for cancer, heart attack and stroke. The plan also 
aims to give you continuous annual lump-sum financial support throughout your cancer 
treatment. It also pays a benefit if you have to stay in an intensive care unit (ICU) for 
health issues beyond critical illnesses, including infectious diseases and inj