In [181]:
import os
import chromadb
import torch

In [182]:
from pprint import pprint

In [184]:
import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM
from dspy.teleprompt import BootstrapFewShot

import groq
from groq import Groq

In [None]:
from configs import models_config
from configs import db_config

In [185]:
# from langchain_community.embeddings import HuggingFaceEmbeddings
import chromadb.utils.embedding_functions as embedding_functions

In [186]:
# Open the persistent Chroma store that lives under the current working directory.
# NOTE(review): this handle is not referenced again in the cells shown here; the
# retriever is given the same path directly — confirm the client is still needed.
chroma_client = chromadb.PersistentClient(
    path=os.path.join(os.getcwd(), db_config.DB_NAME),
)

In [187]:
from dotenv import load_dotenv
# Must run before the os.getenv(...) lookups in the following cells, which read
# GROQ_API_KEY and HF_API_KEY — presumably supplied by a local .env file.
load_dotenv()

In [188]:
# Groq-backed language model: model name, temperature and token budget come from
# models_config; the API key is read from the environment.
llama = dspy.GROQ(
    model=models_config.LM_NAME_GROQ,
    api_key=os.environ.get("GROQ_API_KEY"),
    temperature=models_config.LM_TEMPERATURE,
    max_tokens=models_config.LM_MAX_TOKENS,
)

In [7]:
# device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# model_embedding = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en-v1.5', model_kwargs={'device': device})

In [189]:
# Embedding function passed to the Chroma retriever below; configured with the
# embedding model named in models_config and the HF_API_KEY environment variable.
huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    model_name=models_config.EMBEDDING_MODEL_NAME,
    api_key=os.environ.get("HF_API_KEY"),
)

In [190]:
# Retrieval model over the on-disk Chroma collection; returns the top
# RM_TOP_PASSAGES passages per query, embedded with huggingface_ef.
retrieve_model = ChromadbRM(
    collection_name=db_config.COLLECTION_NAME,
    persist_directory=os.path.join(os.getcwd(), db_config.DB_NAME),
    embedding_function=huggingface_ef,
    k=models_config.RM_TOP_PASSAGES,
)

In [191]:
# Register the LM and the retriever as DSPy's global defaults. CancerRAG below never
# passes an lm/rm explicitly, so it relies on this call having run first.
dspy.settings.configure(lm=llama, rm=retrieve_model)

In [197]:
# DSPy signature for the answer-generation step: (context, question) -> answer.
# NOTE: the class docstring is not ordinary documentation — DSPy uses it as the task
# instructions sent to the LM, so every character in it is part of the prompt.
# The refusal sentence is wrapped in a matched pair of single quotes so the model can
# tell exactly where the canned reply starts and ends.
class GenerateAnswerSignature(dspy.Signature):
    """
    Answer based on the provided information, typically in 5-7 sentences.
    Only answer if the question relates directly to the topic of cancer.
    If the question does not directly match with the context, respond with 'Sorry, but I can provide you only the information about cancer.'
    """

    # Retrieved passages; they are not guaranteed to be relevant to the question.
    context = dspy.InputField(desc="May contain relevant facts.")
    # The user's question.
    question = dspy.InputField()
    # Final answer text; the desc repeats the length guidance from the instructions.
    answer = dspy.OutputField(desc="Answer based on the provided information, typically in 5-7 sentences.")

In [198]:
class CancerRAG(dspy.Module):
    """Two-step retrieve-then-answer pipeline.

    Passages are fetched with ``dspy.Retrieve`` and handed, together with the
    question, to a ``dspy.ChainOfThought`` predictor built on
    ``GenerateAnswerSignature``.
    """

    def __init__(self, num_passages=models_config.RM_TOP_PASSAGES):
        """Build the retriever and the answer generator.

        Args:
            num_passages: number of passages requested from the retriever per
                question; defaults to ``models_config.RM_TOP_PASSAGES``.
        """
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        # Alternative without the custom instructions (kept for reference):
        #   dspy.ChainOfThought("context, question -> answer")
        self.generate_answer = dspy.ChainOfThought(GenerateAnswerSignature)

    def forward(self, question):
        """Answer ``question`` from retrieved passages.

        Returns:
            A ``dspy.Prediction`` carrying ``context`` (the retrieved passages)
            and ``answer`` (the generated answer text).
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [199]:
# "Uncompiled": the module is used as constructed, with no optimizer pass applied
# (BootstrapFewShot is imported above but not used in the cells shown here).
uncompiled_rag = CancerRAG()

In [200]:
# Off-topic probe: unrelated to cancer, so the refusal sentence is the expected reply.
test_query_uncompiled = "What is the best romance movie?"
response_uncompiled = uncompiled_rag(question=test_query_uncompiled)

In [228]:
# Recorded run: exactly the refusal sentence from the signature instructions.
print(response_uncompiled.answer)

Sorry, but I can provide you only the information about cancer.


In [204]:
# In-domain probe: definition-style question about a specific tumor type.
test_query_1 = "What is lung carcinoid tumor?"
response_1 = uncompiled_rag(question=test_query_1)

In [229]:
# NOTE(review): the recorded output starts with a literal "Answer:" prefix, i.e. the
# output-field label leaked into the answer text.
print(response_1.answer)

Answer: A lung carcinoid tumor is a type of lung cancer that starts in neuroendocrine cells in the lungs.


In [206]:
# Off-topic probe: fictional character, nothing to do with cancer.
test_query_2 = "Who is Harry Potter?"
response_2 = uncompiled_rag(question=test_query_2)

In [230]:
# NOTE(review): the recorded output is the refusal sentence followed by an extra
# explanatory sentence, so the canned reply is not returned verbatim here.
print(response_2.answer)

Sorry, but I can provide you only the information about cancer. The provided context does not relate to Harry Potter.


In [210]:
# In-domain probe: asks for statistics (survival rates) rather than a definition.
test_query_3 = "Tell me about survival rates of patients with thyroid cancer"
response_3 = uncompiled_rag(question=test_query_3)

In [231]:
# Recorded run: a nested bullet list of rates per cancer type, not the "5-7 sentences"
# form the instructions ask for.
print(response_3.answer)

According to the provided information, the 5-year relative survival rates for thyroid cancer are grouped based on how far the cancer has spread. The rates are as follows:

* For papillary thyroid cancer:
	+ Localized: >99.5%
	+ Regional: 99%
	+ Distant: 74%
	+ All SEER stages combined: >99.5%
* For follicular thyroid cancer:
	+ Localized: >99.5%
	+ Regional: 98%
	+ Distant: 67%
	+ All SEER stages combined: 98%
* For medullary thyroid cancer:
	+ Localized: >99.5%
	+ Regional: 92%
	+ Distant: 43%
	+ All SEER stages combined: 91%
* For anaplastic thyroid cancer:
	+ Localized: 39%
	+ Regional: 11%
	+ Distant: 4%
	+ All SEER stages combined: 8%

These numbers are based on people diagnosed with thyroid cancer between 2012 and 2018.


In [212]:
# In-domain probe: open-ended question about treatment options.
test_query_4 = "Tell me about treatment options for stomach cancer"
response_4 = uncompiled_rag(question=test_query_4)

In [232]:
# NOTE(review): the recorded output opens with a "Here is the answer:" preamble before
# the actual answer paragraph.
print(response_4.answer)

Here is the answer:

When it comes to treatment options for stomach cancer, there are several approaches that may be considered. The first step is to determine the stage of the cancer, which will help guide the choice of treatment. If the cancer is localized and has not spread, surgery may be an option to remove the tumor and nearby lymph nodes. This can be a subtotal gastrectomy (removal of part of the stomach) or total gastrectomy (removal of the entire stomach). If the cancer has spread, treatment may focus on controlling the growth of the cancer and preventing or relieving symptoms. This may involve chemotherapy alone, chemotherapy plus immunotherapy, or chemotherapy along with radiation therapy. In some cases, targeted therapies such as trastuzumab may be added to the treatment plan. The goal of treatment will depend on the individual's specific situation and the stage of their cancer.


In [214]:
# Off-topic probe: casual opinion question. The query text (including the "pinapple"
# spelling) is kept as-is because the recorded output was produced with it.
test_query_5 = "Do you like pinapple on pizza?"
response_5 = uncompiled_rag(question=test_query_5)

In [233]:
# Recorded run: exactly the refusal sentence, nothing appended.
print(response_5.answer)

Sorry, but I can provide you only the information about cancer.


In [216]:
# In-domain probe: symptoms of a rare cancer, written in lower case.
test_query_6 = "What are the symptoms of waldenstrom macroglobulinemia?"
response_6 = uncompiled_rag(question=test_query_6)

In [234]:
# Recorded run: a single-sentence answer listing the symptoms.
print(response_6.answer)

The symptoms of Waldenstrom macroglobulinemia (WM) include weakness, loss of appetite, fever, sweats, weight loss, and neuropathy (painful pins-and-needles feeling in the feet/legs).


In [218]:
# Off-topic probe: question about a software product.
test_query_7 = "Is Slack a good communication tool?"
response_7 = uncompiled_rag(question=test_query_7)

In [235]:
# Recorded run: exactly the refusal sentence.
print(response_7.answer)

Sorry, but I can provide you only the information about cancer.


In [220]:
# Off-topic probe: travel recommendation phrased as a request, not a question.
test_query_8 = "Tell me about best travel spots in France"
response_8 = uncompiled_rag(question=test_query_8)

In [236]:
# Recorded run: exactly the refusal sentence.
print(response_8.answer)

Sorry, but I can provide you only the information about cancer.


In [222]:
# In-domain probe: yes/no question about treatability.
test_query_9 = "Is it possible to treat pancreatic cancer?"
response_9 = uncompiled_rag(question=test_query_9)

In [237]:
# NOTE(review): the recorded output again starts with a literal "Answer:" prefix and
# refers to "the provided context" in the answer text itself.
print(response_9.answer)

Answer: Yes, it is possible to treat pancreatic cancer. According to the provided context, pancreatic cancer is treated based on its resectability - whether the pancreatic tumor has spread to other parts of the body and if it can be completely removed by a surgeon. Other factors, such as the patient's overall health, can also affect treatment options. The context mentions various treatment options, including surgery, ablation and embolization treatments, radiation therapy, chemotherapy, and immunotherapy. Additionally, targeted drugs and immunotherapy can be used to specifically target the changes in pancreatic cancer cells that help them grow.


In [241]:
# In-domain probe: broad question that spans several cancer types.
test_query_10 = "What types of cancer can develop in children?"
response_10 = uncompiled_rag(question=test_query_10)

In [243]:
# Recorded run: a numbered list of cancer types framed by an intro and a closing
# paragraph.
print(response_10.answer)

The types of cancers that can develop in children are different from those that develop in adults. The most common cancers in children are:

1. Leukemia
2. Brain and spinal cord tumors
3. Neuroblastoma
4. Wilms tumor
5. Lymphoma (including both Hodgkin and non-Hodgkin)
6. Rhabdomyosarcoma
7. Retinoblastoma
8. Bone cancer (including osteosarcoma and Ewing sarcoma)

These cancers are often the result of DNA (gene) changes in cells that take place very early in life, sometimes even before birth. Unlike many cancers in adults, childhood cancers are not strongly linked to lifestyle or environmental risk factors.
