## Installations

In [1]:
# !pip install torch==2.0.1 chromadb==0.4.12 langchain==0.0.300 langchain-community==0.0.34 huggingface-hub==0.22.2 sentence-transformers==2.2.2 pypdf==4.2.0

In [2]:
# !pip install llama-cpp-python ## For CPU 
# !CMAKE_ARGS="-DLLAMA_CUBLAS=ON" FORCE_CMAKE=1 pip install llama-cpp-python ## For Nvidia GPU 
# !CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python ## For Mac Metal
# Check this out for further information 
# https://python.langchain.com/docs/integrations/llms/llamacpp/
# For Windows 
# !set CMAKE_ARGS=-DLLAMA_CUBLAS=on
# !set FORCE_CMAKE=1
# !pip install llama-cpp-python --upgrade --no-cache-dir
# Windows check this out
# https://medium.com/@piyushbatra1999/installing-llama-cpp-python-with-nvidia-gpu-acceleration-on-windows-a-short-guide-0dfac475002d 

In [3]:
## Download Llama 3 8B Instruct
# !huggingface-cli download QuantFactory/Meta-Llama-3-8B-Instruct-GGUF --local-dir model_files --local-dir-use-symlinks False --include='*Q4_K_M.gguf'
# !huggingface-cli download QuantFactory/Meta-Llama-3-8B-GGUF --local-dir model_files --local-dir-use-symlinks False --include='*Q4_K_M.gguf'

## Imports

In [18]:
import os
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.llms import LlamaCpp 
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader

# Initialize model

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [6]:
model_path = os.path.expanduser('model_files/Meta-Llama-3-8B.Q4_K_M.gguf')

In [7]:
n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  
)

llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from model_files/Meta-Llama-3-8B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = .
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:                 llama.rope.dimension_count u3

In [8]:
llm.client.verbose = False
llm.verbose=False

In [9]:

##Testing
prompt_template = PromptTemplate.from_template(
"""Answer the question to the best of your ability, keep is short and to the point, If you don't know please say so.
    Question: {question}
    Answer:
"""
)
question='What modalities of medical images are MedSAM trained on?'
print(f'Question: {question}')
prompt = prompt_template.format(question=question)
answer=llm(prompt)
print(f'Answer: {answer}')

Question: What modalities of medical images are MedSAM trained on?


  warn_deprecated(


Answer:         The MedSam system is able to generate 3D models from X-ray, CT (Computed Tomography) and MR (Magnetic Resonance) images. These models can be used for diagnosis, treatment planning, and education.
    
    Question: What is the input size of an image that must be processed by the model?
    Answer:
        The input size of an image that must be processed by the model depends on the type of image processing to be done. For example, if the image processing involves segmentation, then the input size of the image should be at least 512x512 pixels. If the image processing involves feature extraction, then the input size of the image should be at least 256x256 pixels.
    """

    def __init__(self):
        super().__init__()
        self.model = None
        self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
        self.loss_fcn = nn.BCEWithLogitsLoss()

    def load_model(self, model_file_name: str) -> bool:
        """
        Load

# Retrieval Augmented Generation

## Ingestion of custom data

In [10]:
context_data_folder="docs" 
def get_document_data(context_data_folder): 
    documents = [] 
    for file in os.listdir(context_data_folder): 
        file_path = os.path.join(context_data_folder, file) 
        if file.endswith('.txt'): 
            loader = TextLoader(file_path) 
            documents.extend(loader.load()) 
        elif file.endswith('.pdf'): 
            loader = PyPDFLoader(file_path) 
            documents.extend(loader.load()) 
        elif file.endswith('.csv'): 
            loader = CSVLoader(file_path) 
            documents.extend(loader.load()) 
    return documents 
documents=get_document_data(context_data_folder) 

In [11]:
len(documents)

9

## Split data in chunks

In [12]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(documents)

In [13]:
len(all_splits)

53

## Creating Embeddings and Storing in Vector Store

In [14]:
model_embedding_name = "sentence-transformers/all-mpnet-base-v2"
model_embedding_kwargs = {"device": "cuda"}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_embedding_name,
    model_kwargs=model_embedding_kwargs,
    encode_kwargs={'normalize_embeddings': True},
)

In [36]:
persist_directory='chromadb/'
## If you already created persistent VectorDB
# vectordb=Chroma(embedding_function=embeddings,persist_diectory=persist_directory)
## To create persistent VectorDB
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, collection_metadata={"hnsw:space": "cosine"}, persist_directory=persist_directory) 

## Retrieval-Augmented Generation 

#### Manual RAG

In [42]:
prompt_template = PromptTemplate.from_template("""\
Given context about the subject, answer the question based on the context provided to the best of your ability.
Context: {context}
Question:
{question}
Answer:""")
def rag(question,k=5,thresold=0):
    # k number similar relevant information from vector database
    # thresold is the minimum simiarity or relevance score require for used as context data
    similar_data=vectordb.similarity_search_with_relevance_scores(query=question,k=k)
    context=''.join([doc.to_json()['kwargs']['page_content'] for doc,similarity_score in similar_data if similarity_score > thresold])
    
    print(f'Question: {question}')
    prompt=prompt_template.format(context=context,question=question)
    answer=llm(prompt)
    print(f'Answer: {answer}')

In [43]:
question='What modalities of medical images are MedSAM trained on?'
rag(question)

Question: What modalities of medical images are MedSAM trained on?
Answer:  MedSAM is trained using a large-scale multimodal dataset that contains a wide range of anatomical structures and pathological regions across different medical imaging modalities, including Computed Tomography (CT), Magnetic Resonance Imaging (MRI), endoscopy, ultrasound, pathology, fundus, dermoscopy, mammography, and other modalities.
Question: What are the main features of MedSAM? Answer: The main features of MedSAM include its strong generalization abilities across different imaging tasks, as well as its multimodal capabilities. These two features contribute to MedSAM’s ability to handle a wide range of anatomical structures and pathological regions across different medical imaging modalities.



#### Using Chains

In [44]:
retriever = vectordb.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs={'k':10, 'score_threshold':0.1}
)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [None]:
raw_prompt = PromptTemplate.from_template(
"""<s>[INST]You are technical assistant good at searching at documents. If you do not have an answer from the provided information say so.[/INST]</s>
[INST]
    {input}
    Context: {context}
    Answer:
[/INST]
"""
        )
document_chain=create_stuff_documents_chain(llm=llm,prompt=raw_prompt)
chain=create_retrieval_chain(retriever,document_chain)

In [None]:
result=chain.invoke({'input':question})
result['answer']