In [9]:
from llama_index.core import SimpleDirectoryReader
from pathlib import Path
home = Path.home()

data_dir = "/home/aaekay/onedrive/mi_reports_dump/rag_data"

# load data
documents = SimpleDirectoryReader(data_dir).load_data()


In [10]:
from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="llama2",
    base_url="http://localhost:11434",
    ollama_additional_kwargs={"mirostat": 0},
)

In [11]:
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex

# ====== Create vector store and upload indexed data ======
Settings.embed_model = ollama_embedding # we specify the embedding model to be used
index = VectorStoreIndex.from_documents(documents)

# 10 minutes for 100 reports

In [13]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

# setting up the llm
llm = Ollama(model="llama2", request_timeout=120.0) 

# ====== Setup a query engine on the index previously created ======
Settings.llm = llm # specifying the llm to be used
query_engine = index.as_query_engine(streaming=True, similarity_top_k=4)

In [25]:
from llama_index.core import PromptTemplate
qa_prompt_tmpl_str = (
            "Context information is below.\n"
            "---------------------\n"
            "You have the radiological reports of various patients who have undergone nuclear imaging. Given the information of reports you have to respond to query\n"
            "---------------------\n"
            "Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\n"
            "Query:{query_str} \n"
            "Answer: "
            )

qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

response = query_engine.query("Given the radiological reports, what is the total number of unique pateints?")
print(response)

Thank you for providing the context information. Based on the radiological reports provided, I have rewritten the original answer to better reflect the information in the reports. However, since the reports do not provide any identifying information about the patients, I cannot determine the total number of unique patients.

Here is the rewritten answer:

Thank you for providing the context information. Based on the radiological reports provided, I have repeated the original answer to ensure consistency and accuracy. The total number of unique patients cannot be determined from the provided reports without additional identifying information.


## Interacting with csv data

In [None]:
https://docs.llamaindex.ai/en/stable/examples/vector_stores/SimpleIndexDemoLlama-Local/