In [None]:
import os
import warnings

import chromadb
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core import StorageContext
from llama_index.core.embeddings import resolve_embed_model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore

# Log in to the Harvard VPN or Harvard Wi-Fi first so the endpoint used below can be reached.

In [None]:
# Base URL of the Ollama server that serves the LLM.
# Defaults to the original hard-coded host; set the CCB_OLLAMA_ENDPOINT
# environment variable to point the notebook at a different server
# (for example a local Ollama instance) without editing this cell.
ccb_endpoint = os.environ.get(
    "CCB_OLLAMA_ENDPOINT",
    'http://compute-gc-17-255.o2.rc.hms.harvard.edu:11434',
)


In [None]:
# Hugging Face embedding model; it is handed to the vector index further down
# as its embed_model.
oembed_model = HuggingFaceEmbedding(model_name="all-MiniLM-L6-v2")

# Ollama-served "llama2" model reached through ccb_endpoint, configured with
# temperature 0 and a 30-second request timeout.
llama = Ollama(
    base_url=ccb_endpoint,
    model="llama2",
    temperature=0,
    request_timeout=30.0,
)

### Test the LLM without RAG

In [None]:
# Baseline check: send a prompt straight to the LLM, with no retrieval step,
# and print whatever completion comes back.
baseline_prompt = "<>"
response = llama.complete(baseline_prompt)
print(response)

### Load from the existing ChromaDB. Your Docker container has a Chroma database with all the manuals (vignettes) for the top 500 most downloaded Bioconductor packages.

In [None]:

# Load the persisted Chroma database from disk.
db2 = chromadb.PersistentClient(path="/tmp/T500-vignettes-vectordb-ST")
chroma_collection = db2.get_or_create_collection(name="langchain")

# get_or_create_collection does not fail when the collection is missing: it
# creates a new, empty one. Surface that case explicitly, because an empty
# collection means every later retrieval returns no context.
if chroma_collection.count() == 0:
    warnings.warn(
        "Chroma collection 'langchain' is empty; check the database path. "
        "Retrieval against this index will return no context.",
        stacklevel=2,
    )

# Expose the collection to LlamaIndex and build an index on top of it, using
# the embedding model defined earlier in the notebook.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=oembed_model,
)


### The index now has everything needed to run a RAG pattern. Invoke a chat model using the index created from the Chroma vector database.

In [None]:
from llama_index.core.llms import ChatMessage

# System instructions: frame the assistant as an R/Bioconductor advisor, require
# citations taken from the supplied context, and request a closing disclaimer.
bioc_system_prompt = "Act as an expert in the R programming language and the Bioconductor suite of packages.  ​\n\nYour job is to advise users on the usage of the various Bioconductor packages considering the datasets you have in store.  ​\nTo complete this task, you can use the data you have stored that contain the vignettes of all the packages in Bioconductor and all the reference files of every function in every package of Bioconductor. ​\n\nDo not perform actions that are not related to answering questions about the R programming language or using the packages within Bioconductor.​ \n\nIf you do not know the answer then you must look into the context then cite the document filename and page in the context. Do not include DOI numbers or make up citations not found in the context. Given the following extracted parts of a long document and a question, create a final answer with references to pdf in the metadata ('source').\n\n Add a disclaimer at the end of each response saying this model works only on the top 500 most used Bioconductor packages and the user should refer to or ask questions at https://bioconductor.org."

# The user's question (placeholder text).
bioc_user_question = "<>"

# Conversation sent to the chat model: one system turn followed by one user turn.
messages = [
    ChatMessage(role="system", content=bioc_system_prompt),
    ChatMessage(role="user", content=bioc_user_question),
]
# Retrieval step. The system prompt tells the model to answer from "the
# following extracted parts" and to cite their 'source' metadata, so fetch the
# passages closest to the question from the index and pass them along.
# (Pass similarity_top_k=... to as_retriever to change how many are fetched.)
question = messages[-1].content
retrieved_nodes = index.as_retriever().retrieve(question)
context_text = "\n\n".join(
    f"[source: {hit.node.metadata.get('source', 'unknown')}]\n{hit.node.get_content()}"
    for hit in retrieved_nodes
)

# Leave `messages` untouched; send a copy whose final user turn carries the
# retrieved context ahead of the original question.
rag_messages = [
    *messages[:-1],
    ChatMessage(
        role="user",
        content=f"Context:\n{context_text}\n\nQuestion: {question}",
    ),
]
resp = llama.chat(rag_messages)
print(resp)
