In [None]:
#Install llama-index  (remove the "!" if you are not on colab)
#!pip install llama-index
!pip install llama-index ipywidgets

In [None]:
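#Replicate account connection (optional: only needed when calling models through the Replicate API;
#the cell below loads the model locally through Hugging Face instead)
#import os
#os.environ["REPLICATE_API_TOKEN"] = "REPLICATE_API_TOKEN"          #Put your Replicate API token here (never commit a real token)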

import logging
import sys

# Send INFO-and-above log records to the notebook output (stdout).
# The root logger ends up with exactly one stdout handler: calling basicConfig() and then
# addHandler() attached two of them on a fresh kernel (every record was printed twice),
# and each re-run of the cell stacked one more. Setting the level directly also works when
# the root logger already has handlers, a case in which basicConfig() silently does nothing.
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.INFO)
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in _root_logger.handlers
):
    _stdout_handler = logging.StreamHandler(stream=sys.stdout)
    # Same "LEVEL:logger:message" layout that basicConfig() would have applied.
    _stdout_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    _root_logger.addHandler(_stdout_handler)


from IPython.display import Markdown, display

In [None]:
#Loading even a 7B-parameter model on a local machine is demanding; switch `selected_model` to a larger variant if your hardware allows it

import torch
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate

# Hugging Face Hub ids of the Llama 2 checkpoints this notebook can load.
# The "-chat" variants are the dialogue fine-tunes; the others are the base models.
LLAMA2_7B = "meta-llama/Llama-2-7b-hf"
LLAMA2_7B_CHAT = "meta-llama/Llama-2-7b-chat-hf"
LLAMA2_13B = "meta-llama/Llama-2-13b-hf"
LLAMA2_13B_CHAT = "meta-llama/Llama-2-13b-chat-hf"
LLAMA2_70B = "meta-llama/Llama-2-70b-hf"
LLAMA2_70B_CHAT = "meta-llama/Llama-2-70b-chat-hf"

# The query wrapper below uses the Llama 2 chat markup ([INST], <<SYS>>), which only the
# "-chat" checkpoints were fine-tuned on, so a chat variant is selected here (same 7B size
# as before). Pick one of the larger *_CHAT ids if your hardware allows it.
selected_model = LLAMA2_7B_CHAT

# System instruction placed inside the <<SYS>> section of every prompt.
SYSTEM_PROMPT = """You are a chatbot, let me add more knowledge to you"""

# Template applied to each query: {query_str} is filled in with the user's question.
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

# Generation options handed to the model on every call: sampling is switched off.
generation_settings = {"temperature": 0.0, "do_sample": False}

# change these settings below depending on your GPU
# (presumably forwarded to the Hugging Face model loader: half-precision dtype, 8-bit weights)
model_load_settings = {"torch_dtype": torch.float16, "load_in_8bit": True}

# Local Hugging Face LLM wrapper; the model and its tokenizer both come from `selected_model`.
llm = HuggingFaceLLM(
    model_name=selected_model,
    tokenizer_name=selected_model,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs=generation_settings,
    model_kwargs=model_load_settings,
    device_map="auto",
)

In [None]:
#Put the papers with geometric information about brain lesions and clinical knowledge in a folder (./data/ is the one read below)

from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
    set_global_service_context,
)

DATA_DIR = "./data/"       # folder the source documents are read from
PERSIST_DIR = "./storage"  # folder the built index is written to

# load documents
documents = SimpleDirectoryReader(DATA_DIR).load_data()

# Pair the `llm` created in the previous cell with a local BGE embedding model and register
# the pair as the global service context, so the index below is built with it without
# having to pass it explicitly.
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local:BAAI/bge-small-en"
)
set_global_service_context(service_context)

index = VectorStoreIndex.from_documents(documents)

# By default, data is stored in-memory. Persist it to disk (under ./storage):
index.storage_context.persist(persist_dir=PERSIST_DIR)

# To reload from disk in the future, run the snippet below instead of rebuilding the index.
# It is kept as `#` comments on purpose: a bare triple-quoted string at the end of a cell is
# an expression, so the notebook would echo it as the cell's output.
#
# from llama_index import StorageContext, load_index_from_storage
#
# # rebuild storage context
# storage_context = StorageContext.from_defaults(persist_dir='./storage')
# # load index
# index = load_index_from_storage(storage_context)