# Storage generation

This notebook generates the vector store for one model at a time and saves it in the `storage` folder under the name of the model. 
Please note that this notebook was developed with only two models in mind (Llama 2 and Mistral). The `messages_to_prompt` function may therefore need to be changed if you want to use it with other models.


In [None]:
import os
import warnings
import pickle
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader, 
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

In [None]:
# Silence every warning so library noise does not clutter the cell outputs.
warnings.filterwarnings("ignore")

# Directory one level above the current working directory; later cells
# resolve the `models` folder relative to it.
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)

## Loading Documents

In [None]:
# data_path = os.path.join(parent_dir, 'data')

# # Data ingestion
# documents = SimpleDirectoryReader(data_path).load_data()

In [None]:
# # Storing documents as a list to avoid loading them again
# with open('../storage/documents/documents.pickle', 'wb') as f:
#     pickle.dump(documents, f)

In [None]:
# Restore the document list that was pickled by a previous run, so the raw
# data does not have to be ingested again.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
documents_pickle_path = '../storage/documents/documents.pickle'
with open(documents_pickle_path, 'rb') as pickle_file:
    documents = pickle.load(pickle_file)

## Selecting a model to generate storage
This could be wrapped in a for loop over all models, but to avoid memory issues each model is run separately for now.

In [None]:
# Construct the path to the models directory
models_path = os.path.join(parent_dir, 'models')

# Keep only regular files that can be model weights: skip the repository's
# .gitignore and any sidecar file whose name ends with "Zone.Identifier".
# Filtering in one expression replaces the former bare `except: pass` blocks,
# which could hide unrelated errors.
# Sorting keeps the printed indices stable between runs, because os.listdir()
# returns entries in arbitrary order.
models = sorted(
    entry
    for entry in os.listdir(models_path)
    if os.path.isfile(os.path.join(models_path, entry))
    and entry != ".gitignore"
    and not entry.endswith("Zone.Identifier")
)

# Show each model by the part of its file name before the first dot
print("Available models:")
for i, m in enumerate(models):
    print(f"{i}: {m.split('.')[0]}")

In [None]:
# Without any model file no index can ever be valid, and the prompt loop
# below would never terminate -- fail early with a clear message instead.
if not models:
    raise RuntimeError(f"No model files found in {models_path}")

# Shown whenever the input is not an integer or is out of range
invalid_input_msg = (
    f"Invalid input. Please enter a number between 0 and {len(models) - 1}"
    " according to the selection shown above."
)

# Select a model. Keep asking until the user enters an integer between
# 0 and len(models)-1 (the indices printed by the previous cell).
while True:
    try:
        model_index = int(input("Select a model: "))
    except ValueError:
        print(invalid_input_msg)
        continue
    if 0 <= model_index < len(models):
        break
    print(invalid_input_msg)

# Get path to the selected model
model_path = os.path.join(models_path, models[model_index])
# Short tag: the part of the file name before the first '-'
model_tag = models[model_index].split('-')[0]

In [None]:
# For any model whose file name does not start with "llama", replace the
# imported Llama 2 `messages_to_prompt` with a tag-based formatter.
if not models[model_index].startswith("llama"):
    # The following prompt works well with Mistral
    def messages_to_prompt(messages):
        """Render chat messages as one prompt string using <|role|> tags.

        Each system, user and assistant message is written as a role tag
        line, the message content, and a closing </s> marker. Messages with
        any other role are skipped.

        After all messages are rendered, a blank system block is prepended
        if the prompt does not already start with one, and a single
        assistant tag is appended so the model answers as the assistant.

        Args:
            messages: iterable of objects exposing `.role` and `.content`.

        Returns:
            The assembled prompt string.
        """
        prompt = ""
        for message in messages:
            if message.role == 'system':
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif message.role == 'user':
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif message.role == 'assistant':
                prompt += f"<|assistant|>\n{message.content}</s>\n"

        # The two steps below must run once, after the loop. Running them
        # per message inserted a stray "<|assistant|>" tag after every
        # message of a multi-turn conversation.

        # ensure we start with a system prompt, insert blank if needed
        if not prompt.startswith("<|system|>\n"):
            prompt = "<|system|>\n</s>\n" + prompt

        # add final assistant prompt
        prompt = prompt + "<|assistant|>\n"

        return prompt

llm = LlamaCPP(
    # Local path of the model file selected above. A `model_url` pointing to
    # a GGML model could be passed instead to download it automatically.
    model_path=model_path,
    # Prompt formatters for chat messages and plain completions
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    temperature=0.2,
    max_new_tokens=1000,
    # Kept below Llama 2's 4096-token context window to leave some headroom
    context_window=3900,
    # kwargs forwarded to __init__(); n_gpu_layers has to be at least 1 to
    # use the GPU (-1 presumably offloads every layer -- TODO confirm)
    model_kwargs={"n_gpu_layers": -1},
    # kwargs forwarded to __call__()
    generate_kwargs={},
    verbose=True,
)

## Selecting Embeddings model
Currently, the embedding model name needs to be changed manually in the cell below.

In [None]:
# Embedding model identifier in "<organisation>/<name>" form.
# Alternatives tried: BAAI/bge-base-en-v1.5, BAAI/bge-large-en-v1.5
embedding = "EuropeanParliament/eubert_embedding_v1"

# The part after the first '/' is used to name the storage folder
embedding_tag = embedding.split('/')[1]

embed_model = HuggingFaceEmbedding(
    embedding,
    max_length=512,
)

In [None]:
# Bundle the LLM and the embedding model together with the chunking settings.
# (Alternative embed_model value: "local:EuropeanParliament/eubert_embedding_v1")
service_context = ServiceContext.from_defaults(
    chunk_size=512,
    chunk_overlap=125,
    embed_model=embed_model,
    llm=llm,
)

In [None]:
# Build the vector index over the loaded documents, showing a progress bar
vector_index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
    service_context=service_context,
)

In [None]:
# Write the index's storage to a folder named after the embedding model
persist_dir = f"../storage/{embedding_tag}"
vector_index.storage_context.persist(persist_dir=persist_dir)

## Loading index
Uncomment the following cell if you want to load an index from a previous run and test the storage loading.

In [None]:
# # rebuild storage context
# # NOTE(review): the persist cell above writes to f"../storage/{embedding_tag}", while this
# # cell reads from f"storage/{model_tag}/vector_storage" -- confirm which path is intended.
# storage_context = StorageContext.from_defaults(persist_dir=f"storage/{model_tag}/vector_storage")

# # load index
# vector_index = load_index_from_storage(storage_context, service_context= service_context)