In [1]:
# local imports
from catalogs import MODELS_CATALOG, EMBEDDING_MODELS_CATALOG
from settings import DATA_PATH, embeddings_model_name, BASE_VECTORSTORE_DIR, VECTORSTORE_PATH, prompt_template, method, model
from preprocess import load_sources, split_documents, load_vector_store
from llm_model_setup import create_llm, create_retrieval_chain

# Step 1 - LLM.
# Instantiate the LLM up front so that a problem with the model shows up
# before any time is spent processing the data sources.
llm = create_llm(
    method,
    model,
    MODELS_CATALOG,
    download_dir="models",
)

# Step 2 - data sources.
# Load the sources found under DATA_PATH and split them into chunks.
documents = load_sources(DATA_PATH)
texts = split_documents(documents)

# Step 3 - vector store.
# Look up the catalog entry for the configured embeddings model, then obtain
# the database that will be queried.
embedding_model_entry = EMBEDDING_MODELS_CATALOG[embeddings_model_name]
db = load_vector_store(
    embedding_model_entry,
    VECTORSTORE_PATH,
    texts,
    verbosity=1,
)

# Step 4 - retrieval chain.
# Tie the database, the prompt template and the LLM together into a
# question-answer retrieval chain.
qa_chain = create_retrieval_chain(db, prompt_template, llm)

  from .autonotebook import tqdm as notebook_tqdm


Loading model parameters using LlamaCpp...
Model gemma-2-9b-instruct found
Loading model using LlamaCpp...


llama_model_loader: loaded meta data with 26 key-value pairs and 464 tensors from models/gemma-2-9b-it.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gemma2
llama_model_loader: - kv   1:                               general.name str              = models
llama_model_loader: - kv   2:                      gemma2.context_length u32              = 8192
llama_model_loader: - kv   3:                    gemma2.embedding_length u32              = 3584
llama_model_loader: - kv   4:                         gemma2.block_count u32              = 42
llama_model_loader: - kv   5:                 gemma2.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                gemma2.attention.head_count u32              = 16
llama_model_loader: - kv   7:             gemma2.attention.head_count_kv u32    

Loaded 88 pages from PDF files.
Split the documents into 518 chunks.


  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)


Loading existing vector store from: vectorstore/minilm


In [None]:
# Ask a question through the question-answer retrieval chain (`qa_chain`)
# created in the setup cell above.

# Define the topic for the question
topic = "data uncertainty handling methods"
# Use a basic question template inserting the topic
query = f"Based on the provided documents, write a short summary of the key findings on {topic}."

# Run the chain with `.invoke()` instead of calling the chain object directly:
# invoking a LangChain chain through `__call__` is deprecated and emits a
# deprecation warning.
# NOTE(review): assumes `create_retrieval_chain` returns a LangChain chain
# (the 'query' / 'result' / 'source_documents' keys suggest RetrievalQA) - confirm.
result = qa_chain.invoke({"query": query})

# Print the result
print("\n--- Generated Answer ---\n")
print(result['result'])

# Inspect the metadata of the source documents used for the answer to retrieve references.
# Each entry shows the page recorded in the document metadata ('N/A' when absent)
# followed by the first 200 characters of the chunk text.
print("\n--- Source Documents Used ---\n")
for doc in result['source_documents']:
    print(f"- Page {doc.metadata.get('page', 'N/A')}: {doc.page_content[:200]}...")