In [None]:
# local imports
from catalogs import MODELS_CATALOG, EMBEDDING_MODELS_CATALOG
from settings import DATA_PATH, embeddings_model_name, BASE_VECTORSTORE_DIR, VECTORSTORE_PATH, prompt_template, method, model
from preprocess import load_sources, split_documents, load_vector_store
from llm_model_setup import create_llm, create_retrieval_chain
from langchain.llms import LlamaCpp, CTransformers


# --- Build the retrieval-QA pipeline ---
# Every setting used here (paths, model names, prompt template) comes from
# the `settings` / `catalogs` imports at the top of the cell.
documents = load_sources(DATA_PATH)
texts = split_documents(documents)

# The embedding model entry is looked up in the catalog by its configured name.
db = load_vector_store(
    EMBEDDING_MODELS_CATALOG[embeddings_model_name],
    VECTORSTORE_PATH,
    texts,
    verbosity=1,
)

# Create the LLM, then combine store + prompt template + LLM into the chain.
llm = create_llm(method, model, MODELS_CATALOG, download_dir="models")
qa_chain = create_retrieval_chain(db, prompt_template, llm)

# --- Ask a question ---
query = (
    "Based on the provided documents, write a short summary "
    "of the key findings on data uncertainty handling methods."
)
# NOTE(review): newer LangChain releases reportedly prefer `.invoke(...)` over
# calling the chain object directly -- confirm against the installed version.
result = qa_chain({"query": query})

# --- Show the generated answer ---
print("\n--- Generated Whitepaper/Answer ---\n", result["result"], sep="\n")

# --- Show the source documents behind the answer ---
# Each entry prints its 'page' metadata (or 'N/A') and the first 200 characters.
print("\n--- Source Documents Used ---\n")
for doc in result["source_documents"]:
    page = doc.metadata.get("page", "N/A")
    snippet = doc.page_content[:200]
    print(f"- Page {page}: {snippet}...")

In [3]:
# --- Ask a second, longer-form question ---
# Reuses the `qa_chain` object created in the earlier cell.
query = (
    "Based on the provided documents, write 1000 words summary "
    "of the key findings on data uncertainty for LCA handling methods."
)
# NOTE(review): newer LangChain releases reportedly prefer `.invoke(...)` over
# calling the chain object directly -- confirm against the installed version.
result = qa_chain({"query": query})

# Quick look at which fields the chain returned.
print(result.keys())

# --- Show the generated answer ---
print("\n--- Generated Whitepaper/Answer ---\n", result["result"], sep="\n")

# --- Show the source documents behind the answer ---
# Each entry prints its 'page' metadata (or 'N/A') and the first 200 characters.
print("\n--- Source Documents Used ---\n")
for doc in result["source_documents"]:
    page = doc.metadata.get("page", "N/A")
    snippet = doc.page_content[:200]
    print(f"- Page {page}: {snippet}...")

Llama.generate: 50 prefix-match hit, remaining 569 prompt tokens to eval
llama_perf_context_print:        load time =   33291.14 ms
llama_perf_context_print: prompt eval time =   34825.35 ms /   569 tokens (   61.20 ms per token,    16.34 tokens per second)
llama_perf_context_print:        eval time =   80757.72 ms /   255 runs   (  316.70 ms per token,     3.16 tokens per second)
llama_perf_context_print:       total time =  115779.08 ms /   824 tokens


dict_keys(['query', 'result', 'source_documents'])

--- Generated Whitepaper/Answer ---


    The document sheds light on the significance of data uncertainty in Life Cycle Assessment (LCA). LCA is a comprehensive approach to assessing the environmental impact of a product or process throughout its entire life cycle.
    
    Data uncertainty arises from various sources, including measurement errors, data collection errors, modeling errors, and natural variability. These sources of data uncertainty can significantly affect the results and conclusions of an LCA study.
    
    To address data uncertainty in LCA studies, various handling methods have been proposed. These methods include sensitivity analysis, uncertainty propagation, Monte Carlo simulation, Bayesian analysis, fuzzy logic, and others.
    
    Each of these handling methods has its own strengths and weaknesses, advantages and disadvantages, limitations and applicability, and other factors that need to be considered when se