In [None]:
from src.rag.components.retriever import HybridRetriever

In [None]:
# Identifier of the embedding model, in `org/name` form. It is also joined onto
# the local models directory to form the on-disk model path.
embedding_model_id = "dunzhang/stella_en_400M_v5"

In [None]:
from pathlib import Path

In [None]:
# The `models` directory under the current working directory.
# Note: this depends on where the kernel was started, not on the notebook's location.
model_path = Path.cwd() / "models"

In [None]:
# Location of the embedding model on disk, <models dir>/<model id>, as a plain string.
# str() is the idiomatic way to get a path's string form; calling the __str__
# dunder directly gives the same value but should be avoided.
embedding_model_path = str(model_path.joinpath(embedding_model_id))

In [None]:
# Keyword arguments handed to HybridRetriever as `cross_encoder_kwargs`.
# NOTE(review): `model_name` points at the embedding model's directory — confirm
# that reusing the embedding model as the cross-encoder is intended.
cross_encoder_kwargs = dict(
    model_name=embedding_model_path,
    trust_remote_code=True,
    local_files_only=True,
    config_kwargs=dict(use_memory_efficient_attention=False, unpad_inputs=False),
    device="cpu",
)

In [None]:

# Keyword arguments handed to HybridRetriever as `sentence_transformer_kwargs`.
# `cache_folder` receives the models directory as a Path object, whereas
# `model_name_or_path` receives the model location as a str.
transformer_kwargs = dict(
    model_name_or_path=embedding_model_path,
    trust_remote_code=True,
    device="cpu",
    config_kwargs=dict(use_memory_efficient_attention=False, unpad_inputs=False),
    cache_folder=model_path,
)

In [None]:
# Name of the spaCy pipeline given to HybridRetriever as `spacy_model`.
spacy_model_id = "en_core_web_sm"

In [None]:
# Build the hybrid retriever from the cross-encoder settings, the
# sentence-transformer settings and the spaCy pipeline name.
# NOTE(review): constructing it presumably loads the models, so this cell may be slow — confirm.
retriever = HybridRetriever(
    cross_encoder_kwargs=cross_encoder_kwargs,
    spacy_model=spacy_model_id,
    sentence_transformer_kwargs=transformer_kwargs,
    language="english"
)

In [None]:
# Question used for semantic search, keyword extraction and the generator prompt.
query = "How does an interest only mortgage work?"

# Other sample questions to try (copy one into `query`):
#   "How can I speed up the mortgage completion process?"
#   "How does an interest only mortgage work?"
#   "How do you work out your LTV ratio?"

In [None]:
# Run the retriever's semantic search for the question.
# `limit=10` presumably caps the number of returned documents — confirm against HybridRetriever.
semantic_search_results = retriever.semantic_search(query=query, limit=10)

In [None]:
# Print the text of each semantic-search hit, followed by a ten-dash divider line.
for item in semantic_search_results:
    print(item.content, "-" * 10, sep="\n")

In [None]:
# Ask the retriever to extract keywords from the question; the result feeds the keyword search.
keywords = retriever.perform_keyword_extraction(query)

In [None]:
# Show the extracted keywords as the cell output.
keywords

In [None]:
# Run the retriever's keyword search using the extracted keywords.
# `limit=10` presumably caps the number of returned documents — confirm against HybridRetriever.
keywords_search_results = retriever.keyword_search(keywords=keywords, limit=10)

We could add a reranker, but we decided to keep things simple for now.

For some reason, the encoder is taking a long time to return.

In [None]:
# Print the text of each keyword-search hit, followed by a ten-dash divider line.
for item in keywords_search_results:
    print(item.content, "-" * 10, sep="\n")

In [None]:
# Disabled: scoring of the combined keyword + semantic hits with the retriever's
# cross-encoder. Each (query, document text) pair would be passed to `predict`.
# Uncomment to experiment with reranking.
# scores = retriever.cross_encoder.predict(
#    [(query, item.content) for item in keywords_search_results + semantic_search_results])

### Generator

In [None]:
from src.rag.components.generator import LLamaCppGeneratorComponent

In [None]:
# Prompt text given to the generator component as `prompt`
# (presumably used as the system prompt — confirm in LLamaCppGeneratorComponent).
prompt = "you are a helpful mortgage advisor"

In [None]:
# Model identifier given to the generator component as `model_name`.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

In [None]:
# Create the generator component, configured with an API on localhost port 8080.
# NOTE(review): presumably a llama.cpp server must already be running at this
# address before the following cells are executed — confirm.
llama_cpp_generator = LLamaCppGeneratorComponent(
    api_url="http://127.0.0.1:8080",
    model_name=model_name,
    prompt=prompt
)

In [None]:
# Presumably checks that the generator's API is reachable before any generation — confirm.
# NOTE(review): `_ping_api` is underscore-prefixed (private by convention);
# prefer a public health-check method if the component offers one.
llama_cpp_generator._ping_api()

In [None]:
# Collect the text of every retrieved document for the generator.
# Semantic hits come first, then keyword hits. A text returned by both searches
# is kept only once, so the generator does not receive the same passage twice.
documents = []
for doc in semantic_search_results + keywords_search_results:
    # Equality-based membership test: needs no hashing of `content`, and the
    # linear scan is cheap because each search above is run with limit=10.
    if doc.content not in documents:
        documents.append(doc.content)

In [None]:
# Values passed to the generator as `template_values`: the retrieved texts and the question.
template_input = dict(documents=documents, question=query)

In [None]:
# Build the chat input from the template values and show it as the cell output
# (the result is not assigned; this is for inspection only).
llama_cpp_generator.generate_chat_input(template_values=template_input)

In [None]:
# Run the generator on the template values and keep its result for display.
response = llama_cpp_generator.run(template_values=template_input)

In [None]:
from pprint import pprint

In [None]:
# Pretty-print the generator's response.
pprint(response)