In [1]:
import os
from dotenv import load_dotenv
_ = load_dotenv()

#### build index

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
# Embedding model used as the default for every vector store built below.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

# Recursive splitter sized through the tiktoken encoder (chunk size 500, overlap 100).
text_spliter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=100,
)


In [None]:
import os
import glob
from langchain_text_splitters import MarkdownHeaderTextSplitter

# Markdown heading markers to split on, each paired with the metadata key
# under which that heading's text is stored on the resulting chunks.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

docs_dir = 'docs'
doc_splits = []

for path in glob.glob(os.path.join(docs_dir, '*')):
    # Only markdown and plain-text files are indexed.
    if not path.lower().endswith(('.md', '.txt')):
        continue

    with open(path, 'r', encoding='utf-8') as f:
        file_content = f.read()

    # split_text works on the raw string (split_documents is not used here).
    header_splits = markdown_splitter.split_text(file_content)

    # Record the originating file name on every chunk of this file.
    source_name = os.path.basename(path)
    for chunk in header_splits:
        chunk.metadata["source"] = source_name

    doc_splits.extend(header_splits)

# # Show the result
# for chunk in doc_splits[:3]: # Look at first 3 chunks
#     print(f"Metadata: {chunk.metadata}")
#     print(f"Content: {chunk.page_content[:100]}...")
#     print("-" * 20)

Metadata: {'Header 1': 'ONE ZERO Bank Guide on Card Usage and Services', 'Header 2': 'Using the mobile application', 'Header 3': 'Cards via the mobile application - issue a new card, card-related actions, PIN code, join digital wallet', 'source': 'cards.md'}
Content: You can order a new card, cancel a card, replace a card, view your card transactions, view or change...
--------------------
Metadata: {'Header 1': 'ONE ZERO Bank Guide on Card Usage and Services', 'Header 2': 'Using the mobile application', 'Header 3': 'Viewing card transactions and card fees', 'source': 'cards.md'}
Content: You can view the transactions and fees on your card in the credit card company - [Isracard](https://...
--------------------
Metadata: {'Header 1': 'ONE ZERO Bank Guide on Card Usage and Services', 'Header 2': 'Traveling Abroad', 'Header 3': 'What do you need to know before traveling abroad?', 'source': 'cards.md'}
Content: A few important things you should know before traveling:  
* You should update

In [65]:
# Embed every header-level chunk and index it in an in-memory store.
vector_store = InMemoryVectorStore.from_documents(doc_splits, embedding=embeddings)


In [64]:
len(doc_splits)

226

In [67]:
def create_vector_store_md_spliter(docs_dir='docs', save_path="vector_store_data_md_spliter.json", text_splitter=text_spliter, embeddings=embeddings):
    """Build an in-memory vector store from header-split files and dump it to disk.

    Every ``.md`` / ``.txt`` file directly inside ``docs_dir`` is split on its
    ``#``, ``##`` and ``###`` headings; each resulting chunk is tagged with the
    file's base name under the ``"source"`` metadata key.

    Args:
        docs_dir: Directory whose files are indexed. This argument is now
            honoured; previously it was overwritten with ``'docs'`` inside
            the function.
        save_path: Path the vector store is dumped to.
        text_splitter: Not used by this header-based variant; kept so the
            signature stays parallel to ``create_vector_store`` and existing
            callers keep working.
        embeddings: Embedding object passed to
            ``InMemoryVectorStore.from_documents``.

    Returns:
        The populated ``InMemoryVectorStore``.
    """
    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]
    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

    doc_splits = []
    for path in glob.glob(os.path.join(docs_dir, '*')):
        # Only markdown and plain-text files are indexed.
        if not path.lower().endswith(('.md', '.txt')):
            continue

        with open(path, 'r', encoding='utf-8') as f:
            file_content = f.read()

        # split_text works on the raw string (split_documents is not used here).
        header_splits = markdown_splitter.split_text(file_content)

        # Tag every chunk with the file it came from.
        source_name = os.path.basename(path)
        for chunk in header_splits:
            chunk.metadata["source"] = source_name

        doc_splits.extend(header_splits)

    vector_store = InMemoryVectorStore.from_documents(
        documents=doc_splits, embedding=embeddings)

    vector_store.dump(save_path)
    return vector_store

def create_vector_store(docs_dir='docs', save_path="vector_store_data.json", text_splitter=text_spliter, embeddings=embeddings):
    """Build an in-memory vector store from splitter-chunked files and dump it to disk.

    Every ``.md`` / ``.txt`` file directly inside ``docs_dir`` is loaded as one
    document (tagged with its base name under ``"source"``), chunked with
    ``text_splitter.split_documents`` and embedded.

    Args:
        docs_dir: Directory whose files are indexed. This argument is now
            honoured; previously it was overwritten with ``'docs'`` inside
            the function.
        save_path: Path the vector store is dumped to.
        text_splitter: Splitter whose ``split_documents`` chunks the loaded files.
        embeddings: Embedding object passed to
            ``InMemoryVectorStore.from_documents``.

    Returns:
        The populated ``InMemoryVectorStore``.
    """
    import glob
    # Document is imported from langchain_core (already a dependency of this
    # notebook) rather than the legacy ``langchain.schema`` path.
    from langchain_core.documents import Document

    docs_list = []
    for path in glob.glob(os.path.join(docs_dir, '*')):
        # only load markdown and text files (adjust as needed)
        if not path.lower().endswith(('.md', '.txt')):
            continue
        with open(path, 'r', encoding='utf-8') as f:
            text = f.read()
        docs_list.append(
            Document(page_content=text, metadata={'source': os.path.basename(path)})
        )

    doc_splits = text_splitter.split_documents(docs_list)

    vector_store = InMemoryVectorStore.from_documents(
        documents=doc_splits, embedding=embeddings)

    vector_store.dump(save_path)
    return vector_store

def retrieve_context(query: str, vector_store, k=5):
    """Run a similarity search for ``query`` and format the hits as one context string.

    Each of the ``k`` hits is rendered as a ``Source: <metadata>`` line followed
    by a ``Content: <text>`` line; hits are separated by a blank line.
    """
    hits = vector_store.similarity_search(query, k=k)
    sections = []
    for hit in hits:
        sections.append(f"Source: {hit.metadata}\nContent: {hit.page_content}")
    return "\n\n".join(sections)

In [73]:
vector_store = create_vector_store_md_spliter()

In [76]:
retrived = retrieved_docs = vector_store.similarity_search("What are the fees for using my card abroad if I am on the ZERO subscription plan?")


In [79]:
retrived[0].id

'8484ac25-4f0a-4eff-8e98-51911f216333'

In [None]:
# Reload the dumped store; pass the same embedding object that was used to create it.
loaded_vector_store = InMemoryVectorStore.load(
    "vector_store_data_md_spliter.json",
    embedding=embeddings,
)

### create the QNA:

In [71]:
# from urllib import response
from langchain.chat_models import init_chat_model
model = init_chat_model("gpt-4.1")

def answer_query(query: str, vector_store, model):
    """Answer ``query`` with the chat model, using context retrieved from the vector store.

    Args:
        query: The user's question.
        vector_store: Store handed to ``retrieve_context`` to fetch supporting chunks.
        model: Chat model exposing ``invoke(messages)``; the reply's ``.content``
            attribute is returned.

    Returns:
        The ``.content`` of the model's reply.
    """
    context = retrieve_context(query, vector_store)

    # The question is carried by the user message only. The system prompt used
    # to contain a "{query}" placeholder in a plain (non-f) string, so the
    # literal braces were sent to the model.
    system_prompt = (
        "Use the following context to answer the question. "
        "If the context does not contain the answer, say you don't know. "
        "if you use spesific information from the context, tell the user the source headers."
    )
    # Real newlines instead of the literal "/n" text, and no extra "?" appended
    # to a query that normally already ends with one.
    user_prompt = f"Context:\n{context}\n\n Question: {query}\n\n your answer: \n\n"

    results = model.invoke([
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]).content
    return results


In [72]:
answer_query(query="How can I add my credit card to my phone's digital wallet?", vector_store=vector_store,model=model)

'To add your credit card to your phone\'s digital wallet, follow these steps based on your device type:\n\nFor Apple Devices (iPhone, iPad):\n(Source: Setting Up Apple Pay on Apple Devices, cards.md)\n\n- Open your device settings.\n- Tap on your name, then go to "Payment and Shipping."\n- Select "Add Payment Method" and enter your credit card details.\n- Alternatively, you can add your card directly through the Cards page in your ONE ZERO Bank app: Go to the Cards page → Add to Wallet → select your device.\n- You can also follow this [step-by-step Apple Pay video](https://www.youtube.com/watch?v=nrBfsXHZO8s).\n- If you want to change the default card: Go to iPhone settings → Wallet & Apple Pay → select the desired card under "Transaction Defaults."\n\nApple allows up to 8 charge cards to be linked, and all must belong to the same owner.\n\nFor Android Devices (Google Pay):\n(Source: Setting Up Google Pay on Android Devices, cards.md)\n\n- Download the Google Pay app from the Play Stor

In [66]:
answer_query("What is the purpose of this project?", vector_store, model)

'The purpose of this project is to provide ONE ZERO Bank clients with comprehensive guidance, tools, and informational resources related to securities trading and portfolio management. The project aims to streamline the investment process by offering:\n\n- Educational materials about various investment instruments (such as tracking funds, ETFs, mutual funds, stocks, and bonds).\n- AI-generated reviews and updates on clients’ stock portfolios.\n- Objective news feeds on stocks, gathered via third-party AI-driven methods.\n- Insights into market mechanics, regulatory considerations, risk management, and trading instructions.\n- Access to personalized banking services, budgeting advice, and long-term financial planning.\n\nThe overarching goal is to empower investors with knowledge and digital tools to make informed decisions, manage risks, and efficiently achieve their financial and investment objectives—while clarifying that all information provided is for informational purposes only an

#### todo - rerank

In [None]:
from langchain_community.document_compressors.flashrank_rerank import FlashrankRerank

def retrieve_context_with_rerank(query: str, vector_store, k=5, fetch_k=20):
    """Retrieve candidates, rerank them with ``FlashrankRerank`` and format the top ``k``.

    Args:
        query: The user query.
        vector_store: The vector store instance to search.
        k: The final number of documents to return to the LLM.
        fetch_k: The number of candidates fetched from the vector store before
            reranking (usually 3x-4x of ``k``).

    Returns:
        One string in which each kept document is rendered as a
        ``Source: <source>`` line and a ``Content: <text>`` line, with documents
        separated by a blank line. ``Unknown`` is used when a document has no
        ``"source"`` metadata.
    """
    # 1. Initial retrieval: fetch a larger candidate set for the reranker to order.
    retrieved_docs = vector_store.similarity_search(query, k=fetch_k)

    # 2. Reranking. FlashrankRerank keeps only `top_n` documents (default 3),
    #    so top_n must follow k; otherwise any k > 3 silently yields 3 documents.
    reranker = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2", top_n=k)
    compressed_docs = reranker.compress_documents(documents=retrieved_docs, query=query)

    # 3. Defensive slice: never hand more than k documents to the LLM.
    final_docs = compressed_docs[:k]

    # 4. Serialize for the LLM.
    serialized = "\n\n".join(
        f"Source: {doc.metadata.get('source', 'Unknown')}\nContent: {doc.page_content}"
        for doc in final_docs
    )
    return serialized

ModuleNotFoundError: No module named 'flashrank'

In [54]:
from flashrank import Ranker, RerankRequest

def retrieve_context_with_rerank(query: str, vector_store, k=5, fetch_k=20):
    """Retrieve candidates and rerank them with FlashRank directly (no LangChain wrapper).

    Args:
        query: The user query.
        vector_store: The vector store instance to search.
        k: The final number of passages to return to the LLM.
        fetch_k: The number of candidates fetched from the vector store before
            reranking.

    Returns:
        One string in which each kept passage is rendered as a
        ``Source: <source>`` line and a ``Content: <text>`` line, with passages
        separated by a blank line. ``Unknown`` is used when a passage has no
        ``"source"`` metadata.
    """
    # 1. Initial retrieval: get a larger candidate set.
    retrieved_docs = vector_store.similarity_search(query, k=fetch_k)

    # 2. Convert the documents into the passage dictionaries handed to FlashRank.
    passages = [
        {"id": idx, "text": doc.page_content, "meta": doc.metadata}
        for idx, doc in enumerate(retrieved_docs)
    ]

    # 3. Reranking. FlashRank's entry point is Ranker.rerank(RerankRequest(...));
    #    the previous ranker.rank(query=..., docs=...) call is not part of its API.
    ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="./opt")
    reranked_results = ranker.rerank(RerankRequest(query=query, passages=passages))

    # 4. Keep only the top k results.
    top_results = reranked_results[:k]

    # 5. Serialize for the LLM from the reranked passage dictionaries.
    serialized = "\n\n".join(
        f"Source: {res['meta'].get('source', 'Unknown')}\nContent: {res['text']}"
        for res in top_results
    )

    return serialized

ModuleNotFoundError: No module named 'flashrank'

In [70]:
from sentence_transformers import CrossEncoder

# Load the model once (it will download automatically)
# 'ms-marco-MiniLM-L-6-v2' is a fast, high-quality reranker
reranker_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def retrieve_context_with_rerank(query: str, vector_store, k=5, fetch_k=20):
    """Fetch ``fetch_k`` candidates, score them with ``reranker_model`` and format the best ``k``.

    Every candidate is paired with the query and scored via
    ``reranker_model.predict``; candidates are ordered by descending score and
    the first ``k`` are rendered as ``Source: <source>`` / ``Content: <text>``
    sections separated by a blank line.
    """
    # Candidate set from the vector store.
    candidates = vector_store.similarity_search(query, k=fetch_k)

    # One [query, text] pair per candidate, scored in a single predict call.
    relevance = reranker_model.predict(
        [[query, candidate.page_content] for candidate in candidates]
    )

    # Highest score first; keep each document next to its own score.
    ranked = list(zip(candidates, relevance))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    sections = []
    for candidate, _score in ranked[:k]:
        sections.append(
            f"Source: {candidate.metadata.get('source', 'Unknown')}\nContent: {candidate.page_content}"
        )
    return "\n\n".join(sections)

ModuleNotFoundError: No module named 'sentence_transformers'

In [51]:
retrieve_context_with_rerank("What is the purpose of this project?", loaded_vector_store)

PydanticUserError: `FlashrankRerank` is not fully defined; you should define `Ranker`, then call `FlashrankRerank.model_rebuild()`.

For further information visit https://errors.pydantic.dev/2.9/u/class-not-fully-defined

### evaluate

In [None]:
metrix = ['groundness', 'relevance', 'completeness', 'helpfulness']

In [None]:
# Placeholder so the cell parses (a bare `name =` is a SyntaxError and breaks Run All).
prompt_answer_groundness = None  # TODO: write the answer-groundedness evaluation prompt

things to add:
- bm25
- reranker

decisions to make:
- a chunking strategy - done (for now)
- embedding model
- k
- prompt
- tagging? retrieve by context

evaluation
- chunk relevancy
- recall (was the relevant chunk returned?)
- answer groundedness
- answer correctness
- answer relevance

In [None]:
embbeding_models_list = ['text-embedding-3-small','text-embedding-3-large','text-embedding-ada-002']
chanking_strategy_list = ['recursive', 'markdown','hierarchical', 'simple_splitter']