### 1) Load index

In [1]:
import pickle, json, os
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

In [2]:
# Let python-dotenv populate the process environment, then read the Gemini
# key from it. GEMINI_API_KEY is None when the variable is not set.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

In [3]:
# Embedding client handed to the FAISS store loaded below.
emb = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GEMINI_API_KEY)

# Load the persisted FAISS index. The flag explicitly opts in to
# deserializing the stored index, so only load artifacts you trust.
faiss_store = FAISS.load_local(
  "../artifacts/faiss_index",
  emb,
  allow_dangerous_deserialization=True,
)

In [4]:
from rank_bm25 import BM25Okapi

# Read the pickled BM25 corpus and rebuild the index from its token lists.
# NOTE: pickle.load can execute arbitrary code; only load artifacts that
# were produced by this project.
with open("../artifacts/bm25_corpus.pkl", "rb") as f:
  bm25_data = pickle.load(f)

bm25 = BM25Okapi(bm25_data["corpus_tokens"])

In [5]:
# Build the chunk_id -> chunk record lookup from the JSONL artifact
# (one JSON object per line).
chunks_map = {}
with open("../artifacts/chunks.jsonl", "r", encoding="utf-8") as f:
  for line in f:
    # json.loads raises on an empty string, so skip blank lines
    # instead of aborting the whole load.
    if not line.strip():
      continue
    chunk = json.loads(line)
    chunks_map[chunk["chunk_id"]] = chunk

### 2) Retrieval functions

In [7]:
import numpy as np
from collections import defaultdict
import re

In [8]:
def tokenize(q):
  """Lower-case ``q`` and return its word tokens as a list of strings.

  Tokens are the matches of the word-boundary pattern below, so
  punctuation and whitespace act as separators and are dropped.
  """
  lowered = q.lower()
  word_pattern = r"\b\w+\b"
  return re.findall(word_pattern, lowered)

In [9]:
def retrieve_bm25(query, k = 10):
  """Return the ``k`` highest-scoring BM25 hits for ``query``.

  The query is tokenized with ``tokenize`` and scored against the
  module-level ``bm25`` index. Each hit is a dict with the keys
  ``chunk_id``, ``score``, ``source`` (always ``"bm25"``), ``text``,
  ``doc_id`` and ``page``; hits are ordered by descending score.

  NOTE(review): the position of a document in the BM25 corpus is used
  directly as its ``chunk_id`` key in ``chunks_map`` -- confirm that the
  corpus was built in chunk_id order.
  """
  scores = bm25.get_scores(tokenize(query))
  top_positions = np.argsort(scores)[::-1][:k]
  out = []
  for pos in top_positions:
    # np.argsort yields numpy integers; emit a plain int so chunk_id is
    # JSON-serialisable and has the same type in every hit.
    cid = int(pos)
    chunk = chunks_map[cid]
    out.append({
      "chunk_id": cid,
      "score": float(scores[cid]),
      "source": "bm25",
      "text": chunk["text"],
      "doc_id": chunk["doc_id"],
      "page": chunk["page"],
    })
  return out

In [10]:
def retrieve_dense(query, k = 10):
  """Return up to ``k`` hits for ``query`` from the FAISS vector store.

  Each hit is a dict with the keys ``chunk_id``, ``score``, ``source``
  (always ``"dense"``), ``text``, ``doc_id`` and ``page``. ``score`` is
  the negated value returned by ``similarity_search_with_score``
  (presumably a distance, so a larger score means a closer match --
  confirm against the index metric). ``page`` comes from the document
  metadata and is ``None`` when absent; ``doc_id`` comes from
  ``chunks_map``.
  """
  def _to_hit(doc, dist):
    cid = doc.metadata["chunk_id"]
    return {
      "chunk_id": cid,
      "score": float(-dist),
      "source": "dense",
      "text": doc.page_content,
      "doc_id": chunks_map[cid]["doc_id"],
      "page": doc.metadata.get("page", None),
    }

  scored_docs = faiss_store.similarity_search_with_score(query, k = k)
  return [_to_hit(doc, dist) for doc, dist in scored_docs]

In [11]:
def rrf_fusion(candidates_lists, top_k = 8 , k_rff = 60):
  """Fuse several ranked candidate lists with Reciprocal Rank Fusion.

  Every occurrence of a chunk at 0-based position ``rank`` in a list adds
  ``1 / (k_rff + rank + 1)`` to that chunk's fused score.

  Args:
    candidates_lists: iterable of ranked lists (e.g. ``[bm25_hits,
      dense_hits]``); each item is a dict with at least a ``"chunk_id"`` key.
    top_k: maximum number of fused results to return.
    k_rff: smoothing constant added to every rank in the RRF formula.

  Returns:
    Up to ``top_k`` candidate dicts ordered by descending fused score
    (ties keep first-seen order). When a chunk occurs in several lists,
    the dict from its first occurrence is returned.
  """
  score_by_id = defaultdict(float)
  first_item_by_id = {}  # chunk_id -> first candidate dict seen for it

  for cand_list in candidates_lists:
    for rank, item in enumerate(cand_list):
      cid = item["chunk_id"]
      score_by_id[cid] += 1.0 / (k_rff + rank + 1)
      # Remember the first occurrence so the result lookup is O(1)
      # instead of rescanning every list for each ranked id.
      first_item_by_id.setdefault(cid, item)

  # sorted() is stable, so equal scores keep their insertion order.
  ranked = sorted(score_by_id.items(), key = lambda x: x[1], reverse=True)[:top_k]
  return [first_item_by_id[cid] for cid, _ in ranked]

### Reranker (Cross-Encoder)

In [None]:
from sentence_transformers import CrossEncoder
import numpy as np

# Cross-encoder used to re-score (query, passage) pairs. Creating it may
# download the model files, as the progress output of this cell shows.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def rerank_with_crossencoder(query, candidates, top_k=6):
    """Re-order ``candidates`` by cross-encoder relevance to ``query``.

    Args:
        query: the user question.
        candidates: list of dicts, each with a ``"text"`` key.
        top_k: maximum number of candidates to keep.

    Returns:
        Up to ``top_k`` of the original candidate dicts, best score first.
        An empty ``candidates`` list yields ``[]`` without calling the model.
    """
    # Guard the empty case: there is nothing to score, and an empty batch
    # should not be handed to the model (some sentence-transformers
    # versions inspect the first pair and fail on an empty list).
    if len(candidates) == 0:
        return []

    pairs = [(query, c["text"]) for c in candidates]
    scores = reranker.predict(pairs)  # bigger = better
    order = np.argsort(scores)[::-1][:top_k]
    return [candidates[i] for i in order]




config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

### 3) Generate the response

In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage, SystemMessage

In [13]:
# Chat model that writes the final answer from the retrieved context.
llm = ChatGoogleGenerativeAI(
  model="gemini-2.5-pro",
  api_key=GEMINI_API_KEY,
  temperature=0.2,
  max_retries=2,
)

In [14]:
# System prompt sent with every request. This is a plain (non-f) string, so
# {doc_id}, {page} and {chunk_id} reach the model as literal placeholders
# showing the citation format.
SYSTEM = (
"You are an assistant answering automotive standards questions (ASPICE/AUTOSAR). "
"Answer ONLY from the provided context. If the answer is not in the context, say 'Insufficient context'. "
"After each factual sentence, add citations like [doc:{doc_id}, page:{page}, chunk:{chunk_id}]."
)

In [15]:
def build_context_snippets(evidences):
  """Render evidence dicts as one context string for the prompt.

  Each evidence dict must provide ``doc_id``, ``page``, ``chunk_id`` and
  ``text``. A snippet is a citation header line followed by the chunk
  text; snippets are separated by a ``---`` line. An empty list gives
  an empty string.
  """
  separator = "\n---\n"
  return separator.join(
    f"[doc:{e['doc_id']}, page:{e['page']}, chunk:{e['chunk_id']}]\n{e['text']}\n"
    for e in evidences
  )

In [16]:
def answer_query_hybrid(query, k_each=12, fuse_top=12, rerank_top=6):
    """Answer ``query`` with hybrid retrieval, reranking and the chat model.

    Args:
        query: the user question.
        k_each: number of hits requested from each retriever (BM25, dense).
        fuse_top: number of candidates kept after rank fusion.
        rerank_top: number of candidates kept after cross-encoder reranking.

    Returns:
        A dict with ``"answer"`` (the model's response content) and
        ``"evidence"`` (the reranked candidate dicts given to the model).
    """
    # Stage 1: lexical and dense retrieval, in that order.
    candidate_lists = [
        retrieve_bm25(query, k=k_each),
        retrieve_dense(query, k=k_each),
    ]

    # Stage 2: fuse the two rankings, then rerank the fused pool.
    evidence = rerank_with_crossencoder(
        query,
        rrf_fusion(candidate_lists, top_k=fuse_top),
        top_k=rerank_top,
    )

    # Stage 3: assemble the prompt and query the model.
    context_block = build_context_snippets(evidence)
    prompt = "".join([
        f"Question: {query}\n\n",
        f"Context (use strictly):\n{context_block}\n\n",
        "Return a concise answer with citations as instructed.",
    ])
    messages = [SystemMessage(content=SYSTEM), HumanMessage(content=prompt)]
    reply = llm.invoke(messages)
    return {"answer": reply.content, "evidence": evidence}


### 4) Practice on 5 queries

In [17]:
# Sample questions covering both ASPICE and AUTOSAR material; each one is
# sent through answer_query_hybrid in the next cell.
queries = [
    "What does ASPICE SYS.2 require?",
    "What are the work products expected in ASPICE SYS.1?",
    "In AUTOSAR ECU State Manager, what is the purpose of RUN state?",
    "How does AUTOSAR ensure a safe transition between ECU states?",
    "What is the difference between ASPICE SYS.2 and SWE.1 objectives?"
]

In [18]:
# Run every sample query through the full pipeline and print the answer,
# followed by an 80-character separator line.
# NOTE(review): the queries are issued back-to-back; the captured output
# below shows 429 quota retries from the Gemini free tier.
for q in queries:
    res = answer_query_hybrid(q)
    print("Q:", q)
    print(res["answer"])
    print("="*80)

Q: What does ASPICE SYS.2 require?
Based on the context provided, ASPICE SYS.2 is the System Requirements Analysis process [doc:Automotive_SPICE_PAM_31_EN, page:12, chunk:72][doc:Automotive_SPICE_PAM_31_EN, page:4, chunk:26][doc:Automotive_SPICE_PAM_31_EN, page:11, chunk:67]. It is part of the System Engineering process group (SYS) [doc:Automotive_SPICE_PAM_31_EN, page:12, chunk:72]. The process includes base practices related to bidirectional traceability (SYS.2 BP6) and consistency (SYS.2 BP7) [doc:Automotive_SPICE_PAM_31_EN, page:123, chunk:701].

The provided context does not contain further details on the specific requirements, purpose, or outcomes of the SYS.2 process.


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 47
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing de

Q: What are the work products expected in ASPICE SYS.1?
Insufficient context.


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 12
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing de

Q: In AUTOSAR ECU State Manager, what is the purpose of RUN state?
The purpose of the RUN state is to have the ECU State Manager implement all its activities while the OS is up and running [doc:AUTOSAR_SWS_ECUStateManager, page:138, chunk:1234]. RUN is a standard state when ECU Mode Handling is used [doc:AUTOSAR_SWS_ECUStateManager, page:27, chunk:835]. The ECU Manager module arbitrates RUN and POST_RUN requests from software components [doc:AUTOSAR_SWS_ECUStateManager, page:13, chunk:780]. The mode port of the ECU State Manager module declares RUN as one of its modes, along with STARTUP, POST_RUN, SLEEP, and SHUTDOWN [doc:AUTOSAR_SWS_ECUStateManager, page:149, chunk:1270].
Q: How does AUTOSAR ensure a safe transition between ECU states?
Based on the provided context, AUTOSAR ensures safe transitions between ECU states through a cooperative mechanism between the ECU State Manager (EcuM) and the BSW Mode Manager (BswM) [doc:AUTOSAR_SWS_ECUStateManager, page:92, chunk:1061].

ECU states 