### 1) Load indexes

In [1]:
import pickle, json, os
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

In [2]:
# Pull variables from a local .env file (if present) into the process
# environment, then read the Gemini key. The value is None when the
# variable is not set.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

In [19]:
# Embedding client handed to the FAISS store so it can embed incoming queries.
# NOTE(review): presumably the same model that was used to build the index — confirm.
emb = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GEMINI_API_KEY,
)

# Loading a saved FAISS store deserializes pickled data, which is why the
# explicit opt-in flag is required. Only point this at an index folder that
# was produced by a trusted pipeline.
faiss_store = FAISS.load_local(
    folder_path="../artifacts/faiss_index",
    embeddings=emb,
    allow_dangerous_deserialization=True,
)

In [20]:
from rank_bm25 import BM25Okapi

# Load the persisted BM25 corpus.
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that this project generated itself.
with open("../artifacts/bm25_corpus.pkl", "rb") as corpus_file:
    bm25_data = pickle.load(corpus_file)

# Build the lexical index from the stored "corpus_tokens" entry.
bm25 = BM25Okapi(bm25_data["corpus_tokens"])

In [21]:
# Build chunk_id -> chunk record from the JSONL file (one JSON object per line).
# If a chunk_id appears more than once, the last record wins.
with open("../artifacts/chunks.jsonl", "r", encoding="utf-8") as chunks_file:
    chunks_map = {
        record["chunk_id"]: record
        for record in map(json.loads, chunks_file)
    }

### 2) Retrieval functions 

In [22]:
import numpy as np
from collections import defaultdict
import re

In [23]:
def tokenize(q):
  """Lower-case `q` and split it into word tokens.

  A token is a maximal run of word characters (letters, digits, underscore);
  punctuation is dropped, so "SYS.2" yields ["sys", "2"].

  NOTE(review): presumably this must match the tokenization used when the
  BM25 corpus was built — confirm against the indexing code.
  """
  lowered = q.lower()
  return [match.group(0) for match in re.finditer(r"\b\w+\b", lowered)]

In [24]:
def retrieve_bm25(query, k = 10):
  """Return the `k` highest-scoring BM25 hits for `query`.

  Args:
    query: Free-text question; tokenized with `tokenize` before scoring.
    k: Maximum number of hits to return.

  Returns:
    A list of dicts ordered by descending BM25 score, each with the keys
    "chunk_id", "score", "source" (always "bm25"), "text", "doc_id", "page".

  Raises:
    KeyError: if a selected corpus position has no entry in `chunks_map`.

  NOTE(review): the corpus position returned by argsort is used directly as
  the chunk_id, i.e. this assumes BM25 document i is chunk i — confirm
  against the code that wrote the BM25 corpus.
  NOTE(review): hits are not filtered by score, so chunks with no matching
  query term can still be returned when fewer than `k` chunks match.
  """
  scores = bm25.get_scores(tokenize(query))
  top_positions = np.argsort(scores)[::-1][:k]
  out = []
  for pos in top_positions:
    # argsort yields numpy.int64; store a plain int so the result is
    # JSON-serializable and has the same id type as the dense hits.
    cid = int(pos)
    chunk = chunks_map[cid]
    out.append({
      "chunk_id": cid,
      "score": float(scores[pos]),
      "source" : "bm25",
      "text": chunk["text"],
      "doc_id": chunk["doc_id"],
      "page": chunk["page"],
      })
  return out

In [25]:
def retrieve_dense(query, k = 10):
  """Return up to `k` hits for `query` from the FAISS vector store.

  Each hit is a dict with the keys "chunk_id", "score", "source" (always
  "dense"), "text", "doc_id" and "page". The chunk id and page come from the
  document metadata (page is None when absent); the doc id is looked up in
  `chunks_map`. "score" is the negated value returned by the store.

  NOTE(review): the store's value is presumably a distance (lower = closer),
  which is why it is negated — confirm for the index's metric.
  """
  hits = faiss_store.similarity_search_with_score(query, k = k)
  return [
    {
      "chunk_id": doc.metadata["chunk_id"],
      "score": float(-dist),
      "source": "dense",
      "text": doc.page_content,
      "doc_id": chunks_map[doc.metadata["chunk_id"]]["doc_id"],
      "page": doc.metadata.get("page", None),
    }
    for doc, dist in hits
  ]

In [26]:
def rrf_fusion(candidates_lists, top_k = 8 , k_rff = 60):
  """Merge several ranked candidate lists with Reciprocal Rank Fusion.

  Every occurrence of a chunk at 0-based position `rank` in a list adds
  1 / (k_rff + rank + 1) to that chunk's fused score.

  Args:
    candidates_lists: Iterable of ranked lists (best first), e.g.
      [list_from_bm25, list_from_dense]. Each item is a dict with at least
      a "chunk_id" key.
    top_k: Number of fused results to return.
    k_rff: Smoothing constant of the RRF formula.

  Returns:
    Up to `top_k` candidate dicts ordered by descending fused score. For a
    chunk found in several lists, the dict from the first list containing it
    is returned unchanged (its "score"/"source" are the retriever's own
    values, not the fused score). Ties keep first-seen order.
  """
  score_by_id = defaultdict(float)
  first_seen = {}  # chunk_id -> first candidate dict encountered for that id

  for cand_list in candidates_lists:
    for rank, item in enumerate(cand_list):
      cid = item["chunk_id"]
      score_by_id[cid] += 1.0 / (k_rff + rank + 1)
      # Remember the representative here instead of rescanning every list
      # for each fused id afterwards.
      first_seen.setdefault(cid, item)

  # sorted() is stable, so equal scores stay in first-seen order.
  ranked = sorted(score_by_id.items(), key = lambda pair: pair[1], reverse=True)[:top_k]
  return [first_seen[cid] for cid, _ in ranked]

### 3) Generating the response

In [27]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage, SystemMessage

In [28]:
# Chat model used for answer generation: low temperature, and at most two
# retries on a failed request.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",
    api_key=GEMINI_API_KEY,
    temperature=0.2,
    max_retries=2,
)

In [29]:
# System prompt: restricts the model to the supplied context and fixes the
# citation format. This is a plain string, not an f-string, so the
# {doc_id}/{page}/{chunk_id} placeholders reach the model verbatim.
SYSTEM = " ".join([
    "You are an assistant answering automotive standards questions (ASPICE/AUTOSAR).",
    "Answer ONLY from the provided context. If the answer is not in the context, say 'Insufficient context'.",
    "After each factual sentence, add citations like [doc:{doc_id}, page:{page}, chunk:{chunk_id}].",
])

In [30]:
def build_context_snippets(evidences):
  """Render evidence dicts as one context string for the prompt.

  Each evidence becomes a citation header line
  "[doc:<doc_id>, page:<page>, chunk:<chunk_id>]" followed by its text;
  snippets are separated by a "---" line. An empty input yields "".
  """
  return "\n---\n".join(
    f"[doc:{e['doc_id']}, page:{e['page']}, chunk:{e['chunk_id']}]\n{e['text']}\n"
    for e in evidences
  )

In [31]:
def answer_query_vanilla(query, k_each=10, top_ctx=8):
    """Answer `query` with hybrid retrieval followed by one LLM call.

    Args:
        query: The user question.
        k_each: Number of candidates requested from each retriever.
        top_ctx: Number of fused chunks placed in the prompt.

    Returns:
        A dict with "answer" (the content of the model response) and
        "evidence" (the fused candidate dicts that formed the context).
    """
    # Lexical candidates first, then dense; rrf_fusion merges the two rankings.
    candidate_lists = [
        retrieve_bm25(query, k=k_each),
        retrieve_dense(query, k=k_each),
    ]
    fused = rrf_fusion(candidate_lists, top_k=top_ctx)
    context_block = build_context_snippets(fused)

    prompt = (
        f"Question: {query}\n\n"
        f"Context (use strictly):\n{context_block}\n\n"
        "Return a concise answer with citations as instructed."
    )
    resp = llm.invoke([
        SystemMessage(content=SYSTEM),
        HumanMessage(content=prompt),
    ])
    return {"answer": resp.content, "evidence": fused}

### 4) Practice on 5 queries

In [32]:
# Smoke-test questions for the pipeline.
queries = [
    # ASPICE
    "What does ASPICE SYS.2 require?",
    "What are the work products expected in ASPICE SYS.1?",
    # AUTOSAR
    "In AUTOSAR ECU State Manager, what is the purpose of RUN state?",
    "How does AUTOSAR ensure a safe transition between ECU states?",
    # Cross-process comparison
    "What is the difference between ASPICE SYS.2 and SWE.1 objectives?",
]

In [33]:
import time

# The free-tier quota reported by the API for this model is 2 generate
# requests per minute; issuing the queries back-to-back triggers
# 429 ResourceExhausted retries on every call. Space the requests out so
# each one stays within that quota. Raise or lower this for other plans.
SECONDS_BETWEEN_QUERIES = 30

for idx, q in enumerate(queries):
    if idx:
        # Pause between queries only; there is nothing to wait for before the first.
        time.sleep(SECONDS_BETWEEN_QUERIES)
    res = answer_query_vanilla(q)
    print("Q:", q)
    print(res["answer"])
    print("="*80)

Q: What does ASPICE SYS.2 require?
Based on the context, ASPICE SYS.2 is System Requirements Analysis [doc:Automotive_SPICE_PAM_31_EN, page:4, chunk:26]. The analysis involves system requirements which include the functions and capabilities of the system; business, organizational and user requirements; and safety, security, interface, operations, and maintenance requirements [doc:Automotive_SPICE_PAM_31_EN, page:115, chunk:659]. The requirements should identify the system overview and any interrelationship constraints between system elements [doc:Automotive_SPICE_PAM_31_EN, page:115, chunk:659]. It also identifies design considerations for each system element, such as memory, hardware interface, user interface, performance, and security requirements [doc:Automotive_SPICE_PAM_31_EN, page:115, chunk:660].


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 3
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing det

Q: What are the work products expected in ASPICE SYS.1?
Based on the provided context, the output work products for the process preceding SYS.2 (which is SYS.1) are:

*   08-19 Risk management plan [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   08-20 Risk mitigation plan [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   13-04 Communication record [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   13-19 Review record [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   13-21 Change control record [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   15-01 Analysis report [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]
*   17-03 Stakeholder Requirements [doc:Automotive_SPICE_PAM_31_EN, page:38, chunk:219]


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For mor

Q: In AUTOSAR ECU State Manager, what is the purpose of RUN state?
Insufficient context.


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 48
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing de

Q: How does AUTOSAR ensure a safe transition between ECU states?
AUTOSAR ensures safe transitions between ECU states through the cooperation of the ECU State Manager (EcuM) and the BSW Mode Manager (BswM) [doc:AUTOSAR_SWS_ECUStateManager, page:92, chunk:1061]. While the EcuM arbitrates requests for state changes from Software Components (SW-Cs), it is the BswM that is responsible for monitoring the ECU and ultimately deciding when a transition to a different mode can be made [doc:AUTOSAR_SWS_ECUStateManager, page:92, chunk:1061], [doc:AUTOSAR_SWS_ECUStateManager, page:19, chunk:801].

In cases where an error makes it impossible to continue processing, the EcuM can invoke the `EcuM_ErrorHook` [doc:AUTOSAR_SWS_ECUStateManager, page:124, chunk:1184]. This allows an integrator to define how the ECU is stopped, with options including a reset, halt, or transitioning to a "safe state" [doc:AUTOSAR_SWS_ECUStateManager, page:124, chunk:1184].


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-pro"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 2
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 46
}
].
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing de

Q: What is the difference between ASPICE SYS.2 and SWE.1 objectives?
Based on the provided context, SYS.2 is System Requirements Analysis, while SWE.1 is Software Requirements Analysis [doc:Automotive_SPICE_PAM_31_EN, page:124, chunk:706]. The SWE.1 process uses the system requirements and the system architecture as inputs to specify the software requirements [doc:Automotive_SPICE_PAM_31_EN, page:46, chunk:265]. Additionally, consistency and bidirectional traceability are established between system requirements and software requirements [doc:Automotive_SPICE_PAM_31_EN, page:46, chunk:265].
