In [1]:
import textwrap
from typing import Dict, List, Optional


In [2]:
# System-level instructions that build_prompt() places at the very top of every
# prompt. They restrict the model to the retrieved EVIDENCE, fix the exact
# fallback sentence to use when the evidence does not contain the answer, and
# define the source-citation format.
# Note: the literal begins and ends with a newline character.
SYSTEM_PROMPT = """
You are VoltAI, a domain-specific assistant for Electric Vehicle market trends, charging infrastructure, battery ecosystem, and EV policy.
You must answer ONLY using the provided EVIDENCE from the IEA Global EV Outlook knowledge base.

Rules:
1) Use only information present in EVIDENCE.
2) If the answer is not found in EVIDENCE, respond exactly with: "Insufficient data in knowledge base."
3) Be factual, concise, and avoid assumptions.
4) When relevant, include numbers and year references.
5) Always provide sources in the format: [SOURCE: <source>, YEAR: <year>].
6) If the user asks follow-up questions, use CHAT HISTORY to understand what "it/that/they" refer to.
"""


In [3]:
def format_evidence(chunks: List[Dict], max_chars_per_chunk: int = 900) -> str:
    """
    Convert retrieved chunks into a readable EVIDENCE section for the model.

    Each chunk is rendered as a one-line metadata header followed by its text,
    and the rendered chunks are separated by a blank line.

    Args:
        chunks: Retrieved chunk dicts. Keys read: "text", "source", "year",
            "chunk_id" and "score". Missing "source"/"year"/"chunk_id" are
            rendered as "unknown", missing text as an empty string, and a
            missing, None or non-numeric score as 0.
        max_chars_per_chunk: Upper bound on the characters of text kept per
            chunk (applied after whitespace has been collapsed).

    Returns:
        The formatted evidence block, or "" when there are no chunks.
    """
    formatted = []
    for i, ch in enumerate(chunks or [], 1):
        # Collapse every whitespace run (newlines, "\r", tabs, repeated spaces)
        # into a single space so each chunk's text stays on one prompt line.
        txt = " ".join(str(ch.get("text") or "").split())
        # rstrip: the cut may land right after a word and leave a dangling space.
        txt = txt[:max_chars_per_chunk].rstrip()

        # float() accepts ints, floats and numpy scalars; an explicit None
        # (key present but null) or garbage falls back to 0 instead of
        # crashing the ".4f" format below.
        try:
            score = float(ch.get("score", 0))
        except (TypeError, ValueError):
            score = 0.0

        source = ch.get("source", "unknown")
        year = ch.get("year", "unknown")
        chunk_id = ch.get("chunk_id", "unknown")

        formatted.append(
            f"[EVIDENCE {i}] (SOURCE={source}, YEAR={year}, CHUNK_ID={chunk_id}, SCORE={score:.4f})\n"
            f"{txt}"
        )
    return "\n\n".join(formatted)


In [4]:
def format_chat_history(chat_history: List[Dict], max_turns: int = 6) -> str:
    """
    Render the most recent chat messages as "ROLE: content" lines.

    chat_history format:
    [
      {"role":"user","content":"..."},
      {"role":"assistant","content":"..."},
      ...
    ]

    Args:
        chat_history: Messages in chronological order (oldest first).
        max_turns: Maximum number of *messages* (not user/assistant pairs) to
            keep, counted from the end. Values <= 0 keep nothing.

    Returns:
        One line per kept message joined with newlines, or the string "None"
        when there is nothing to show.
    """
    # Guard max_turns explicitly: chat_history[-0:] is the WHOLE list and a
    # negative value would slice from the front, so neither means "no history".
    if not chat_history or max_turns <= 0:
        return "None"

    recent = chat_history[-max_turns:]
    formatted = []
    for msg in recent:
        # Tolerate messages with a missing role or a missing/None content
        # rather than raising in the middle of prompt construction.
        role = str(msg.get("role", "unknown")).upper()
        content = str(msg.get("content") or "").strip()
        formatted.append(f"{role}: {content}")
    return "\n".join(formatted)


In [5]:
def build_prompt(user_query: str, retrieved_chunks: List[Dict], chat_history: Optional[List[Dict]] = None) -> str:
    """
    Assemble the full prompt sent to the model.

    The prompt consists of these sections, in order and separated by a single
    blank line: the system prompt, CHAT HISTORY, EVIDENCE, USER QUESTION and
    the answer-formatting INSTRUCTIONS.

    Args:
        user_query: The user's current question, inserted verbatim.
        retrieved_chunks: Chunk dicts passed to format_evidence().
        chat_history: Prior messages passed to format_chat_history(); None or
            an empty list is rendered as "None".

    Returns:
        The prompt string with no leading or trailing whitespace from the
        template itself.
    """
    # An empty retrieval is rendered as an explicit "None" (mirroring the chat
    # history section) instead of leaving a bare "EVIDENCE:" header.
    evidence_block = format_evidence(retrieved_chunks or []) or "None"
    history_block = format_chat_history(chat_history or [])

    # Sections are joined explicitly rather than via an f-string template plus
    # textwrap.dedent: the template sat at column 0, so dedent never removed
    # any indentation and only touched whitespace-only lines inside the
    # interpolated user/evidence text.
    sections = [
        # SYSTEM_PROMPT carries its own leading/trailing newline; strip it so
        # exactly one blank line separates it from the next section.
        SYSTEM_PROMPT.strip(),
        f"CHAT HISTORY:\n{history_block}",
        f"EVIDENCE:\n{evidence_block}",
        f"USER QUESTION:\n{user_query}",
        (
            "INSTRUCTIONS:\n"
            "- Answer clearly in bullet points where possible.\n"
            "- If asked for trends, mention year and magnitude if present.\n"
            '- End with a "Sources" section listing all evidence sources used.'
        ),
    ]
    return "\n\n".join(sections)


In [6]:
# --- Smoke test: build a prompt from hand-written fixtures and eyeball it ---

# A single retrieved chunk carrying every key that format_evidence() reads.
# The text uses the literal placeholders "X" and "Y" rather than real figures.
fake_chunks = [
    {
        "chunk_id": "IEA_2023_000001",
        "source": "GlobalEVOutlook2023_clean",
        "year": 2023,
        "score": 0.88,
        "text": "Electric car sales exceeded X million in 2023, representing a Y% increase year-on-year..."
    }
]

# One user/assistant exchange in the {"role", "content"} shape that
# format_chat_history() expects.
fake_history = [
    {"role":"user","content":"Tell me about EV adoption."},
    {"role":"assistant","content":"EV adoption has risen quickly in recent years, especially in China and Europe."}
]

test_prompt = build_prompt(
    user_query="What was the EV sales trend in 2023?",
    retrieved_chunks=fake_chunks,
    chat_history=fake_history
)

# Show only the first 2000 characters so the cell output stays bounded.
print(test_prompt[:2000])


You are VoltAI, a domain-specific assistant for Electric Vehicle market trends, charging infrastructure, battery ecosystem, and EV policy.
You must answer ONLY using the provided EVIDENCE from the IEA Global EV Outlook knowledge base.

Rules:
1) Use only information present in EVIDENCE.
2) If the answer is not found in EVIDENCE, respond exactly with: "Insufficient data in knowledge base."
3) Be factual, concise, and avoid assumptions.
4) When relevant, include numbers and year references.
5) Always provide sources in the format: [SOURCE: <source>, YEAR: <year>].
6) If the user asks follow-up questions, use CHAT HISTORY to understand what "it/that/they" refer to.


CHAT HISTORY:
USER: Tell me about EV adoption.
ASSISTANT: EV adoption has risen quickly in recent years, especially in China and Europe.

EVIDENCE:
[EVIDENCE 1] (SOURCE=GlobalEVOutlook2023_clean, YEAR=2023, CHUNK_ID=IEA_2023_000001, SCORE=0.8800)
Electric car sales exceeded X million in 2023, representing a Y% increase year-o