In [3]:
!pip install sentence-transformers faiss-cpu openai tiktoken pandas langchain
# if on GPU environment, install faiss-gpu instead of faiss-cpu

StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 5, Finished, Available, Finished)

Collecting sentence-transformers
  Using cached sentence_transformers-5.1.1-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Using cached faiss_cpu-1.12.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting openai
  Using cached openai-2.3.0-py3-none-any.whl.metadata (29 kB)
Collecting tiktoken
  Using cached tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.7 kB)
Collecting langchain
  Downloading langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.57.0-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m221.7 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Downloading jiter-0.11.0-cp311-cp311-manylinux_2_17_x86_64.man

In [5]:
# Upgrade typing_extensions to a compatible version
%pip install --upgrade typing_extensions pydantic openai --quiet

StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 12, Finished, Available, Finished)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.



In [6]:
import openai
print("✅ OpenAI library imported successfully.")


StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 14, Finished, Available, Finished)

✅ OpenAI library imported successfully.


In [8]:
"""
RAG Assistant for Reimage-AI Smart Parking
- Builds embeddings from HistoricalTraffic (and optionally other) Fabric tables
- Stores vector index using FAISS
- Answers queries by retrieving top-k contexts and (optionally) calling OpenAI for generation
- Logs queries/answers to MCP (ModelContextProtocol) and Hedera simulation via mcp.log_inference()
"""

import os
import json
import faiss
import numpy as np
import pandas as pd
from datetime import datetime
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Optional

# Optional OpenAI usage for generation; if not set, assistant falls back to extractive answers
import openai

# ---- Config ----
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"   # small & fast embedding model from sentence-transformers
FAISS_INDEX_PATH = "data/faiss_index.index"
DOCS_META_PATH = "data/faiss_docs_meta.json"
TOP_K = 5
USE_OPENAI = bool(os.getenv("OPENAI_API_KEY", ""))

if USE_OPENAI:
    openai.api_key = os.getenv("OPENAI_API_KEY")

# ---- Helper classes ----
class RAGAssistant:
    def __init__(self,
                 spark,
                 embedding_model_name: str = EMBEDDING_MODEL_NAME,
                 index_path: str = FAISS_INDEX_PATH,
                 docs_meta_path: str = DOCS_META_PATH):
        """
        spark: active SparkSession (so we can read tables from Fabric Lakehouse)
        """
        self.spark = spark
        self.embedding_model_name = embedding_model_name
        self.index_path = index_path
        self.docs_meta_path = docs_meta_path

        # Load embedding model
        print(f"[RAG] Loading embedding model: {self.embedding_model_name}")
        self.embedder = SentenceTransformer(self.embedding_model_name)

        # placeholders
        self.index = None
        self.doc_metas = []   # list of dicts {id, text, source, metadata...}
        if os.path.exists(self.index_path) and os.path.exists(self.docs_meta_path):
            try:
                print("[RAG] Loading existing FAISS index and metadata...")
                self._load_index()
                print("[RAG] Index loaded.")
            except Exception as e:
                print("[RAG] Warning: failed to load existing index:", e)
                self.index = None
                self.doc_metas = []

    # ------------------------------
    # Build vector store from HistoricalTraffic (and optionally other tables)
    # ------------------------------
    def build_vector_store(self, tables: List[str] = ["HistoricalTraffic"], text_columns: List[str] = None, force_rebuild: bool = False):
        """
        Build or rebuild the FAISS vector index from specified Spark table(s).

        - tables: list of table names to pull rows from (Spark SQL). Default: HistoricalTraffic.
        - text_columns: if provided, list of columns to concatenate per document (e.g., ['zone_id','date','hour','average_occupancy'])
        - force_rebuild: if True, rebuild even if an index already exists
        """
        if not force_rebuild and self.index is not None:
            print("[RAG] Index already present. Use force_rebuild=True to rebuild.")
            return

        print(f"[RAG] Reading data from tables: {tables}")
        docs = []
        for table in tables:
            try:
                df = self.spark.sql(f"SELECT * FROM {table}")
                pdf = df.toPandas()
                print(f"[RAG] Retrieved {len(pdf)} rows from {table}")
            except Exception as e:
                print(f"[RAG] Warning: failed to read table {table}: {e}")
                continue

            # Form documents
            for idx, row in pdf.iterrows():
                # Compose a human-readable text from chosen columns
                if text_columns:
                    parts = []
                    for c in text_columns:
                        parts.append(f"{c}: {row.get(c, '')}")
                    text = " | ".join(parts)
                else:
                    # Default textualization: join important fields
                    # Protect against NaNs by converting to str
                    text = f"zone_id: {row.get('zone_id','')}, date: {row.get('date','')}, hour: {row.get('hour','')}, occupancy: {row.get('average_occupancy','')}, volume: {row.get('traffic_volume','')}, weather: {row.get('weather_condition','')}"
                meta = {
                    "source_table": table,
                    "source_row_index": int(idx),
                    "zone_id": str(row.get("zone_id", "")),
                    "date": str(row.get("date", "")),
                    "hour": int(row.get("hour")) if not pd.isna(row.get("hour")) else None,
                    "data_hash": row.get("data_hash", None)
                }
                docs.append({"id": f"{table}__{idx}", "text": text, "meta": meta})

        if not docs:
            print("[RAG] No documents to index.")
            return

        # Create embeddings in batches
        texts = [d["text"] for d in docs]
        print("[RAG] Computing embeddings for documents...")
        embeddings = self.embedder.encode(texts, show_progress_bar=True, convert_to_numpy=True)
        dim = embeddings.shape[1]
        print(f"[RAG] Embeddings shape: {embeddings.shape}  dim={dim}")

        # Build FAISS index (IndexFlatIP with normalization -> cosine similarity by inner product after normalization)
        print("[RAG] Building FAISS index (normalized vectors)...")
        faiss.normalize_L2(embeddings)
        index = faiss.IndexFlatIP(dim)
        index.add(embeddings)

        # Save index and metadata
        faiss.write_index(index, self.index_path)
        with open(self.docs_meta_path, "w", encoding="utf-8") as fh:
            json.dump(docs, fh, ensure_ascii=False, indent=2)

        # assign to object
        self.index = index
        self.doc_metas = docs
        print(f"[RAG] FAISS index built and saved to {self.index_path}. Documents saved to {self.docs_meta_path}.")

    # ------------------------------
    # Internal load index
    # ------------------------------
    def _load_index(self):
        self.index = faiss.read_index(self.index_path)
        with open(self.docs_meta_path, "r", encoding="utf-8") as fh:
            self.doc_metas = json.load(fh)

    # ------------------------------
    # Query / Retrieve
    # ------------------------------
    def retrieve(self, query: str, top_k: int = TOP_K) -> List[Dict]:
        """
        Return top_k retrieved documents with scores.
        """
        if self.index is None:
            raise RuntimeError("FAISS index not built. Call build_vector_store() first.")

        q_emb = self.embedder.encode([query], convert_to_numpy=True)
        faiss.normalize_L2(q_emb)
        D, I = self.index.search(q_emb, top_k)  # D: similarity scores; I: indices
        results = []
        for score, idx in zip(D[0], I[0]):
            if idx < 0 or idx >= len(self.doc_metas):
                continue
            doc = self.doc_metas[idx]
            results.append({
                "id": doc["id"],
                "text": doc["text"],
                "meta": doc["meta"],
                "score": float(score)
            })
        return results

    # ------------------------------
    # Answer generation (RAG)
    # ------------------------------
    def answer(self,
               query: str,
               top_k: int = TOP_K,
               use_openai: bool = USE_OPENAI,
               openai_model: str = "gpt-3.5-turbo",
               llm_temperature: float = 0.0,
               mcp_system = None):
        """
        Full RAG answer flow:
        - retrieve top_k documents
        - craft a prompt combining query + retrieved contexts
        - optionally call OpenAI to generate final answer
        - log query & answer to MCP (if provided)
        """
        retrieved = self.retrieve(query, top_k=top_k)
        # Simple extractive fallback if no LLM
        context_texts = [f"[{r['meta']['zone_id']}|{r['meta']['date']}|h{r['meta']['hour']}] {r['text']}" for r in retrieved]

        # Compose system prompt
        prompt_header = (
            "You are a traffic & parking assistant. Use the contextual historical traffic data below "
            "to answer the user's query. Cite relevant zone and hour where appropriate.\n\n"
        )
        context_block = "\n\n".join([f"Context {i+1} (score={r['score']:.3f}):\n{r['text']}" for i, r in enumerate(retrieved)])
        final_prompt = prompt_header + context_block + f"\n\nUser Query: {query}\n\nAnswer concisely with supporting context."

        if use_openai:
            try:
                print("[RAG] Calling OpenAI to generate answer...")
                response = openai.ChatCompletion.create(
                    model=openai_model,
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specialized in traffic and parking analytics."},
                        {"role": "user", "content": final_prompt}
                    ],
                    temperature=llm_temperature,
                    max_tokens=512
                )
                answer_text = response["choices"][0]["message"]["content"].strip()
            except Exception as e:
                print("[RAG] OpenAI call failed:", e)
                # fallback to return retrieved context
                answer_text = " ".join(context_texts) if context_texts else "No context available to answer the query."
        else:
            # Fallback behaviour: return retrieved contexts concatenated (extractive RAG)
            print("[RAG] OPENAI key not provided — returning retrieved contexts as answer.")
            if context_texts:
                answer_text = "Retrieved context (top results):\n\n" + "\n\n".join(context_texts)
            else:
                answer_text = "No relevant historical traffic context found."

        # Log query+answer to MCP if available (so it becomes an audited inference)
        if mcp_system is not None:
            try:
                inference_meta = {
                    "query": query,
                    "retrieved_ids": [r["id"] for r in retrieved],
                    "retrieved_scores": [r["score"] for r in retrieved],
                    "answer_summary": answer_text[:800]  # truncated summary for ledger entry
                }
                # Use mcp.log_inference to log; this returns receipt (with transaction_id)
                receipt = mcp_system.log_inference("RAG_Traffic_Assistant_v1", {"query": query}, {"answer": answer_text}, confidence=0.9)
                # attach receipt info if present
                if receipt:
                    inference_tx_id = receipt.get("transaction_id") if isinstance(receipt, dict) else receipt
                else:
                    inference_tx_id = None
            except Exception as e:
                print("[RAG] Error logging to MCP:", e)
                inference_tx_id = None
        else:
            inference_tx_id = None

        return {
            "query": query,
            "answer": answer_text,
            "retrieved": retrieved,
            "mcp_inference_tx": inference_tx_id,
            "timestamp": datetime.utcnow().isoformat()
        }


StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 16, Finished, Available, Finished)

In [12]:
# from rag_assistant import RAGAssistant

# assume spark is the active SparkSession in your Fabric notebook
rag = RAGAssistant(spark)

import os
os.makedirs("data", exist_ok=True)

# build embeddings/index from HistoricalTraffic
rag.build_vector_store(tables=["HistoricalTraffic"], force_rebuild=True)


StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 20, Finished, Available, Finished)

[RAG] Loading embedding model: all-MiniLM-L6-v2
[RAG] Reading data from tables: ['HistoricalTraffic']
[RAG] Retrieved 672 rows from HistoricalTraffic
[RAG] Computing embeddings for documents...


Batches:   0%|          | 0/21 [00:00<?, ?it/s]

[RAG] Embeddings shape: (672, 384)  dim=384
[RAG] Building FAISS index (normalized vectors)...
[RAG] FAISS index built and saved to data/faiss_index.index. Documents saved to data/faiss_docs_meta.json.


StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 21, Finished, Available, Finished)

In [17]:
result = rag.answer(
    "Which zones have the highest average occupancy during weekday mornings (8-10am)?",
    top_k=6)

print("ANSWER:\n", result["answer"])
print("\nRetrieved docs:")
for r in result["retrieved"]:
    print("-", r["meta"].get("zone_id"), r["meta"].get("date"), "score:", r["score"])

StatementMeta(, 51092528-9a67-4cd9-8ddb-e4d226a73748, 26, Finished, Available, Finished)

[RAG] Calling OpenAI to generate answer...
[RAG] OpenAI call failed: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

ANSWER:
 [ZONE_B|2025-10-03|h20] zone_id: ZONE_B, date: 2025-10-03, hour: 20, occupancy: 0.147, volume: 279, weather: Sunny [ZONE_B|2025-10-09|h3] zone_id: ZONE_B, date: 2025-10-09, hour: 3, occupancy: 0.148, volume: 279, weather: Rainy [ZONE_A|2025-10-03|h7] zone_id: ZONE_A, date: 2025-10-03, hour: 7, occupancy: 0.139, volume: 253, weather: Sunny [ZONE_B|2025-10-08|h7] zone_id: ZONE_B, date: 2025-10-08, hour: 7, occupancy: 0.267, volume: 171, weather: Sunny [