# Algebraic and AI-Assisted Anomaly Detection for Robotic Sensor Data

- Load ../results/gemini_summary.json file
- Call Gemini 2.0 Flash with a prompt built from the summary
- Output AI report with
    - hypothesis
    - best detectors
    - cross-sensor insights
    - industrial relevance
    - numerical patterns
    - final summary
- Save JSON output as ../ai_report/report.json
- Save readable/printable output as ../ai_report/report.md

In [None]:
# --- 1) Imports & setup ---
import os, json, textwrap, time
from pathlib import Path
from datetime import datetime
import dotenv

import numpy as np
import pandas as pd

# Optional install if google-generativeai is not present.
# Only a missing package (ImportError / ModuleNotFoundError) is worth a pip
# install; any other import-time failure would not be fixed by installing the
# package and is left to surface unchanged.
try:
    import google.generativeai as genai
except ImportError:
    import subprocess
    import sys

    # Install into the interpreter that is running this notebook.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "google-generativeai"])
    import google.generativeai as genai

dotenv.load_dotenv()

# --- 2) Paths ---
RESULTS_PATH = Path("../results")
AI_REPORT_PATH = Path("../ai_report")
AI_REPORT_PATH.mkdir(exist_ok=True)

# Input produced by an earlier notebook, plus every artifact this one writes.
SUMMARY_JSON = RESULTS_PATH / "gemini_summary.json"
RAW_TXT_PATH = AI_REPORT_PATH / "gemini_raw.txt"
PROMPT_PATH = AI_REPORT_PATH / "gemini_prompt.json"
REPORT_JSON_PATH = AI_REPORT_PATH / "report.json"
REPORT_MD_PATH = AI_REPORT_PATH / "report.md"

# Explicit raise instead of `assert`: assertions are stripped when Python runs
# with -O, which would turn a missing input into a less helpful error later.
if not SUMMARY_JSON.exists():
    raise FileNotFoundError(f"Missing {SUMMARY_JSON}. Run 05_model_comparison.ipynb first.")

# --- 3) Load summary & basic checks ---
# Pin the encoding so the read does not depend on the platform locale.
with open(SUMMARY_JSON, "r", encoding="utf-8") as f:
    project_summary = json.load(f)

# Top-level keys of the summary are the dataset names.
datasets = list(project_summary.keys())
print(f"[INFO] Datasets in summary: {datasets}")

"""Gemini 2.0flash config"""
# The API key is read from the environment; an empty string means "not set".
API_KEY = os.getenv("GEMINI_API_KEY", "")
if API_KEY:
    print("[INFO] API_KEY found")
else:
    print("[WARNING] GEMINI_API_KEY not set in environment. Set it to enable API calls.")
# Configured unconditionally, even when the key is empty.
genai.configure(api_key=API_KEY)

MODEL_NAME = "gemini-2.0-flash"
# Generation settings handed to the model with each request.
GEN_CFG = {
    "temperature": 0.2,
    "top_p": 0.9,
    "top_k": 40,
    "max_output_tokens": 3500,
}


"""Prompt"""
# Role description placed at the top of the prompt payload.
SYSTEM_BRIEF = (
    "\n"
    "You are an expert AI Data Scientist specializing in anomaly detection for robotics time-series and multi-sensor data.\n"
    "You will analyze method metrics and correlations and produce a precise, structured report.\n"
    "Keep claims grounded in the provided numbers.\n"
)

# Template of the JSON document the model is asked to return. The values are
# placeholders describing each field; key order is kept because the template
# is serialized into the prompt as-is.
JSON_SPEC = {
    "schema_version": "1.0",
    "project_title": "Algebraic and AI-Assisted Anomaly Detection for Robotic Sensor Data",
    "per_dataset": {
        "<dataset_name>": {
            "best_detectors": [
                {
                    "method": "<name>",
                    "reason": "<short reason>",
                    "supporting_numbers": {
                        "mean": 0.0,
                        "std": 0.0,
                        "corr_to_others": {"PCA_Q": 0.0},
                    },
                },
            ],
            "hypothesis": ["<concise, numbered hypotheses about root causes>"],
            "numerical_patterns": ["<specific quantitative patterns or thresholds seen>"],
            "notes": "<optional short note>",
        },
    },
    "cross_sensor_insights": ["<patterns across datasets/sensors>"],
    "industrial_relevance_insights": ["<why this matters in robotics / predictive maintenance>"],
    "general_final_summary": "<tight executive summary for a technical manager>",
}

def build_prompt(data: dict) -> str:
    """Serialize the analysis request sent to the model as a JSON string.

    Args:
        data: Mapping of dataset name to that dataset's summary dict. Only the
            ``mean_scores``, ``std_scores`` and ``method_correlations`` entries
            are forwarded; a missing entry becomes an empty dict.

    Returns:
        A JSON document (2-space indent) with three top-level keys:
        ``system_brief``, ``instructions`` and ``data``.
    """
    # Forward only the fields the model needs, in a fixed order.
    wanted_fields = ("mean_scores", "std_scores", "method_correlations")
    trimmed = {
        name: {field: entry.get(field, {}) for field in wanted_fields}
        for name, entry in data.items()
    }

    requirements = [
        "Pick 'best_detectors' per dataset: justify with numbers and correlations.",
        "Generate 2–4 concise, testable 'hypothesis' per dataset.",
        "List 2–5 'numerical_patterns' per dataset (thresholds, rank orders, surprising values).",
        "Provide 3–6 'cross_sensor_insights' across datasets.",
        "Provide 3–6 'industrial_relevance_insights' connecting patterns to maintenance decisions.",
        "Finish with a single-paragraph 'general_final_summary' (executive tone).",
        "Do not fabricate numbers. Use the provided means/stds/correlations.",
        "If methods are missing for a dataset, acknowledge briefly.",
    ]

    payload = {
        "system_brief": SYSTEM_BRIEF.strip(),
        "instructions": {
            "task": "Analyze anomaly detector results and produce a structured JSON following the JSON_SPEC exactly.",
            "requirements": requirements,
            "json_contract": JSON_SPEC,
        },
        "data": trimmed,
    }
    return json.dumps(payload, indent=2)
# Build the prompt once from the loaded summary.
USER_PROMPT = build_prompt(project_summary)

# Keep a copy of the exact prompt next to the report outputs.
PROMPT_PATH.write_text(USER_PROMPT)
print(f"[INFO] Prompt saved → {PROMPT_PATH}")


"""Call Gemini 2.0 flash model"""
def call_gemini(prompt_text: str, model_name: str = MODEL_NAME, cfg: dict = GEN_CFG, retries: int = 3, backoff: float = 2.0) -> str:
    """Send ``prompt_text`` to the Gemini model and return its text output.

    Args:
        prompt_text: Prompt passed to ``generate_content``.
        model_name: Model identifier given to ``genai.GenerativeModel``.
        cfg: Generation settings passed as ``generation_config``. Not mutated.
        retries: Maximum number of attempts.
        backoff: Base delay in seconds; the wait after a failed attempt is
            ``backoff * attempt``.

    Returns:
        The response text on success. On failure the function does not raise:
        it returns a string starting with ``"[ERROR]"`` (missing API key, or
        every attempt failed).
    """
    if not API_KEY:
        return "[ERROR] GEMINI_API_KEY missing. Skipping API call."
    model = genai.GenerativeModel(model_name)
    last_err = None
    for attempt in range(1, retries + 1):
        try:
            resp = model.generate_content(prompt_text, generation_config=cfg)
            # NOTE(review): the SDK's `text` accessor appears to raise ValueError
            # (rather than return an empty value) when the response has no valid
            # part — confirm for the installed version. Guarding it keeps the
            # candidates fallback below reachable.
            try:
                text = resp.text
            except (AttributeError, ValueError):
                text = None
            if text:
                return text
            # Some SDKs return candidates list
            candidates = getattr(resp, "candidates", None)
            if candidates:
                return candidates[0].content.parts[0].text
            return str(resp)
        except Exception as e:
            # Best-effort boundary: any failure is logged and retried.
            last_err = e
            print(f"[WARNING] Gemini call failed (attempt {attempt}/{retries}): {e}")
            # No point waiting after the last attempt; nothing follows it.
            if attempt < retries:
                time.sleep(backoff * attempt)
    return f"[ERROR] Gemini call failed after {retries} attempts: {last_err}"


raw_output = call_gemini(USER_PROMPT)
# Model text may contain characters outside the platform's default encoding;
# write UTF-8 explicitly so saving cannot fail with UnicodeEncodeError.
with open(RAW_TXT_PATH, "w", encoding="utf-8") as f:
    f.write(raw_output)
print(f"[INFO] Raw Gemini output saved → {RAW_TXT_PATH}")

[INFO] Datasets in summary: ['lp1', 'lp2', 'lp3', 'lp4', 'lp5']
[INFO] API_KEY found
[INFO] Prompt saved → ../ai_report/gemini_prompt.json
[INFO] Raw Gemini output saved → ../ai_report/gemini_raw.txt


In [10]:
"""Robust JSON parsing"""
def parse_json(text: str):
    """Best-effort extraction of a JSON value from model output.

    The whole text is parsed first. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which copes with output wrapped
    in prose or Markdown code fences.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value, or ``None`` when nothing could be decoded
        (including when ``text`` is not a string, e.g. ``None``).
    """
    # ValueError covers json.JSONDecodeError; TypeError covers non-text input;
    # RecursionError covers pathologically nested input. This function is a
    # best-effort parser, so none of these should escape.
    decode_errors = (TypeError, ValueError, RecursionError)

    # Try direct JSON
    try:
        return json.loads(text)
    except decode_errors:
        pass

    # The brace search below needs a string; anything else cannot be salvaged.
    if not isinstance(text, str):
        return None

    # Try to extract the largest {...} block
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        return json.loads(text[start:end + 1])
    except decode_errors:
        return None
parsed = parse_json(raw_output)
if parsed is None:
    # parse_json returns None when no JSON could be decoded; say so instead of
    # silently writing `null` and reporting success.
    print(f"[WARNING] No JSON could be parsed from the Gemini output; {REPORT_JSON_PATH} will contain null. Inspect {RAW_TXT_PATH}.")
with open(REPORT_JSON_PATH, "w", encoding="utf-8") as f:
    json.dump(parsed, f, indent=2)
print(f"[INFO] Parsed report JSON saved → {REPORT_JSON_PATH}")

# --- 8) Render Markdown report ---
def render_md(report: dict) -> str:
    """Render the parsed report as a Markdown document.

    Args:
        report: Parsed report dict with the optional keys ``project_title``,
            ``schema_version``, ``per_dataset``, ``cross_sensor_insights``,
            ``industrial_relevance_insights`` and ``general_final_summary``.
            A non-dict value (e.g. ``None`` after a failed parse) is treated
            as an empty report.

    Returns:
        The Markdown text, ending with a newline. Missing or empty sections
        are rendered with a placeholder instead of being dropped.
    """
    # Local import: the file imports only `datetime` from the datetime module.
    from datetime import timezone

    def bullets(items) -> list:
        """Return one Markdown bullet per item, or the 'none' placeholder."""
        if not items:
            return ["- _(none reported)_"]
        return [f"- {entry}" for entry in items]

    if not isinstance(report, dict):
        report = {}

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    lines = [
        "# AI Analysis Report — Gemini 2.0 Flash",
        "",
        f"_Generated: {now}_",
        "",
        # Two trailing spaces force a Markdown line break after the project.
        f"**Project:** {report.get('project_title','(unknown)')}  ",
        f"**Schema:** {report.get('schema_version','?')}",
        "",
    ]

    per_ds = report.get("per_dataset")
    if not isinstance(per_ds, dict):
        per_ds = {}
    for ds, block in per_ds.items():
        if not isinstance(block, dict):
            block = {}
        lines.append(f"## Dataset: {ds}")
        lines.append("")

        # Best detectors
        lines.append("### Best Detectors")
        bds = block.get("best_detectors") or []
        if bds:
            for item in bds:
                method = item.get("method", "?")
                reason = item.get("reason", "")
                nums = item.get("supporting_numbers", {})
                lines.append(f"- **{method}** — {reason}  \n  Numbers: `{json.dumps(nums)}`")
        else:
            lines.append("- _(none reported)_")
        lines.append("")

        # Hypotheses (numbered rather than bulleted)
        lines.append("### Hypotheses")
        hyps = block.get("hypothesis") or []
        if hyps:
            lines.extend(f"{i}. {h}" for i, h in enumerate(hyps, 1))
        else:
            lines.append("- _(none reported)_")
        lines.append("")

        # Numerical patterns
        lines.append("### Numerical Patterns")
        lines.extend(bullets(block.get("numerical_patterns")))
        lines.append("")

        # Notes are optional and may be null in the parsed JSON.
        notes = block.get("notes")
        notes = notes.strip() if isinstance(notes, str) else ""
        if notes:
            lines.append("### Notes")
            lines.append(notes)
            lines.append("")

    # Cross-sensor
    lines.append("## Cross-Sensor Insights")
    lines.extend(bullets(report.get("cross_sensor_insights")))
    lines.append("")

    # Industrial relevance
    lines.append("## Industrial Relevance Insights")
    lines.extend(bullets(report.get("industrial_relevance_insights")))
    lines.append("")

    # Final summary; a null or empty value falls back to the placeholder so
    # the join below never sees a non-string.
    summary = report.get("general_final_summary")
    lines.append("## General Final Summary")
    lines.append(str(summary) if summary else "(none)")
    lines.append("")

    return "\n".join(lines)

report_md = render_md(parsed)

# The report contains non-ASCII text (e.g. the em dash in its title) plus
# whatever the model produced; write UTF-8 explicitly so the save does not
# depend on the platform's default encoding.
with open(REPORT_MD_PATH, "w", encoding="utf-8") as f:
    f.write(report_md)
print(f"[INFO] Markdown report saved → {REPORT_MD_PATH}")

print("\nGemini analysis complete! Open:")
print(f"- JSON: {REPORT_JSON_PATH}")
print(f"- Markdown: {REPORT_MD_PATH}")

[INFO] Parsed report JSON saved → ../ai_report/report.json
[INFO] Markdown report saved → ../ai_report/report.md

Gemini analysis complete! Open:
- JSON: ../ai_report/report.json
- Markdown: ../ai_report/report.md


  now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
