In [60]:
import os
import json
import time
import re
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, asdict

from llama_cpp import Llama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [61]:
@dataclass
class Config:
    """Tunable settings for model loading, sampling, retrieval, and the repair loop."""

    # Model loading: these three are handed to the Llama constructor.
    model_path: str = "models/Qwen2.5-7B-Instruct-Q4_K_M.gguf"
    n_ctx: int = 4096
    n_threads: int = 8

    # Sampling: temperature and max_tokens are used for plan generation and
    # repair (the evaluation call passes its own literal values); top_p and
    # seed are applied to every call made through generate_llm.
    temperature: float = 0.2
    top_p: float = 0.85
    max_tokens: int = 1100
    seed: int = 42

    # Retrieval: passed to rag.retrieve as its second and third arguments.
    rag_top_k: int = 4
    rag_min_score: float = 0.05

    # Upper bound on how many times the agent loop calls repair_plan.
    max_repair_attempts: int = 2
    # Forwarded to Llama(verbose=...).
    verbose: bool = False


# Single shared configuration instance read by the cells below.
CFG = Config()
# Fail fast with a readable message before attempting to load the model.
assert os.path.exists(CFG.model_path), f"Model not found: {CFG.model_path}"


In [62]:
# Load the GGUF model once; every generation helper below calls this shared
# `llm` object. With the default Config, n_ctx (4096) is smaller than the
# model's training context, which is what the loader warning in this cell's
# output reports.
llm = Llama(
    model_path=CFG.model_path,
    n_ctx=CFG.n_ctx,
    n_threads=CFG.n_threads,
    verbose=CFG.verbose
)

print("LLM loaded successfully.")


llama_new_context_with_model: n_ctx_per_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized


LLM loaded successfully.


In [63]:
# Small in-notebook Agile knowledge base: one {"id", "text"} record per fact.
AGILE_KB = [
    {"id": doc_id, "text": doc_text}
    for doc_id, doc_text in (
        ("SCRUM_ROLES", "Scrum defines three roles: Product Owner, Scrum Master, and Developers."),
        ("SPRINT_CONCEPT", "A Sprint is a time-boxed iteration producing a usable Increment with a clear Sprint Goal."),
        ("SCRUM_EVENTS", "Scrum events include Sprint Planning, Daily Scrum, Sprint Review, and Sprint Retrospective."),
        ("AGILE_RISK", "Agile manages risk through incremental delivery, early feedback, and prioritizing high-risk items."),
        ("DEFINITION_OF_DONE", "Definition of Done ensures a shared understanding of when work is complete and usable."),
    )
]


In [64]:
def extract_json(text: str) -> Dict[str, Any]:
    """
    Parse the JSON payload enclosed by <JSON_BEGIN> ... <JSON_END> markers.

    Only the first marker pair is considered; surrounding whitespace inside
    the markers is ignored.

    Raises:
        ValueError: if no marker pair is present (json.JSONDecodeError, a
            ValueError subclass, is raised when the payload is not valid JSON).
    """
    found = re.search(r"<JSON_BEGIN>(.*?)<JSON_END>", text, re.DOTALL)
    if found is None:
        raise ValueError("JSON markers not found in model output.")
    return json.loads(found.group(1).strip())


def validate_plan_schema(plan: Dict[str, Any]) -> None:
    """
    Lightweight schema validation (committee-friendly).

    Checks that all top-level fields exist and that the nested pieces the
    rest of the pipeline relies on have the expected container types.

    Args:
        plan: Parsed plan object produced by the model.

    Raises:
        ValueError: for every kind of structural problem. Wrongly typed
            nested values (e.g. "complexity" being a string) are reported as
            ValueError too, rather than leaking AttributeError/TypeError.
    """
    if not isinstance(plan, dict):
        raise ValueError("plan must be a JSON object")

    required_top = ["project_goal", "expected_technologies", "core_components", "complexity", "risks", "sprints", "management_summary"]
    for k in required_top:
        if k not in plan:
            raise ValueError(f"Missing top-level field: {k}")

    # Guard the type first: calling .get on a non-dict would raise AttributeError.
    complexity = plan["complexity"]
    if not isinstance(complexity, dict) or complexity.get("level") not in ["Low", "Medium", "High"]:
        raise ValueError("complexity.level must be Low|Medium|High")

    technologies = plan["expected_technologies"]
    if not isinstance(technologies, list) or not all(isinstance(x, str) for x in technologies):
        raise ValueError("expected_technologies must be a list of strings")

    sprints = plan["sprints"]
    if (
        not isinstance(sprints, list)
        or len(sprints) < 2
        or not all(isinstance(s, dict) for s in sprints)
    ):
        raise ValueError("sprints must contain at least 2 sprint objects")

    ms = plan["management_summary"]
    if not isinstance(ms, dict):
        raise ValueError("management_summary must be an object")
    if not isinstance(ms.get("rag_sources"), list):
        raise ValueError("management_summary.rag_sources must be a list")
    if not isinstance(ms.get("decision_rationale"), dict):
        raise ValueError("management_summary.decision_rationale must be an object")


def llm_generate(prompt: str, temperature: float, top_p: float, max_tokens: int, seed: int) -> str:
    """
    Run one completion on the shared `llm` object with explicit sampling
    settings and return the text of the first choice.
    """
    sampling = {
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "seed": seed,
    }
    completion = llm(prompt, **sampling)
    first_choice = completion["choices"][0]
    return first_choice["text"]



In [65]:
# Role/persona preamble; placed first in the prompt assembled by build_prompt.
SYSTEM_PROMPT = """
You are a senior Software Project Manager acting as a SINGLE autonomous AI agent.
You must plan software projects realistically using Agile principles.
Return only valid JSON as specified.
"""

# Behavioural instructions appended directly after SYSTEM_PROMPT in build_prompt.
AGENT_CONTROL_INSTRUCTION = """
Behave as an autonomous agent:
- Make decisions
- Justify choices
- Ensure feasibility
"""

# Output format the planner must follow. The top-level keys listed here are
# the ones validate_plan_schema requires, and the <JSON_BEGIN>/<JSON_END>
# markers are the ones extract_json searches for.
OUTPUT_CONTRACT = """
Return ONLY JSON wrapped between markers:

<JSON_BEGIN>
{
  "project_goal": "...",
  "expected_technologies": ["..."],
  "core_components": ["..."],
  "complexity": {"level": "Low|Medium|High", "justification": "..."},
  "risks": [{"risk": "...", "mitigation": "..."}],
  "sprints": [
    {"name": "Sprint 1", "goal": "...", "tasks": ["..."]},
    {"name": "Sprint 2", "goal": "...", "tasks": ["..."]}
  ],
  "management_summary": {
    "summary": "...",
    "rag_sources": ["..."],
    "decision_rationale": {
      "approach_choice": "...",
      "timeline_feasibility": "...",
      "scope_strategy": "..."
    }
  }
}
<JSON_END>
"""

# Instructions for the self-evaluation pass (used by evaluate_plan). The agent
# loop reads the "feasibility" and "recommended_action" fields of the reply.
SELF_EVAL_PROMPT = """
Evaluate the plan feasibility.
Return ONLY JSON:

{
  "feasibility": "High|Medium|Low",
  "main_concerns": ["..."],
  "recommended_action": "None|Scope Reduction|Sprint Rebalance",
  "repair_instructions": "..."
}
"""

# Trailing instruction for the repair pass (used by repair_plan).
REPAIR_PROMPT = """
Repair the plan based on evaluator feedback.
Keep the SAME JSON structure and markers.
"""


In [66]:
def extract_pure_json(text: str) -> Dict[str, Any]:
    """
    Robust JSON extractor for LLM outputs.
    Safely extracts the FIRST valid JSON object only.

    Every "{" in the text is tried in order, so a stray brace in leading
    prose (e.g. "use {placeholders}") no longer hides a valid object that
    follows it. Anything after the decoded object is ignored.

    Args:
        text: Raw completion text, optionally wrapped in a Markdown code fence.

    Returns:
        The first substring that decodes as a JSON object, as a dict.

    Raises:
        ValueError: if the text contains no "{" at all, or if no "{" starts a
            decodable JSON object (the first decode error is reported).
    """
    decoder = json.JSONDecoder()

    text = text.strip()

    # Remove common leading junk
    if text.startswith("```"):
        text = text.strip("`").strip()

    start = text.find("{")
    if start == -1:
        raise ValueError("No JSON object found in LLM output.")

    first_error = None
    while start != -1:
        try:
            # A value decoded from a "{" position is always a JSON object.
            obj, _ = decoder.raw_decode(text, start)
            return obj
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
            # NOTE: if the outermost object is truncated, a later brace may
            # decode a nested fragment; callers should still validate the shape.
            start = text.find("{", start + 1)

    raise ValueError(f"Failed to decode JSON from LLM output: {first_error}") from first_error


In [67]:
def validate_plan_schema(plan: Dict[str, Any]) -> None:
    """
    Validate the structure of a generated plan.

    This cell redefines validate_plan_schema, so it is the version that
    repair_plan and ai_project_manager actually call. It therefore performs
    the full structural checks rather than key presence only; otherwise a
    plan with, say, an invalid complexity level would be accepted silently.

    Args:
        plan: Parsed plan object produced by the model.

    Raises:
        ValueError: if a required field is missing or a nested value has the
            wrong shape.
    """
    if not isinstance(plan, dict):
        raise ValueError("plan must be a JSON object")

    required = ["project_goal", "expected_technologies", "core_components",
                "complexity", "risks", "sprints", "management_summary"]
    for k in required:
        if k not in plan:
            raise ValueError(f"Missing field: {k}")

    complexity = plan["complexity"]
    if not isinstance(complexity, dict) or complexity.get("level") not in ["Low", "Medium", "High"]:
        raise ValueError("complexity.level must be Low|Medium|High")

    technologies = plan["expected_technologies"]
    if not isinstance(technologies, list) or not all(isinstance(x, str) for x in technologies):
        raise ValueError("expected_technologies must be a list of strings")

    sprints = plan["sprints"]
    if (
        not isinstance(sprints, list)
        or len(sprints) < 2
        or not all(isinstance(s, dict) for s in sprints)
    ):
        raise ValueError("sprints must contain at least 2 sprint objects")

    summary = plan["management_summary"]
    if not isinstance(summary, dict):
        raise ValueError("management_summary must be an object")
    if not isinstance(summary.get("rag_sources"), list):
        raise ValueError("management_summary.rag_sources must be a list")
    if not isinstance(summary.get("decision_rationale"), dict):
        raise ValueError("management_summary.decision_rationale must be an object")


In [68]:
def build_prompt(project_text: str, rag_docs: List[Dict[str, Any]]) -> str:
    """
    Assemble the full planning prompt.

    The prompt is made of the system and agent instructions, one line per
    retrieved document (id, score to two decimals, text), the list of
    retrieved ids, the project description, and the output contract.
    A fixed placeholder sentence is used when nothing was retrieved.
    """
    if rag_docs:
        knowledge_lines = []
        for doc in rag_docs:
            knowledge_lines.append(f"[{doc['id']} | {doc['score']:.2f}] {doc['text']}")
        knowledge_block = "\n".join(knowledge_lines)
    else:
        knowledge_block = "No relevant Agile knowledge retrieved."

    source_ids = [doc["id"] for doc in rag_docs]

    return f"""
{SYSTEM_PROMPT}
{AGENT_CONTROL_INSTRUCTION}

AGILE KNOWLEDGE:
{knowledge_block}

IMPORTANT:
Use only these sources if relevant: {source_ids}

PROJECT DESCRIPTION:
{project_text}

{OUTPUT_CONTRACT}
"""


In [69]:
def generate_llm(prompt: str, temp: float, max_tokens: int) -> str:
    """
    Run one completion on the shared `llm` object and return the text of the
    first choice. top_p and seed always come from CFG; temperature and the
    token budget are chosen by the caller.
    """
    completion = llm(
        prompt,
        temperature=temp,
        top_p=CFG.top_p,
        max_tokens=max_tokens,
        seed=CFG.seed,
    )
    first_choice = completion["choices"][0]
    return first_choice["text"]


def evaluate_plan(plan: Dict[str, Any]) -> Dict[str, Any]:
    """
    Ask the model to assess the given plan and return its parsed JSON verdict.

    The plan is serialised with two-space indentation, followed by
    SELF_EVAL_PROMPT; generation uses temperature 0.1 and at most 400 tokens.
    """
    plan_json = json.dumps(plan, indent=2)
    eval_prompt = f"""
PLAN:
{plan_json}

{SELF_EVAL_PROMPT}
"""
    return extract_pure_json(generate_llm(eval_prompt, 0.1, 400))


def repair_plan(project_text, rag_docs, plan, evaluation):
    """
    Ask the model for a revised plan given the current plan and its evaluation.

    The repair prompt is the original planning prompt followed by the current
    plan, the evaluation, and REPAIR_PROMPT. The reply is parsed and checked
    with validate_plan_schema before being returned, so errors raised by
    extraction or validation propagate to the caller.
    """
    base_prompt = build_prompt(project_text, rag_docs)
    plan_json = json.dumps(plan, indent=2)
    evaluation_json = json.dumps(evaluation, indent=2)

    prompt = f"""
{base_prompt}

CURRENT PLAN:
{plan_json}

EVALUATION:
{evaluation_json}

{REPAIR_PROMPT}
"""
    reply = generate_llm(prompt, CFG.temperature, CFG.max_tokens)
    candidate = extract_pure_json(reply)
    validate_plan_schema(candidate)
    return candidate


In [70]:
def ai_project_manager(project_text: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Run the full agent loop: retrieve, plan, self-evaluate, and repair.

    Args:
        project_text: Free-text project description and constraints.

    Returns:
        A (payload, trace) pair. payload holds the final "plan" and the last
        evaluation under "agent_status"; trace holds the runtime, the
        retrieved document ids and scores, the number of repairs performed,
        and the final feasibility label ("Unknown" if the evaluator omitted it).

    Raises:
        ValueError: propagated from JSON extraction or schema validation of
            the initial or a repaired plan.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments during a multi-minute run.
    start = time.perf_counter()

    # NOTE(review): `rag` is not defined in any of the cells visible here; it
    # must come from an earlier cell. Confirm the notebook still works after
    # Restart Kernel -> Run All.
    rag_docs = rag.retrieve(project_text, CFG.rag_top_k, CFG.rag_min_score)

    prompt = build_prompt(project_text, rag_docs)
    raw = generate_llm(prompt, CFG.temperature, CFG.max_tokens)
    plan = extract_pure_json(raw)
    validate_plan_schema(plan)

    # Clamp so that the plan is always evaluated at least once, even when the
    # configured limit is zero or negative.
    max_repairs = max(0, CFG.max_repair_attempts)
    evaluations = []
    repairs = 0

    for attempt in range(max_repairs + 1):
        evaluation = evaluate_plan(plan)
        evaluations.append(evaluation)

        # Use .get: the evaluator is an LLM and may omit a field. A missing
        # field counts as "not accepted" and triggers a repair, not a KeyError.
        accepted = (
            evaluation.get("recommended_action") == "None"
            and evaluation.get("feasibility") in ["High", "Medium"]
        )
        if accepted or attempt == max_repairs:
            break

        plan = repair_plan(project_text, rag_docs, plan, evaluation)
        repairs += 1

    final_evaluation = evaluations[-1]

    trace = {
        "runtime_sec": round(time.perf_counter() - start, 2),
        "rag_used": [d["id"] for d in rag_docs],
        "rag_scores": {d["id"]: d["score"] for d in rag_docs},
        "repair_attempts": repairs,
        "final_feasibility": final_evaluation.get("feasibility", "Unknown")
    }

    payload = {
        "plan": plan,
        "agent_status": final_evaluation
    }

    return payload, trace


In [71]:
# Example project brief used for the demonstration run below.
PROJECT_TEXT = """
Build a web-based platform for managing clinic appointments and patient records.
Features:
- Role-based authentication
- Appointment scheduling
- Patient profiles
Constraints:
- 2 developers
- 6 weeks
"""

# End-to-end run of the agent. This is a long-running cell: the recorded
# output below reports a runtime of about 745 seconds with two repair passes.
payload, trace = ai_project_manager(PROJECT_TEXT)
# Print the execution trace first, then the final plan and evaluator status.
print(json.dumps(trace, indent=2))
print(json.dumps(payload, indent=2))


{
  "runtime_sec": 745.01,
  "rag_used": [
    "SCRUM_ROLES"
  ],
  "rag_scores": {
    "SCRUM_ROLES": 0.34092454627167806
  },
  "repair_attempts": 2,
  "final_feasibility": "Medium"
}
{
  "plan": {
    "project_goal": "Build a web-based platform for managing clinic appointments and patient records with role-based authentication, appointment scheduling, and patient profiles.",
    "expected_technologies": [
      "React",
      "Node.js",
      "Express",
      "MongoDB",
      "JWT for authentication"
    ],
    "core_components": [
      "Authentication system",
      "Appointment scheduling",
      "Patient profiles"
    ],
    "complexity": {
      "level": "Medium",
      "justification": "The project involves multiple features and a user authentication system, which adds complexity but is not extremely intricate."
    },
    "risks": [
      {
        "risk": "Limited developer resources",
        "mitigation": "Ensure developers are well-versed in the technologies and allocate 