# ==========================================
# *Stateful DevOps Agent (Memory + Tools)*
# ==========================================

In [4]:
# ==========================================
# Stable Stateful DevOps Agent
# ==========================================

from langchain_ollama import ChatOllama

# Single chat-model handle shared by the tool-selection step and the
# final-answer step further down in this cell.
llm = ChatOllama(model="phi3:latest", temperature=0.1)

# -----------------------------
# Tools (static canned responses; no LLM calls inside)
# -----------------------------

def analyze_error(error: str) -> str:
    """Return a fixed summary of possible root causes.

    ``error`` is accepted so the tool can be called with a single string
    argument, but it is not inspected: the same text is returned for any
    input.
    """
    canned_causes = "Possible root causes: resource limits, wrong env variables, app crash, missing dependency."
    return canned_causes


def generate_fix(issue: str) -> str:
    """Return a fixed list of remediation steps.

    ``issue`` is accepted so the tool can be called with a single string
    argument, but it is not inspected: the same text is returned for any
    input.
    """
    canned_steps = "Fix steps: check logs, adjust resources, validate config, redeploy."
    return canned_steps


def monitoring_strategy(service: str) -> str:
    """Return a fixed monitoring recommendation.

    ``service`` is accepted so the tool can be called with a single string
    argument, but it is not inspected: the same text is returned for any
    input.
    """
    canned_advice = "Monitor using Prometheus metrics: container restarts, CPU usage, memory usage, alert on restart rate."
    return canned_advice


# Registry mapping each tool name (as it appears in the selection prompt)
# to the callable that implements it.
tools = dict(
    analyze_error=analyze_error,
    generate_fix=generate_fix,
    monitoring_strategy=monitoring_strategy,
)

# -----------------------------
# Memory
# -----------------------------

# Conversation log for the session: a list of {"role": ..., "content": ...}
# dicts kept in the order they were added.
chat_memory = []


def add_to_memory(role, content):
    """Append one message with the given role and content to ``chat_memory``."""
    entry = {"role": role, "content": content}
    chat_memory.append(entry)


def get_memory_context():
    """Render ``chat_memory`` as text.

    Each message is formatted as ``role: content`` and the messages are
    joined with newlines. Returns an empty string when nothing has been
    recorded yet.
    """
    rendered = (f"{entry['role']}: {entry['content']}" for entry in chat_memory)
    return "\n".join(rendered)

# -----------------------------
# Tool Selection
# -----------------------------

def select_tools(user_input):
    """Ask the LLM which of the registered tools apply to ``user_input``.

    The model's reply is lower-cased and every key of ``tools`` is looked
    up in it as a plain substring, so the reply does not need to follow
    the requested comma-separated format exactly.

    Returns:
        The matching tool names, in the iteration order of ``tools``.
        Empty when no tool name occurs in the reply.
    """
    routing_prompt = f"""
Available tools:
- analyze_error
- generate_fix
- monitoring_strategy

Select required tools.
Return comma separated tool names only.

User request:
{user_input}
"""
    reply = llm.invoke(routing_prompt)
    reply_text = reply.content.lower()

    # Substring match against each registered tool name.
    return [tool_name for tool_name in tools if tool_name in reply_text]

# -----------------------------
# Agent Logic
# -----------------------------

def run_agent(user_input):
    """Run one conversational turn and return the model's final answer.

    The user message is echoed and recorded in memory, ``select_tools``
    picks the tools to run, each selected tool is called with the raw user
    input, and the LLM is then asked for a final response built from the
    conversation history plus the tool outputs. The answer is stored in
    memory under the "assistant" role before it is returned.
    """
    print("\nUser:", user_input)
    add_to_memory("user", user_input)

    chosen = select_tools(user_input)
    print("Selected tools:", chosen)

    # One "name: output" entry per selected tool, in selection order.
    observations = [f"{name}: {tools[name](user_input)}" for name in chosen]

    # History already includes the current user message recorded above.
    history = get_memory_context()
    tool_section = "\n".join(observations)

    final_prompt = f"""
Conversation history:
{history}

Tool outputs:
{tool_section}

Provide structured DevOps response.
"""

    reply = llm.invoke(final_prompt)
    final_answer = reply.content

    add_to_memory("assistant", final_answer)
    return final_answer

# -----------------------------
# Test: multiple questions in one session
# -----------------------------

# Three consecutive turns against the same agent; later turns see the
# earlier ones through the shared conversation memory.
demo_turns = [
    (
        "========== QUESTION 1 ==========",
        "My Kubernetes pod shows CrashLoopBackOff error. Give root cause and fix.",
    ),
    (
        "\n========== QUESTION 2 ==========",
        "Now CPU usage is very high after scaling.",
    ),
    (
        "\n========== QUESTION 3 ==========",
        "How do I secure my Kubernetes cluster?",
    ),
]

for header, question in demo_turns:
    print(header)
    print(run_agent(question))



User: My Kubernetes pod shows CrashLoopBackOff error. Give root cause and fix.
Selected tools: ['analyze_error', 'generate_fix', 'monitoring_strategy']
**Root Cause Analysis and Fix for Kubernetes Pod CrashLoopBackOff Error**

*Possible Root Causes Identified by the Tool:*  
- Resource Limits Exceeded (CPU, Memory)
- Incorrect Environment Variables Setup
- Application Code Issues Leading to a Crash
- Missing Dependencies or Misconfigurations in Pod Specification

**Proposed Fix Steps Based on the Tool's Output:**  
1. **Check Logs for Clues:** Use `kubectl logs <pod_name>` command to review any error messages that could indicate why your application is failing within a container restart loop. Look specifically for stack traces or other indicators of an unhandled exception in the app code, missing dependencies during startup, or misconfigurations causing environment variables issues.
   
2. **Adjust Resources:** If resource limits are exceeded (CPU and memory), try to increase them usi