# Open-Source Integration: LangChain


In [None]:
# !uv add langgraph langchain-google-genai 

In [1]:
from __future__ import annotations

from typing import Annotated, Sequence, TypedDict, Optional, Literal, Any, Dict, List, Tuple
from dataclasses import dataclass
import re

from pydantic import BaseModel, Field

from langchain_core.tools import tool
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, END

## Hardcoded Cloud Data

In [2]:
# -----------------------------------------------------------------------------
# 1) Dummy data (realistic-ish) for tools
# -----------------------------------------------------------------------------
# Everything below is hard-coded sample data that the lookup tools read from.
# Years are string keys ("2025"), months are the 3-letter abbreviations listed
# in MONTHS. 2026 only has entries for Jan-Mar in every table.

# Canonical month keys, in calendar order (index 0 = Jan, index 11 = Dec).
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# Per-service CPU usage (%)
# Layout: service name -> year -> month abbreviation -> integer percentage.
CPU_DB: Dict[str, Dict[str, Dict[str, int]]] = {
    "payments-api": {
        "2025": {
            "Jan": 42, "Feb": 55, "Mar": 61, "Apr": 66, "May": 73, "Jun": 78,
            "Jul": 81, "Aug": 76, "Sep": 68, "Oct": 64, "Nov": 58, "Dec": 49
        },
        "2026": {"Jan": 84, "Feb": 79, "Mar": 71},
    },
    "risk-engine": {
        "2025": {
            "Jan": 35, "Feb": 39, "Mar": 44, "Apr": 52, "May": 57, "Jun": 60,
            "Jul": 62, "Aug": 59, "Sep": 55, "Oct": 50, "Nov": 46, "Dec": 41
        },
        "2026": {"Jan": 63, "Feb": 67, "Mar": 69},
    },
}

# Per-service RAM usage (%)
# Same layout as CPU_DB: service name -> year -> month abbreviation -> integer percentage.
RAM_DB: Dict[str, Dict[str, Dict[str, int]]] = {
    "payments-api": {
        "2025": {
            "Jan": 48, "Feb": 52, "Mar": 58, "Apr": 63, "May": 69, "Jun": 72,
            "Jul": 74, "Aug": 73, "Sep": 67, "Oct": 62, "Nov": 57, "Dec": 51
        },
        "2026": {"Jan": 77, "Feb": 75, "Mar": 70},
    },
    "risk-engine": {
        "2025": {
            "Jan": 41, "Feb": 43, "Mar": 45, "Apr": 49, "May": 53, "Jun": 55,
            "Jul": 57, "Aug": 56, "Sep": 54, "Oct": 51, "Nov": 48, "Dec": 45
        },
        "2026": {"Jan": 58, "Feb": 60, "Mar": 61},
    },
}

# Global cloud bill (e.g., total account bill) by month/year
# We'll include a “spike” around Jul/Aug 2025 to demonstrate detection.
# Layout: year -> month abbreviation -> bill amount as a float. It is not
# broken down by service, unlike CPU_DB / RAM_DB.
CLOUD_BILL_DB: Dict[str, Dict[str, float]] = {
    "2025": {
        "Jan": 12000.0, "Feb": 12150.0, "Mar": 12320.0, "Apr": 12510.0, "May": 12800.0, "Jun": 13020.0,
        "Jul": 16500.0, "Aug": 17200.0, "Sep": 13350.0, "Oct": 13110.0, "Nov": 12990.0, "Dec": 12780.0
    },
    "2026": {"Jan": 13420.0, "Feb": 13610.0, "Mar": 13780.0},
}

## Tool Schemas (Models)

In [3]:
# -----------------------------------------------------------------------------
# 2) Tool schemas + tools (LangChain @tool)
# -----------------------------------------------------------------------------

# Argument schema shared by the get_cpu_data and get_ram_data tools.
# Documented with comments rather than a class docstring so that nothing is
# added to the schema generated from this model.
# NOTE(review): `year` is declared as str; whether an integer year passed by
# the caller is accepted depends on the installed pydantic version's coercion
# rules — confirm.
class CpuRamInput(BaseModel):
    month: str = Field(description="Month like Jan/Feb/Mar... or full month name")
    year: str = Field(description="Year like 2025")
    servicename: str = Field(description="Service name like payments-api")

# Argument schema for the get_cloud_bill tool: the bill is looked up by
# month and year only (no service name).
# Documented with comments rather than a class docstring so that nothing is
# added to the schema generated from this model.
class BillInput(BaseModel):
    month: str = Field(description="Month like Jan/Feb/Mar... or full month name")
    year: str = Field(description="Year like 2025")


In [4]:
# Lower-cased full month name -> canonical 3-letter abbreviation.
# The abbreviations are exactly the twelve entries used as month keys
# throughout this file ("Jan" ... "Dec").
_FULL_MONTH_TO_ABBR = {
    "january": "Jan", "february": "Feb", "march": "Mar", "april": "Apr",
    "may": "May", "june": "Jun", "july": "Jul", "august": "Aug",
    "september": "Sep", "october": "Oct", "november": "Nov", "december": "Dec",
}


def normalize_month(m: str) -> str:
    """Normalize various month inputs to our 3-letter MONTHS keys.

    Accepted spellings (case-insensitive, surrounding whitespace ignored):
    full names ("January"), 3-letter abbreviations ("jan", "JAN"), longer
    abbreviations ("Sept"), and any of those followed by punctuation or
    other non-letter text ("Sep.", "Jan 2025").

    Only the leading run of letters is considered, and it must be a prefix
    (at least three letters long) of a real month name. Words that merely
    start with a month abbreviation, such as "marathon" or "decade", are
    therefore NOT mapped to a month.

    Args:
        m: Raw month text.

    Returns:
        The canonical abbreviation (e.g. "Mar") when the input is recognised;
        otherwise the whitespace-stripped input unchanged, so callers can
        report exactly what failed to match.
    """
    cleaned = m.strip()
    if not cleaned:
        return cleaned

    # Look only at the leading letters so trailing dots, digits or separators
    # ("Sep.", "Jan-25") do not prevent a match.
    leading = re.match(r"[A-Za-z]+", cleaned)
    if leading is not None:
        token = leading.group(0).lower()
        # Three letters is the shortest unambiguous prefix: "ma" could be
        # March or May, while every 3-letter prefix identifies one month.
        if len(token) >= 3:
            for full_name, abbr in _FULL_MONTH_TO_ABBR.items():
                if full_name.startswith(token):
                    return abbr

    return cleaned  # fallback: unrecognised input is passed through

## Metrics Tools

In [6]:
@tool("get_cpu_data", args_schema=CpuRamInput, return_direct=True)
def get_cpu_data(month: str, year: str, servicename: str) -> Dict[str, Any]:
    """Look up CPU usage (%) for a given service, month, year."""
    # The docstring above is kept verbatim: it is what the tool exposes as its
    # description.
    month_key = normalize_month(month)
    year_key = str(year).strip()

    # Fields echoed back in both the success and the not-found payloads.
    lookup = {"service": servicename, "year": year_key, "month": month_key}

    try:
        cpu_percent = CPU_DB[servicename][year_key][month_key]
    except KeyError:
        # Unknown service, year or month: return a structured miss rather
        # than raising.
        return {"error": "CPU data not found", **lookup}

    return {**lookup, "cpu_percent": cpu_percent}


@tool("get_ram_data", args_schema=CpuRamInput, return_direct=True)
def get_ram_data(month: str, year: str, servicename: str) -> Dict[str, Any]:
    """Look up RAM usage (%) for a given service, month, year."""
    # The docstring above is kept verbatim: it is what the tool exposes as its
    # description.
    month_key = normalize_month(month)
    year_key = str(year).strip()

    # Fields echoed back in both the success and the not-found payloads.
    lookup = {"service": servicename, "year": year_key, "month": month_key}

    try:
        ram_percent = RAM_DB[servicename][year_key][month_key]
    except KeyError:
        # Unknown service, year or month: return a structured miss rather
        # than raising.
        return {"error": "RAM data not found", **lookup}

    return {**lookup, "ram_percent": ram_percent}


@tool("get_cloud_bill", args_schema=BillInput, return_direct=True)
def get_cloud_bill(month: str, year: str) -> Dict[str, Any]:
    """Look up total cloud bill for a given month/year."""
    # The docstring above is kept verbatim: it is what the tool exposes as its
    # description.
    month_key = normalize_month(month)
    year_key = str(year).strip()

    # Fields echoed back in both the success and the not-found payloads.
    period = {"year": year_key, "month": month_key}

    try:
        amount = CLOUD_BILL_DB[year_key][month_key]
    except KeyError:
        # Unknown year or month: return a structured miss rather than raising.
        return {"error": "Cloud bill not found", **period}

    return {**period, "cloud_bill_usd": amount}

In [8]:
# Every tool defined above, in one list.
TOOLS = [
    get_cpu_data,
    get_ram_data,
    get_cloud_bill,
]

# Name -> tool lookup, keyed by each tool's registered `name`.
TOOLS_BY_NAME = {registered.name: registered for registered in TOOLS}

# Bare expression so the notebook displays the registry.
TOOLS_BY_NAME

{'get_cpu_data': StructuredTool(name='get_cpu_data', description='Look up CPU usage (%) for a given service, month, year.', args_schema=<class '__main__.CpuRamInput'>, return_direct=True, func=<function get_cpu_data at 0x10cc92c00>),
 'get_ram_data': StructuredTool(name='get_ram_data', description='Look up RAM usage (%) for a given service, month, year.', args_schema=<class '__main__.CpuRamInput'>, return_direct=True, func=<function get_ram_data at 0x10cc928e0>),
 'get_cloud_bill': StructuredTool(name='get_cloud_bill', description='Look up total cloud bill for a given month/year.', args_schema=<class '__main__.BillInput'>, return_direct=True, func=<function get_cloud_bill at 0x10cc92340>)}

## State

In [10]:
# -----------------------------------------------------------------------------
# 4) Agent state
# -----------------------------------------------------------------------------

class AgentState(TypedDict):
    """
    State dictionary for the agent.

    messages: chat history (HumanMessage / AIMessage / ToolMessage)
    plan: parsed intent: cpu/ram/both + service + month/year
    observations: tool outputs we want to keep
    need_bill_window_check: set True if cpu > 75
    bill_window_results: list of bill dicts collected from get_cloud_bill

    The code that fills these fields is not part of this section; the
    descriptions above state the intended use of each key.
    """
    # Annotated with langgraph's `add_messages`.
    # NOTE(review): presumably this makes node updates merge into the existing
    # history instead of replacing it — confirm against the langgraph docs.
    messages: Annotated[Sequence[BaseMessage], add_messages]

    # Free-form dicts; their exact keys are not defined in this section.
    plan: Dict[str, Any]
    observations: Dict[str, Any]

    # Flag and results for the follow-up cloud-bill window check.
    need_bill_window_check: bool
    bill_window_results: List[Dict[str, Any]]

## Nodes

### Helpers: month arithmetic (+/- 2 months), spike detection

In [9]:
def month_to_index(month_abbr: str) -> int:
    """Return the zero-based position of ``month_abbr`` in MONTHS.

    The lookup is an exact match, so the abbreviation must already be in
    canonical form (e.g. "Jan"). A ValueError from ``list.index`` propagates
    when it is not present.
    """
    position = MONTHS.index(month_abbr)
    return position

def index_to_month(idx: int) -> str:
    """Return the MONTHS entry at position ``idx`` (0 = "Jan").

    This is plain list indexing: a negative ``idx`` counts from the end and
    an out-of-range ``idx`` raises IndexError.
    """
    month_abbr = MONTHS[idx]
    return month_abbr

def add_months(year: int, month_abbr: str, delta: int) -> Tuple[int, str]:
    """Shift a (year, month) pair by ``delta`` months.

    ``month_abbr`` is normalized first, so full names are accepted too.
    ``delta`` may be negative. Returns ``(new_year, new_month_abbr)``.
    An unrecognised month raises ValueError from ``month_to_index``.
    """
    start_index = month_to_index(normalize_month(month_abbr))

    # Count months on one absolute axis, then split back into year and
    # month-of-year. Floor division keeps the month index in 0..11 even when
    # delta moves into an earlier year.
    shifted_year, shifted_index = divmod(year * 12 + start_index + delta, 12)
    return shifted_year, index_to_month(shifted_index)

def detect_spikes(bills: List[Dict[str, Any]], spike_ratio: float = 1.15) -> Dict[str, Any]:
    """
    Simple spike detection:
    - Compute median of available bills in window
    - Flag entries at or above median * spike_ratio

    Entries without a usable "cloud_bill_usd" value (key missing, or value
    None, as in a lookup-error dict) are ignored both for the median and for
    the flagging, so one bad entry cannot break the whole window.

    Args:
        bills: Bill dicts; each usable one carries a numeric "cloud_bill_usd".
        spike_ratio: Multiplier applied to the median to get the threshold.

    Returns:
        A dict that always contains:
        - "spike_found": True when at least one entry was flagged.
        - "median": median of the usable bill values, or None if there are none.
        - "spikes": the flagged bill dicts, in input order (possibly empty).
        When no usable values exist, "reason" is also set to "no bill values".
    """
    # Function-scope import keeps this block self-contained.
    from statistics import median as _median

    # Use one predicate for both the median and the flagging.
    priced = [b for b in bills if b.get("cloud_bill_usd") is not None]
    if not priced:
        # Same keys as the normal result so callers can read "spikes" and
        # "median" without special-casing the empty window.
        return {"spike_found": False, "median": None, "spikes": [], "reason": "no bill values"}

    median = _median(b["cloud_bill_usd"] for b in priced)
    threshold = median * spike_ratio

    # ">=" is deliberate: a value exactly on the threshold counts as a spike.
    spikes = [b for b in priced if b["cloud_bill_usd"] >= threshold]

    return {"spike_found": bool(spikes), "median": median, "spikes": spikes}