<a href="https://colab.research.google.com/github/atulmahaja/Lesson_one/blob/main/upgrad_session_YAML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import yaml
import pandas as pd
import time
import random

# 1. THE ROUTER POLICY (YAML)
# In a real scenario, this is a separate file (router.yaml)
router_yaml = """
policies:
  support_chat:
    primary: "small_model"   # Cost: $0.01
    fallback: "large_model"  # Cost: $0.10
    risk_tier: "low"
    require_citation: false
  policy_qa:
    primary: "large_model"
    fallback: "human_review"
    risk_tier: "high"
    require_citation: True
  summarization:
    primary: "small_model"
    fallback: "large_model"
    risk_tier: "low"
    require_citation: false

budget:
  max_daily_spend: 5.00
  current_spend: 0.00
"""

config = yaml.safe_load(router_yaml)
print("‚úÖ Router Policy Loaded. Logic: High Risk -> Large Model | Low Risk -> Small Model")

‚úÖ Router Policy Loaded. Logic: High Risk -> Large Model | Low Risk -> Small Model


In [2]:
def estimate_cost(model_type, prompt):
    # Rough estimate: 1 token per 4 chars.
    # Small model: $0.01 per 1k tokens | Large: $0.10 per 1k tokens
    tokens = len(prompt) / 4
    rate = 0.01 if model_type == "small_model" else 0.10
    return (tokens / 1000) * rate

def route_task(task_type, prompt, budget_state):
    policy = config['policies'].get(task_type)

    # Decision Logic
    selected_model = policy['primary']

    # Budget Check: If we are close to the limit, force small model unless high risk
    if budget_state > config['budget']['max_daily_spend'] * 0.9:
        if policy['risk_tier'] != "high":
            selected_model = "small_model"
            print(f"‚ö†Ô∏è Budget Warning! Routing {task_type} to Small Model.")

    return selected_model

# Test the router
task = "support_chat"
p = "How do I reset my password?"
chosen = route_task(task, p, 0.50)
print(f"‚úÖ Router selected: {chosen} for task {task}")

‚úÖ Router selected: small_model for task support_chat


In [3]:
def call_model(model_name, prompt):
    # Simulating model outputs
    if model_name == "small_model":
        return "Here is your answer." # No citation!
    if model_name == "large_model":
        return "According to the manual [Section 4], here is your answer."
    return "ESCALATE_TO_HUMAN"

def run_with_fallback(task_type, prompt):
    policy = config['policies'].get(task_type)
    model = route_task(task_type, prompt, 0.0)

    # First Attempt
    response = call_model(model, prompt)

    # Quality Gate: Does it require citation?
    passed_gate = True
    if policy['require_citation'] and "[" not in response:
        passed_gate = False
        print(f"‚ùå {model} failed Quality Gate (No Citation).")

    # Fallback Logic
    if not passed_gate:
        fallback_model = policy['fallback']
        print(f"üîÑ Escalating to {fallback_model}...")
        response = call_model(fallback_model, prompt)

    return response, "Success" if "[" in response or not policy['require_citation'] else "HITL_REQUIRED"

# Execute
res, status = run_with_fallback("policy_qa", "What is the hotel budget?")
print(f"Final Response: {res} | Status: {status}")

Final Response: According to the manual [Section 4], here is your answer. | Status: Success


In [4]:
test_cases = [
    {"type": "support_chat", "q": "Where is my order?"},
    {"type": "policy_qa", "q": "Can I fly Business?"},
    {"type": "summarization", "q": "Summarize this 10 page doc..."},
    # ... (Imagine 20 varied cases)
] * 7 # Multiplied to get to 21 cases

log = []
total_saved = 0

for case in test_cases:
    start = time.time()
    # Logic to track what we saved vs. always using Large Model
    cost_if_large = estimate_cost("large_model", case['q'])

    res, status = run_with_fallback(case['type'], case['q'])

    actual_cost = estimate_cost("small_model", case['q']) if "small" in str(res) else cost_if_large
    total_saved += (cost_if_large - actual_cost)

    log.append({
        "Task": case['type'],
        "Status": status,
        "Cost_Saved": round(cost_if_large - actual_cost, 4),
        "Latency": round(time.time()-start, 2)
    })

# THE SHIPNMENT: Final Leaderboard
report = pd.DataFrame(log)
print("\n" + "="*40)
print("ROUTER PERFORMANCE REPORT")
print("="*40)
print(f"Total Model Upgrades (Fallbacks): {len(report[report['Status'] == 'Success'])}")
print(f"Total Human Escalations (HITL): {len(report[report['Status'] == 'HITL_REQUIRED'])}")
print(f"ESTIMATED COST SAVED: ${round(total_saved, 2)}")
print("="*40)


ROUTER PERFORMANCE REPORT
Total Model Upgrades (Fallbacks): 21
Total Human Escalations (HITL): 0
ESTIMATED COST SAVED: $0.0
