# Volume 1, Chapter 8: Cost Optimization

**Reduce Your AI Bill by 50-70%**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eduardd76/AI_for_networking_and_security_engineers/blob/main/Volume-1-Foundations/Colab-Notebooks/Vol1_Ch8_Cost_Optimization.ipynb)

---

**What you'll learn:**
- 💰 Calculate and track API costs
- 🔀 Route to cheaper models intelligently
- ✂️ Minimize tokens in prompts
- 📦 Batch requests to save money

## 🔧 Setup

In [None]:
!pip install -q anthropic tiktoken

import os
from getpass import getpass

# Prefer the key stored in Colab's secrets. Outside Colab (or if reading the
# secret fails for any reason) keep an already-exported ANTHROPIC_API_KEY and
# only prompt interactively when none is set.
try:
    from google.colab import userdata
    os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')
except Exception:
    # `Exception` instead of a bare `except:` so that Ctrl-C / SystemExit are
    # not swallowed; a failed import or a failed secret lookup still lands here.
    if 'ANTHROPIC_API_KEY' not in os.environ:
        os.environ['ANTHROPIC_API_KEY'] = getpass('Anthropic API key: ')

from anthropic import Anthropic
import tiktoken

# No explicit key is passed: the client is expected to pick it up from the
# ANTHROPIC_API_KEY environment variable populated above.
client = Anthropic()
# NOTE(review): cl100k_base is one of tiktoken's OpenAI encodings, so token
# counts made with it are presumably only an estimate for Claude prompts —
# confirm before using them for billing-grade numbers.
encoding = tiktoken.get_encoding("cl100k_base")
print("✓ Ready!")

---
## 💰 Example 1: Cost Tracker

In [None]:
# Rates in USD per one million tokens, keyed by model ID.
# NOTE: 0.25 / 1.25 are the rates of the older Claude 3 Haiku; Claude 3.5 Haiku
# is listed at 0.80 / 4.00. Prices change over time, so re-check these values
# against Anthropic's pricing page before trusting any projection.
PRICING = {
    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
}

class CostTracker:
    """Accumulate the USD cost of API calls from their token usage.

    Args:
        pricing: Optional mapping of model ID to ``{"input": rate, "output": rate}``
            with rates in USD per million tokens. When omitted, the
            module-level ``PRICING`` table is used.
    """

    def __init__(self, pricing=None):
        # Keep only the override; the global table is looked up at track()
        # time so edits to PRICING made after construction are still honoured.
        self._pricing = pricing
        self.total_cost = 0
        self.calls = 0

    def track(self, model, input_tokens, output_tokens):
        """Record one call and return its cost in USD.

        Raises:
            KeyError: if ``model`` has no entry in the pricing table. Nothing
                is recorded in that case.
        """
        table = PRICING if self._pricing is None else self._pricing
        try:
            rates = table[model]
        except KeyError:
            known = ", ".join(sorted(table)) or "<none>"
            raise KeyError(
                f"No pricing configured for model {model!r}; known models: {known}"
            ) from None

        # Rates are per million tokens, hence the 1e6 divisor.
        cost = (input_tokens / 1e6 * rates["input"]) + (output_tokens / 1e6 * rates["output"])
        self.total_cost += cost
        self.calls += 1
        return cost

    def report(self):
        """Print the number of calls, the total cost and the average cost per call."""
        # max(..., 1) keeps the average well-defined before any call is tracked.
        average = self.total_cost / max(self.calls, 1)
        print(f"Total calls: {self.calls}")
        print(f"Total cost: ${self.total_cost:.4f}")
        print(f"Avg cost/call: ${average:.4f}")

tracker = CostTracker()

# Ask the same short question three times and record what each call cost.
for call_index in range(3):
    reply = client.messages.create(
        messages=[{"role": "user", "content": "What is OSPF? One sentence."}],
        max_tokens=100,
        model="claude-3-5-haiku-20241022",
    )
    usage = reply.usage
    call_cost = tracker.track(
        "claude-3-5-haiku-20241022",
        usage.input_tokens,
        usage.output_tokens,
    )
    print(f"Call {call_index+1}: ${call_cost:.6f}")

# Separator line, then the accumulated totals.
print("\n" + "="*40)
tracker.report()

---
## 🔀 Example 2: Smart Model Routing

In [None]:
def _has_keyword(text, keywords):
    """Return True if any keyword appears in ``text`` as a whole word or phrase.

    Simple inflections (-s, -es, -d, -ed, -ing) of the keyword are accepted,
    but a keyword embedded in a longer, unrelated word is not.
    """
    import re  # local import: this cell runs before the notebook's later `import re`

    return any(
        re.search(rf"\b{re.escape(kw)}(?:s|es|d|ed|ing)?\b", text)
        for kw in keywords
    )


def classify_task(prompt):
    """Classify task complexity from keywords in the prompt.

    Returns:
        "complex" if a complex keyword is present (checked first, so it wins
        over a simple keyword in the same prompt), "simple" if a simple
        keyword is present, otherwise "medium".

    Keywords are matched case-insensitively on word boundaries so that, for
    example, "designated" does not count as "design" and "account" does not
    count as "count".
    """
    prompt_lower = prompt.lower()

    # Complex tasks need Sonnet
    complex_keywords = ["troubleshoot", "diagnose", "why", "design", "compare", "analyze security"]
    if _has_keyword(prompt_lower, complex_keywords):
        return "complex"

    # Simple tasks can use Haiku
    simple_keywords = ["extract", "list", "classify", "convert", "what is", "count"]
    if _has_keyword(prompt_lower, simple_keywords):
        return "simple"

    return "medium"

def smart_call(prompt, max_tokens=500):
    """Send ``prompt`` to the model tier that matches its classified complexity.

    Returns a dict holding the chosen model ID, the complexity label, the text
    of the first content block of the reply, and the input/output token counts
    reported in the response's usage.
    """
    haiku = "claude-3-5-haiku-20241022"
    sonnet = "claude-3-5-sonnet-20241022"
    # Only "complex" work is sent to the larger model.
    routes = {"simple": haiku, "medium": haiku, "complex": sonnet}

    complexity = classify_task(prompt)
    model = routes[complexity]

    user_message = {"role": "user", "content": prompt}
    reply = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=[user_message],
    )
    usage = reply.usage

    return dict(
        model=model,
        complexity=complexity,
        response=reply.content[0].text,
        input_tokens=usage.input_tokens,
        output_tokens=usage.output_tokens,
    )

# Test different prompts
prompts = [
    "What is BGP?",  # Simple → Haiku
    "Extract the IP from: ip address 10.1.1.1 255.255.255.0",  # Simple → Haiku
    "Why won't my OSPF adjacency form? Troubleshoot this config...",  # Complex → Sonnet
]

print("🔀 SMART MODEL ROUTING")
print("=" * 60)

for prompt in prompts:
    result = smart_call(prompt)
    # Only add an ellipsis when the prompt was actually cut off.
    preview = prompt if len(prompt) <= 50 else prompt[:50] + "..."
    print(f"\nPrompt: {preview}")
    print(f"  Complexity: {result['complexity']}")
    # Model IDs look like "claude-3-5-haiku-20241022": the second-to-last
    # dash-separated field is the family name.
    print(f"  Model: {result['model'].split('-')[-2]}")
    print(f"  Tokens: {result['input_tokens']} in / {result['output_tokens']} out")

---
## ✂️ Example 3: Token Minimization

In [None]:
import re

def count_tokens(text):
    """Return how many tokens the module-level ``encoding`` produces for ``text``."""
    token_ids = encoding.encode(text)
    return len(token_ids)

def minimize_prompt(prompt):
    """Strip politeness phrases and filler words from a prompt and report the savings.

    Phrases are removed case-insensitively and only as whole words/phrases.
    Afterwards every run of whitespace — newlines included — is collapsed to
    a single space, so multi-line content comes back on one line.

    Returns:
        dict with the keys "original", "optimized", "original_tokens",
        "optimized_tokens" and "saved" (original minus optimized token count).
    """
    original = prompt

    politeness = ["please", "thank you", "thanks", "kindly", "I would like",
                  "could you", "would you", "I appreciate", "if you could"]
    fillers = ["basically", "actually", "really", "very", "just"]

    # Remove politeness first, then fillers, one phrase at a time: a phrase is
    # matched against the text as already reduced by the earlier removals.
    for phrase in politeness + fillers:
        # re.escape keeps the phrase literal should the list ever gain an
        # entry containing regex metacharacters.
        prompt = re.sub(rf'\b{re.escape(phrase)}\b', '', prompt, flags=re.IGNORECASE)

    # Clean up whitespace left behind by the removals
    prompt = re.sub(r'\s+', ' ', prompt).strip()

    # Tokenize each version once and reuse the counts.
    original_tokens = count_tokens(original)
    optimized_tokens = count_tokens(prompt)

    return {
        "original": original,
        "optimized": prompt,
        "original_tokens": original_tokens,
        "optimized_tokens": optimized_tokens,
        "saved": original_tokens - optimized_tokens
    }

# Test: a deliberately wordy request wrapped around a small config snippet
verbose_prompt = """
Hello! I would really appreciate it if you could please help me out.
Could you kindly analyze this network configuration for me?
I just basically need to know if there are any security issues.
Thank you so much for your help!

Config:
interface Gi0/0
 ip address 10.1.1.1 255.255.255.0
"""

result = minimize_prompt(verbose_prompt)

print("✂️ PROMPT MINIMIZATION")
print("=" * 60)
print(f"Original: {result['original_tokens']} tokens")
print(f"Optimized: {result['optimized_tokens']} tokens")
# Saved tokens as an absolute count and as a share of the original.
print(f"Saved: {result['saved']} tokens ({result['saved']/result['original_tokens']*100:.0f}%)")
print(f"\nOptimized prompt:\n{result['optimized']}")

---
## 📦 Example 4: Batch Processing

In [None]:
def batch_analyze(items, batch_size=5):
    """Classify log entries in groups so that several entries share one API call.

    Args:
        items: Sequence of log lines; it is sliced, so it must support
            ``len()`` and slicing.
        batch_size: Maximum number of entries per request. Must be >= 1.

    Returns:
        One dict per request with "batch_size" (entries in that request),
        "response" (text of the first content block of the reply) and
        "tokens" (input plus output tokens reported for the request).

    Raises:
        ValueError: if ``batch_size`` is less than 1. A negative size would
            otherwise yield an empty result without any request being made.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    results = []

    for start in range(0, len(items), batch_size):
        batch = items[start:start + batch_size]

        # Combine into single prompt; entries are numbered from 1 within each batch.
        numbered = "".join(
            f"{number}. {item}\n" for number, item in enumerate(batch, start=1)
        )
        batch_prompt = (
            "Classify each log entry (INFO/WARNING/ERROR/CRITICAL):\n\n"
            + numbered
            + "\nReturn only: 1. LEVEL, 2. LEVEL, etc."
        )

        response = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=200,
            messages=[{"role": "user", "content": batch_prompt}]
        )

        results.append({
            "batch_size": len(batch),
            "response": response.content[0].text,
            "tokens": response.usage.input_tokens + response.usage.output_tokens
        })

    return results

# Sample logs
logs = [
    "%LINK-3-UPDOWN: Interface Gi0/1, changed state to down",
    "%SYS-5-CONFIG_I: Configured from console",
    "%OSPF-5-ADJCHG: Nbr 10.1.1.1 DOWN",
    "%LINEPROTO-5-UPDOWN: Line protocol on Gi0/0, changed state to up",
    "%SYS-2-MALLOCFAIL: Memory allocation failed",
]

print("📦 BATCH VS INDIVIDUAL")
print("=" * 60)

# Batch approach: batch_size equals the number of logs, so everything goes
# out in a single request.
batch_results = batch_analyze(logs, batch_size=5)
batch_tokens = sum(r["tokens"] for r in batch_results)
print(f"\nBatch (1 call for {len(logs)} items):")
print(f"  Total tokens: {batch_tokens}")
print(f"  Response: {batch_results[0]['response']}")

# Individual approach (for comparison): one request per log line.
individual_tokens = 0
for log in logs:
    response = client.messages.create(
        model="claude-3-5-haiku-20241022",
        max_tokens=20,
        messages=[{"role": "user", "content": f"Classify (INFO/WARNING/ERROR/CRITICAL): {log}"}]
    )
    individual_tokens += response.usage.input_tokens + response.usage.output_tokens

print(f"\nIndividual ({len(logs)} separate calls):")
print(f"  Total tokens: {individual_tokens}")
print(f"\n✅ Batch saved {individual_tokens - batch_tokens} tokens ({(1-batch_tokens/individual_tokens)*100:.0f}% reduction)")

---
## 📊 Example 5: Monthly Cost Projection

In [None]:
def project_monthly_cost(daily_calls, avg_input_tokens, avg_output_tokens, model,
                         days=30, pricing=None):
    """Project the USD cost of a steady daily workload over ``days`` days.

    Args:
        daily_calls: Number of API calls per day.
        avg_input_tokens: Average input tokens per call.
        avg_output_tokens: Average output tokens per call.
        model: Model ID to look up in the pricing table.
        days: Length of the projection period; defaults to a 30-day month.
        pricing: Optional mapping of model ID to ``{"input": rate, "output": rate}``
            in USD per million tokens; defaults to the module-level ``PRICING``.

    Returns:
        The projected cost in USD.

    Raises:
        KeyError: if ``model`` is not present in the pricing table.
    """
    table = PRICING if pricing is None else pricing
    p = table[model]
    # Rates are per million tokens, hence the 1e6 divisor.
    daily_cost = (daily_calls * avg_input_tokens / 1e6 * p["input"]) + \
                 (daily_calls * avg_output_tokens / 1e6 * p["output"])
    return daily_cost * days

print("üìä MONTHLY COST PROJECTION")
print("=" * 60)

scenarios = [
    {"name": "Light use", "daily_calls": 100, "input": 500, "output": 200},
    {"name": "Medium use", "daily_calls": 500, "input": 500, "output": 200},
    {"name": "Heavy use", "daily_calls": 2000, "input": 500, "output": 200},
]

for scenario in scenarios:
    haiku_cost = project_monthly_cost(
        scenario["daily_calls"], scenario["input"], scenario["output"],
        "claude-3-5-haiku-20241022"
    )
    sonnet_cost = project_monthly_cost(
        scenario["daily_calls"], scenario["input"], scenario["output"],
        "claude-3-5-sonnet-20241022"
    )
    
    print(f"\n{scenario['name']} ({scenario['daily_calls']} calls/day):")
    print(f"  Haiku:  ${haiku_cost:>8.2f}/month")
    print(f"  Sonnet: ${sonnet_cost:>8.2f}/month")
    print(f"  Savings with Haiku: ${sonnet_cost - haiku_cost:.2f} ({(1-haiku_cost/sonnet_cost)*100:.0f}%)")

---
## 🎯 Key Takeaways

| Strategy | Savings | Effort |
|----------|---------|--------|
| Use Haiku for simple tasks | 50-90% | Low |
| Remove verbose language | 10-20% | Low |
| Batch multiple items | 30-50% | Medium |
| Cache repeated prompts | 50-80% | Medium |

**Quick wins:**
1. Default to Haiku, upgrade only when needed
2. Remove "please", "thank you", filler words
3. Batch similar tasks into single calls
4. Track costs to find optimization opportunities

---

## 📚 Next Steps

➡️ [Chapter 9: Working with Network Data](./Vol1_Ch9_Network_Data.ipynb)