# ML/AI Models Quick Start

This notebook demonstrates the cost prediction and anomaly detection models for AI agent operations.

## Setup

In [None]:
import sys
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

# Add the parent directory to the import path so the `src` imports below resolve.
sys.path.append('..')

from src.cost_prediction_model import CostPredictionModel, LLMProvider, AgentExecutionFeatures
from src.anomaly_detection_model import AnomalyDetectionModel, ExecutionMetrics
from src.training_utils import SyntheticDataGenerator, ModelEvaluator

## 1. Cost Prediction Model

Predict costs before running AI agents.

In [None]:
# Build the predictor once; the provider comparison in section 4 reuses it.
cost_model = CostPredictionModel()

# Describe the planned run: a tool-using data-analysis agent on Claude Sonnet.
features = AgentExecutionFeatures(
    provider=LLMProvider.CLAUDE_SONNET,
    agent_type='data_analyzer',
    task_complexity=7,
    data_scope_size=1000,
    max_iterations=3,
    has_tool_use=True,
    historical_avg_tokens=5200,
)

# `prediction` is indexed like a dict below (cost, tokens, confidence, ...).
prediction = cost_model.predict_cost(features)

# Headline estimate, emitted as one newline-separated report.
print(
    f"Predicted Cost: ${prediction['predicted_cost_usd']:.4f}",
    f"Predicted Tokens: {prediction['predicted_tokens']:,}",
    f"Confidence: {prediction['confidence']:.1%}",
    f"\nRecommendation: {prediction['recommendation']}",
    "\nAlternative Providers:",
    sep="\n",
)

# Only the first three alternatives are listed to keep the output short.
top_alternatives = prediction['cost_alternatives'][:3]
for alt in top_alternatives:
    print(f"  {alt['provider']}: ${alt['estimated_cost_usd']:.4f}")

## 2. Anomaly Detection Model

Detect unusual patterns in agent executions.

In [None]:
# Seeded generator; it is reused in section 3 to draw the test sample and
# in section 5 to save the dataset.
generator = SyntheticDataGenerator(seed=42)

# Training sample: 5 agents x 100 executions each, with anomalies requested
# at a 0.05 rate.
training_spec = dict(
    n_agents=5,
    executions_per_agent=100,
    include_anomalies=True,
    anomaly_rate=0.05,
)
training_data = generator.generate_agent_executions(**training_spec)

print(f"Generated {len(training_data)} training executions")

# Preview the first rows as the cell's rich output.
training_data.head()

In [None]:
# Fit the anomaly detector on the synthetic executions generated above.
# NOTE(review): the meaning/units of `sensitivity` are defined in
# src.anomaly_detection_model — confirm before tuning this value.
detector_sensitivity = 3.0
detector = AnomalyDetectionModel(sensitivity=detector_sensitivity)
detector.train(training_data)

print("Anomaly detector trained!")

In [None]:
# A single successful run intended to look ordinary to the trained detector.
# NOTE(review): 'agent_000' is presumably one of the agent ids present in the
# training data — confirm against the generator's output.
normal_fields = dict(
    agent_id='agent_000',
    execution_id='exec_test_1',
    timestamp=datetime.now(),
    provider='claude-sonnet',
    duration_seconds=15.2,
    total_tokens=5100,
    cost_usd=0.082,
    api_calls_made=8,
    success=True,
    error_type=None,
)
normal_execution = ExecutionMetrics(**normal_fields)

# Score the run and report the verdict together with the model's explanation.
result = detector.detect(normal_execution)
print(
    "Normal Execution:",
    f"  Is Anomaly: {result.is_anomaly}",
    f"  {result.explanation}",
    sep="\n",
)

In [None]:
# Same agent as the normal example, but with duration, tokens, cost and API
# calls all several times higher — the "cost spike" scenario.
spike_fields = dict(
    agent_id='agent_000',
    execution_id='exec_test_2',
    timestamp=datetime.now(),
    provider='claude-sonnet',
    duration_seconds=55.0,
    total_tokens=25000,
    cost_usd=0.45,
    api_calls_made=35,
    success=True,
    error_type=None,
)
anomaly_execution = ExecutionMetrics(**spike_fields)

result = detector.detect(anomaly_execution)

# `anomaly_type` and `severity` may be unset, so those two lines fall back
# to 'N/A' instead of dereferencing `.value` on a missing attribute.
print(
    "Anomalous Execution:",
    f"  Is Anomaly: {result.is_anomaly}",
    f"  Type: {result.anomaly_type.value if result.anomaly_type else 'N/A'}",
    f"  Severity: {result.severity.value if result.severity else 'N/A'}",
    f"  Confidence: {result.confidence:.1%}",
    f"  {result.explanation}",
    f"  Action: {result.recommended_action}",
    sep="\n",
)

## 3. Batch Predictions

Run anomaly detection over many executions at once and list the ones that were flagged.

In [None]:
# Draw an evaluation sample from the same generator: 3 agents x 50 executions,
# with a higher requested anomaly rate (0.1) than the training sample (0.05).
test_data = generator.generate_agent_executions(
    anomaly_rate=0.1,
    include_anomalies=True,
    executions_per_agent=50,
    n_agents=3,
)

# DataFrame columns that map one-to-one onto ExecutionMetrics keyword arguments.
metric_fields = (
    'agent_id',
    'execution_id',
    'timestamp',
    'duration_seconds',
    'total_tokens',
    'cost_usd',
    'success',
    'error_type',
    'api_calls_made',
    'provider',
)


def row_to_metrics(row):
    """Build an ExecutionMetrics from one row of the generated DataFrame."""
    return ExecutionMetrics(**{field: row[field] for field in metric_fields})


# One ExecutionMetrics per generated row, in DataFrame order.
test_executions = [row_to_metrics(row) for _, row in test_data.iterrows()]

# Score the whole batch; the result is filtered and column-selected below.
anomaly_results = detector.batch_detect(test_executions)

# Keep only the flagged rows. The explicit comparison is retained so the mask
# is boolean regardless of the dtype of the `is_anomaly` column.
flagged_mask = anomaly_results['is_anomaly'] == True
anomalies = anomaly_results.loc[flagged_mask]
print(f"\nDetected {len(anomalies)} anomalies out of {len(test_executions)} executions")

# Show at most ten flagged executions, restricted to the reporting columns.
report_columns = ['agent_id', 'anomaly_type', 'severity', 'confidence', 'explanation']
anomalies[report_columns].head(10)

## 4. Cost Optimization Analysis

Compare the predicted cost of the same task across different LLM providers.

In [None]:
# Providers to price the same task against.
providers = [
    LLMProvider.CLAUDE_HAIKU,
    LLMProvider.CLAUDE_SONNET,
    LLMProvider.CLAUDE_OPUS,
    LLMProvider.GPT35,
    LLMProvider.GPT4,
]

# Task settings held constant across providers: a single-iteration customer
# support request with no tool use. Only `provider` varies in the loop below.
task_profile = dict(
    agent_type='customer_support',
    task_complexity=5,
    data_scope_size=100,
    has_tool_use=False,
    max_iterations=1,
    historical_avg_tokens=2000,
)

# Collect one record per provider, then build the DataFrame in a single step.
results = []
for provider in providers:
    features = AgentExecutionFeatures(provider=provider, **task_profile)
    pred = cost_model.predict_cost(features)
    record = {
        'provider': provider.value,
        'cost': pred['predicted_cost_usd'],
        'tokens': pred['predicted_tokens'],
    }
    results.append(record)

# Cheapest provider first.
comparison_df = pd.DataFrame(results).sort_values(by='cost')
print("\nProvider Cost Comparison (for customer support task):")
comparison_df

## 5. Save Models and Data

Export the synthetic training data to CSV for production use.

In [None]:
# Persist the synthetic training set generated in section 2.
training_data_path = '../data/training_data.csv'

# Create the output directory if it is missing so the cell also succeeds on a
# fresh checkout (Restart & Run All); this is a no-op when it already exists.
Path(training_data_path).parent.mkdir(parents=True, exist_ok=True)

generator.save_dataset(training_data, training_data_path)

# NOTE(review): only the training data is written here; no model artifact is
# saved by this cell — confirm whether a model export step is expected.
print("Models and data ready for production integration!")