# Observability Logs Data Generator

This notebook generates realistic application logs, error logs, and system metrics for testing elastic-script's LLM integration capabilities.

**Use Cases for LLM Analysis:**
- Summarize error patterns across services
- Classify log severity and impact
- Extract structured information from error messages
- Identify root causes from stack traces
- Generate incident reports from log data


In [None]:
import random
import uuid
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch, helpers


In [None]:
# Open a client against the local single-node Elasticsearch over plain HTTP.
es = Elasticsearch([{"host": "localhost", "port": 9200, "scheme": "http"}])

# Fail fast when the cluster does not answer a ping; later cells all need `es`.
if not es.ping():
    raise Exception("Could not connect to Elasticsearch")
print("Connected to Elasticsearch!")


In [None]:
# Services and components in our fictional microservices architecture.
# Each (service name, host) pair below becomes one production instance record;
# a service listed twice simply runs on two hosts.
services = [
    {"name": service, "host": host, "env": "production"}
    for service, host in (
        ("api-gateway", "gateway-prod-01"),
        ("api-gateway", "gateway-prod-02"),
        ("user-service", "user-prod-01"),
        ("user-service", "user-prod-02"),
        ("order-service", "order-prod-01"),
        ("order-service", "order-prod-02"),
        ("payment-service", "payment-prod-01"),
        ("inventory-service", "inventory-prod-01"),
        ("notification-service", "notif-prod-01"),
        ("search-service", "search-prod-01"),
        ("recommendation-engine", "rec-prod-01"),
        ("cache-service", "cache-prod-01"),
        ("auth-service", "auth-prod-01"),
    )
]

# Log levels paired with their relative sampling weights:
# INFO is most common, FATAL is rare.
log_levels, log_level_weights = (
    list(column)
    for column in zip(
        ("DEBUG", 10),
        ("INFO", 60),
        ("WARN", 20),
        ("ERROR", 8),
        ("FATAL", 2),
    )
)

# HTTP status codes paired with their relative sampling weights.
http_status_codes, http_status_weights = (
    list(column)
    for column in zip(
        (200, 50),
        (201, 10),
        (204, 5),
        (301, 2),
        (302, 2),
        (400, 5),
        (401, 3),
        (403, 2),
        (404, 8),
        (408, 2),
        (429, 3),
        (500, 4),
        (502, 1),
        (503, 2),
        (504, 1),
    )
)

# User agents: four browser/mobile strings followed by three API clients.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Safari/537.36",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15",
    "Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 Chrome/120.0.0.0 Mobile",
] + [
    "PostmanRuntime/7.35.0",
    "python-requests/2.31.0",
    "curl/8.4.0",
]


In [None]:
# Error message templates by service - realistic production errors
#
# Keys are the service names used in `services`; each value is a list of
# message templates for that service. `{placeholder}` tokens are substituted
# with random values by fill_template(). generate_log_entry() draws from this
# table for ERROR/FATAL entries and for roughly half of the WARN entries, and
# falls back to the "api-gateway" list for a service name that has no key here.
error_templates = {
    "api-gateway": [
        "Connection timeout to upstream service {service} after {timeout}ms",
        "Rate limit exceeded for client IP {ip} - {requests} requests in {window}s",
        "Circuit breaker OPEN for service {service}, failing fast",
        "SSL handshake failed with upstream: {reason}",
        "Request body too large: {size}MB exceeds limit of 10MB",
        "Invalid JWT token: {reason}",
        "Upstream service {service} returned {status}: {message}",
    ],
    "user-service": [
        "Failed to authenticate user {user_id}: {reason}",
        "Database connection pool exhausted, {waiting} requests waiting",
        "User {user_id} not found in database",
        "Password hash verification failed for user {user_id}",
        "Email validation failed for {email}: {reason}",
        "Session expired for user {user_id}, last activity {minutes} minutes ago",
        "Failed to send verification email to {email}: SMTP error {code}",
    ],
    "order-service": [
        "Order {order_id} processing failed: insufficient inventory for SKU {sku}",
        "Payment authorization failed for order {order_id}: {reason}",
        "Order {order_id} stuck in {status} state for {hours} hours",
        "Failed to calculate shipping for order {order_id}: invalid address",
        "Duplicate order detected: {order_id} matches existing order",
        "Order cancellation failed: order {order_id} already shipped",
        "Tax calculation service timeout for order {order_id}",
    ],
    "payment-service": [
        "Payment gateway {gateway} returned error: {error_code} - {message}",
        "Card validation failed for transaction {txn_id}: {reason}",
        "Refund request failed: original transaction not found",
        "3DS authentication required but not provided for transaction {txn_id}",
        "Payment timeout: transaction {txn_id} did not complete in {timeout}s",
        "Fraud detection triggered for transaction {txn_id}: risk score {score}",
        "Currency conversion failed: {from_curr} to {to_curr} rate unavailable",
    ],
    "inventory-service": [
        "Stock level mismatch for SKU {sku}: expected {expected}, actual {actual}",
        "Warehouse {warehouse_id} sync failed: connection refused",
        "Reserved inventory expired for order {order_id}, releasing {quantity} units",
        "Low stock alert: SKU {sku} has only {quantity} units remaining",
        "Failed to update inventory: optimistic locking conflict for SKU {sku}",
    ],
    "notification-service": [
        "Push notification failed for device {device_id}: token expired",
        "Email delivery failed to {email}: bounced with code {code}",
        "SMS gateway error: {provider} returned {error_code}",
        "Notification queue backlog: {count} messages pending, oldest {age}s",
        "Template rendering failed for {template}: missing variable {var}",
    ],
    "search-service": [
        "Elasticsearch cluster health RED: {shards} unassigned shards",
        "Search query timeout after {timeout}ms for query: {query}",
        "Index {index} not found, falling back to default search",
        "Aggregation query too expensive: {buckets} buckets requested",
        "Reindex operation failed at document {doc_id}: {reason}",
    ],
    "recommendation-engine": [
        "Model inference failed: GPU memory exhausted",
        "Feature store connection timeout after {timeout}ms",
        "Cold start problem for new user {user_id}: no interaction history",
        "Model version mismatch: expected v{expected}, got v{actual}",
        "Recommendation cache miss rate {percent}% exceeds threshold",
    ],
    "cache-service": [
        "Redis cluster node {node} unreachable, failover initiated",
        "Cache eviction storm: {evicted} keys evicted in {seconds}s",
        "Memory usage critical: {percent}% of max, consider scaling",
        "Key {key} serialization failed: object too large",
        "Distributed lock acquisition timeout for key {key}",
    ],
    "auth-service": [
        "Brute force attack detected from IP {ip}: {attempts} failed attempts",
        "OAuth provider {provider} returned error: {error}",
        "Token refresh failed for user {user_id}: refresh token revoked",
        "MFA verification failed for user {user_id}: code expired",
        "API key revoked due to suspicious activity",
        "LDAP sync failed: connection to {server} refused",
    ],
}


In [None]:
# Info/success message templates
#
# Same layout as `error_templates`: service name -> list of message templates
# whose `{placeholder}` tokens are substituted by fill_template().
# generate_log_entry() draws from this table for DEBUG/INFO entries and for
# roughly half of the WARN entries, falling back to the "api-gateway" list for
# a service name that has no key here.
info_templates = {
    "api-gateway": [
        "Request {method} {path} completed in {duration}ms with status {status}",
        "New client connection from {ip}, total active connections: {count}",
        "Rate limit reset for client {ip}",
        "Circuit breaker CLOSED for service {service}, resuming normal operation",
        "Health check passed for all upstream services",
    ],
    "user-service": [
        "User {user_id} successfully authenticated from {ip}",
        "New user registration completed: {user_id} ({email})",
        "Password changed successfully for user {user_id}",
        "User profile updated: {user_id} changed {fields}",
        "Session created for user {user_id}, expires in {hours} hours",
    ],
    "order-service": [
        "Order {order_id} created successfully, total: ${amount}",
        "Order {order_id} status changed: {from_status} -> {to_status}",
        "Order {order_id} shipped via {carrier}, tracking: {tracking}",
        "Order {order_id} delivered to {city}, {country}",
        "Bulk order import completed: {count} orders processed",
    ],
    "payment-service": [
        "Payment {txn_id} authorized successfully: ${amount}",
        "Payment {txn_id} captured: ${amount} via {method}",
        "Refund processed: ${amount} to {method}",
        "Daily settlement completed: {count} transactions, ${total}",
        "Payment method added for user {user_id}: {type} ending in {last4}",
    ],
    "inventory-service": [
        "Stock updated for SKU {sku}: {old_qty} -> {new_qty}",
        "Inventory reserved for order {order_id}: {quantity} x {sku}",
        "Warehouse {warehouse_id} sync completed: {count} SKUs updated",
        "Restock shipment received: {count} units for {sku}",
        "Inventory audit completed: {matched} matched, {discrepancies} discrepancies",
    ],
    "notification-service": [
        "Email sent to {email}: {subject}",
        "Push notification delivered to {device_count} devices",
        "SMS sent to {phone}: delivery confirmed",
        "Notification batch processed: {count} messages in {duration}ms",
        "User {user_id} notification preferences updated",
    ],
    "search-service": [
        "Search query completed in {duration}ms: {results} results for '{query}'",
        "Index {index} refreshed, {docs} documents searchable",
        "Search suggestions generated for '{prefix}': {count} suggestions",
        "Elasticsearch cluster health GREEN: {nodes} nodes, {shards} shards",
        "Search analytics aggregated: {queries} queries, {clicks} clicks",
    ],
    "recommendation-engine": [
        "Recommendations generated for user {user_id}: {count} items in {duration}ms",
        "Model v{version} deployed successfully to {replicas} replicas",
        "Feature store updated: {features} features for {users} users",
        "A/B test {test_id} started: {variants} variants, {traffic}% traffic",
        "Batch inference completed: {count} users processed",
    ],
    "cache-service": [
        "Cache hit for key {key}: served in {duration}us",
        "Cache warmed: {count} keys preloaded",
        "Redis cluster rebalanced: {slots} slots migrated",
        "Memory usage nominal: {percent}% of max capacity",
        "Cache statistics: {hits} hits, {misses} misses, {ratio}% hit rate",
    ],
    "auth-service": [
        "OAuth login successful: user {user_id} via {provider}",
        "Access token issued for user {user_id}, expires in {minutes} minutes",
        "API key created for user {user_id}",
        "MFA enabled for user {user_id}: {method}",
        "Security audit: {active} active sessions for user {user_id}",
    ],
}


In [None]:
# Stack trace templates for errors
#
# Each entry is one multi-line, Java-style trace whose first line has the form
# "<exception class>: <message>". generate_log_entry() attaches a randomly
# chosen trace to about 70% of ERROR/FATAL entries and stores the text before
# the first ":" as `error.type`, so keep the exception class at the very start
# of every entry.
stack_traces = [
    """java.lang.NullPointerException: Cannot invoke method on null object
    at com.example.service.UserService.getUser(UserService.java:142)
    at com.example.controller.UserController.handleRequest(UserController.java:87)
    at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:897)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:750)""",
    
    """java.sql.SQLException: Connection pool exhausted, no available connections
    at com.zaxxer.hikari.pool.HikariPool.getConnection(HikariPool.java:155)
    at com.example.repository.OrderRepository.findById(OrderRepository.java:45)
    at com.example.service.OrderService.processOrder(OrderService.java:201)
    at com.example.controller.OrderController.createOrder(OrderController.java:63)""",
    
    """redis.clients.jedis.exceptions.JedisConnectionException: Could not get a resource from the pool
    at redis.clients.jedis.JedisPool.getResource(JedisPool.java:84)
    at com.example.cache.CacheService.get(CacheService.java:56)
    at com.example.service.ProductService.getProduct(ProductService.java:78)""",
    
    """org.elasticsearch.ElasticsearchTimeoutException: Timeout waiting for search response
    at org.elasticsearch.client.RestHighLevelClient.search(RestHighLevelClient.java:1012)
    at com.example.search.SearchService.executeSearch(SearchService.java:145)
    at com.example.controller.SearchController.search(SearchController.java:52)""",
    
    """com.stripe.exception.CardException: Your card was declined
    at com.stripe.net.LiveStripeResponseGetter.handleError(LiveStripeResponseGetter.java:214)
    at com.example.payment.StripePaymentGateway.charge(StripePaymentGateway.java:89)
    at com.example.service.PaymentService.processPayment(PaymentService.java:156)""",
    
    """io.grpc.StatusRuntimeException: UNAVAILABLE: upstream connect error or disconnect/reset before headers
    at io.grpc.stub.ClientCalls.toStatusRuntimeException(ClientCalls.java:271)
    at com.example.client.InventoryClient.checkStock(InventoryClient.java:67)
    at com.example.service.OrderService.validateOrder(OrderService.java:112)""",
    
    """org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms
    at org.apache.kafka.clients.producer.KafkaProducer.doSend(KafkaProducer.java:999)
    at com.example.messaging.EventPublisher.publish(EventPublisher.java:45)
    at com.example.service.OrderService.publishOrderEvent(OrderService.java:234)""",
]


In [None]:
# Helper functions
def generate_ip():
    """Return a random dotted-quad IPv4 address string.

    The first octet is drawn from 1-255, the two middle octets from 0-255 and
    the last octet from 1-254.
    """
    octet_bounds = ((1, 255), (0, 255), (0, 255), (1, 254))
    return ".".join(str(random.randint(low, high)) for low, high in octet_bounds)

def generate_user_id():
    """Return a random user identifier of the form ``user_NNNNN`` (five digits)."""
    numeric_part = random.randint(10000, 99999)
    return "user_" + str(numeric_part)

def generate_order_id():
    """Return a random order identifier of the form ``ORD-NNNNNN`` (six digits)."""
    order_number = random.randint(100000, 999999)
    return "ORD-{}".format(order_number)

def generate_transaction_id():
    """Return ``txn_`` followed by the first 16 hex digits of a fresh UUID4."""
    hex_digits = uuid.uuid4().hex
    return "txn_" + hex_digits[:16]

def generate_trace_id():
    """Return a fresh UUID4 rendered as 32 lowercase hex digits (no dashes)."""
    # Zero-padded 32-digit hex of the 128-bit integer, i.e. the UUID's `.hex` form.
    return format(uuid.uuid4().int, "032x")

def generate_span_id():
    """Return the first 16 hex digits of a fresh UUID4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:16]

# Placeholder token -> zero-argument factory returning a fresh random string.
# Values are wrapped in lambdas so that nothing is drawn until a template
# actually contains the token, and so that the helper functions and `services`
# are looked up at call time rather than when this table is built.
_PLACEHOLDER_GENERATORS = {
    "{service}": lambda: random.choice([s["name"] for s in services]),
    "{timeout}": lambda: str(random.choice([1000, 2000, 3000, 5000, 10000, 30000])),
    "{ip}": lambda: generate_ip(),
    "{requests}": lambda: str(random.randint(100, 1000)),
    "{window}": lambda: str(random.choice([60, 300, 600])),
    "{reason}": lambda: random.choice(["connection_refused", "timeout", "invalid_response", "certificate_expired", "rate_limited"]),
    "{size}": lambda: str(random.randint(10, 100)),
    "{status}": lambda: str(random.choice([200, 201, 400, 401, 403, 404, 500, 502, 503, 504])),
    "{message}": lambda: random.choice(["Internal Server Error", "Bad Gateway", "Service Unavailable", "Gateway Timeout", "OK"]),
    "{user_id}": lambda: generate_user_id(),
    "{waiting}": lambda: str(random.randint(10, 100)),
    "{email}": lambda: f"user{random.randint(1000,9999)}@example.com",
    "{minutes}": lambda: str(random.randint(30, 120)),
    "{code}": lambda: str(random.choice([421, 450, 451, 452, 550, 551, 552, 553, 554])),
    "{order_id}": lambda: generate_order_id(),
    "{sku}": lambda: f"SKU-{random.randint(10000, 99999)}",
    "{hours}": lambda: str(random.randint(1, 48)),
    "{txn_id}": lambda: generate_transaction_id(),
    "{score}": lambda: str(random.randint(70, 99)),
    "{from_curr}": lambda: random.choice(["USD", "EUR", "GBP"]),
    "{to_curr}": lambda: random.choice(["JPY", "CNY", "INR"]),
    "{gateway}": lambda: random.choice(["stripe", "paypal", "braintree", "adyen"]),
    "{error_code}": lambda: random.choice(["card_declined", "insufficient_funds", "expired_card", "invalid_cvc"]),
    "{expected}": lambda: str(random.randint(100, 500)),
    "{actual}": lambda: str(random.randint(0, 99)),
    "{warehouse_id}": lambda: f"WH-{random.choice(['US-EAST', 'US-WEST', 'EU-CENTRAL', 'APAC'])}01",
    "{quantity}": lambda: str(random.randint(1, 100)),
    "{device_id}": lambda: f"device_{uuid.uuid4().hex[:12]}",
    "{provider}": lambda: random.choice(["twilio", "sendgrid", "aws_sns", "firebase"]),
    "{count}": lambda: str(random.randint(10, 1000)),
    "{age}": lambda: str(random.randint(60, 3600)),
    "{template}": lambda: random.choice(["order_confirmation", "password_reset", "welcome", "shipping_update"]),
    "{var}": lambda: random.choice(["customer_name", "order_total", "tracking_number", "delivery_date"]),
    "{shards}": lambda: str(random.randint(1, 10)),
    "{query}": lambda: random.choice(["laptop gaming", "iphone case", "running shoes", "wireless headphones"]),
    "{index}": lambda: random.choice(["products", "orders", "users", "logs"]),
    "{buckets}": lambda: str(random.randint(10000, 100000)),
    "{doc_id}": lambda: f"doc_{uuid.uuid4().hex[:8]}",
    "{percent}": lambda: str(random.randint(10, 50)),
    "{test_id}": lambda: f"test_{random.randint(100, 999)}",
    "{node}": lambda: f"redis-{random.randint(1, 5)}",
    "{evicted}": lambda: str(random.randint(1000, 10000)),
    "{seconds}": lambda: str(random.randint(1, 60)),
    "{key}": lambda: f"cache:{random.choice(['user', 'product', 'session', 'config'])}:{random.randint(1000, 9999)}",
    "{attempts}": lambda: str(random.randint(5, 50)),
    "{error}": lambda: random.choice(["access_denied", "invalid_grant", "invalid_client", "expired_token"]),
    "{server}": lambda: f"ldap.{random.choice(['us', 'eu', 'ap'])}.corp.internal",
    "{method}": lambda: random.choice(["GET", "POST", "PUT", "DELETE", "PATCH"]),
    "{path}": lambda: random.choice(["/api/v1/users", "/api/v1/orders", "/api/v1/products", "/api/v1/search", "/health"]),
    "{duration}": lambda: str(random.randint(5, 2000)),
    "{fields}": lambda: random.choice(["email,phone", "name", "address", "preferences"]),
    "{amount}": lambda: f"{random.randint(10, 500)}.{random.randint(0, 99):02d}",
    "{from_status}": lambda: random.choice(["pending", "processing", "shipped"]),
    "{to_status}": lambda: random.choice(["processing", "shipped", "delivered"]),
    "{carrier}": lambda: random.choice(["fedex", "ups", "usps", "dhl"]),
    "{tracking}": lambda: f"{random.choice(['1Z', 'FX', 'DHL'])}{random.randint(1000000000, 9999999999)}",
    "{city}": lambda: random.choice(["New York", "Los Angeles", "Chicago", "Houston", "London", "Paris", "Tokyo"]),
    "{country}": lambda: random.choice(["USA", "UK", "France", "Germany", "Japan", "Canada"]),
    "{old_qty}": lambda: str(random.randint(10, 100)),
    "{new_qty}": lambda: str(random.randint(0, 200)),
    "{matched}": lambda: str(random.randint(900, 1000)),
    "{discrepancies}": lambda: str(random.randint(0, 10)),
    "{subject}": lambda: random.choice(["Order Confirmation", "Shipping Update", "Password Reset", "Welcome!"]),
    "{device_count}": lambda: str(random.randint(1, 5)),
    "{phone}": lambda: f"+1{random.randint(2000000000, 9999999999)}",
    "{results}": lambda: str(random.randint(10, 10000)),
    "{docs}": lambda: str(random.randint(10000, 1000000)),
    "{prefix}": lambda: random.choice(["lapt", "phon", "shoe", "head"]),
    "{nodes}": lambda: str(random.randint(3, 10)),
    "{queries}": lambda: str(random.randint(1000, 100000)),
    "{clicks}": lambda: str(random.randint(100, 10000)),
    "{version}": lambda: f"{random.randint(1, 5)}.{random.randint(0, 9)}.{random.randint(0, 99)}",
    "{replicas}": lambda: str(random.randint(2, 10)),
    "{features}": lambda: str(random.randint(50, 200)),
    "{users}": lambda: str(random.randint(10000, 1000000)),
    "{variants}": lambda: str(random.randint(2, 4)),
    "{traffic}": lambda: str(random.randint(5, 50)),
    "{slots}": lambda: str(random.randint(100, 1000)),
    "{hits}": lambda: str(random.randint(10000, 100000)),
    "{misses}": lambda: str(random.randint(100, 1000)),
    "{ratio}": lambda: str(random.randint(90, 99)),
    "{active}": lambda: str(random.randint(1, 10)),
    "{type}": lambda: random.choice(["visa", "mastercard", "amex"]),
    "{last4}": lambda: f"{random.randint(1000, 9999)}",
    "{total}": lambda: f"{random.randint(10000, 100000)}.{random.randint(0, 99):02d}",
}


def fill_template(template, service_name):
    """Fill a message template with random realistic values.

    Only the placeholders that actually occur in ``template`` are generated,
    so a call costs a few random draws instead of one draw for every known
    placeholder.

    Args:
        template: Message text containing ``{placeholder}`` tokens.
        service_name: Name of the emitting service. Accepted for call-site
            compatibility; it does not influence the generated values.

    Returns:
        The template with every known placeholder substituted. A placeholder
        that occurs several times receives the same value each time, and
        tokens that are not in the placeholder table are left untouched.
    """
    result = template
    for placeholder, make_value in _PLACEHOLDER_GENERATORS.items():
        # Skip the (comparatively expensive) draw when the token is absent.
        if placeholder in result:
            result = result.replace(placeholder, make_value())
    return result


In [None]:
def generate_log_entry(base_time, service_info):
    """Build one synthetic log document for a service instance.

    Args:
        base_time: datetime the entry is anchored to; a random jitter of up to
            3600 seconds plus up to 999 milliseconds is added on top of it.
        service_info: One record from ``services`` (keys ``name``, ``host``,
            ``env``).

    Returns:
        A flat dict keyed by dotted field names. HTTP fields are present only
        for the api-gateway, user, order and search services; the ``error.*``
        fields are present only when a stack trace was attached.
    """
    # Weighted draw of the severity for this entry.
    log_level = random.choices(log_levels, weights=log_level_weights)[0]
    service_name = service_info["name"]

    # Choose which template table feeds the message. Failures always use the
    # error table, warnings flip a coin between the two, the rest use info.
    is_failure = log_level in ("ERROR", "FATAL")
    if is_failure:
        template_table = error_templates
    elif log_level == "WARN":
        template_table = error_templates if random.random() < 0.5 else info_templates
    else:
        template_table = info_templates

    # Services without their own templates borrow the api-gateway ones.
    candidates = template_table.get(service_name, template_table["api-gateway"])
    message = fill_template(random.choice(candidates), service_name)

    # About 70% of failures carry a stack trace; nothing else ever does.
    stack_trace = None
    if is_failure and random.random() < 0.7:
        stack_trace = random.choice(stack_traces)

    # Spread entries around the base time.
    jitter_seconds = random.randint(0, 3600)
    jitter_millis = random.randint(0, 999)
    timestamp = base_time + timedelta(seconds=jitter_seconds, milliseconds=jitter_millis)

    log_entry = {
        "@timestamp": timestamp.isoformat(),
        "log.level": log_level,
        "message": message,
        "service.name": service_name,
        "host.name": service_info["host"],
        "host.environment": service_info["env"],
        # Distributed tracing identifiers
        "trace.id": generate_trace_id(),
        "span.id": generate_span_id(),
        "process.pid": random.randint(1000, 65535),
        "process.thread.name": f"worker-{random.randint(1, 16)}",
    }

    # Request-facing services additionally record HTTP details.
    if service_name in ("api-gateway", "user-service", "order-service", "search-service"):
        status_code = random.choices(http_status_codes, weights=http_status_weights)[0]
        log_entry.update({
            "http.request.method": random.choice(["GET", "POST", "PUT", "DELETE"]),
            "http.response.status_code": status_code,
            "http.request.duration_ms": random.randint(1, 5000),
            "client.ip": generate_ip(),
            "user_agent.original": random.choice(user_agents),
        })

    # The exception class is whatever precedes the first colon of the trace.
    if stack_trace:
        log_entry["error.stack_trace"] = stack_trace
        log_entry["error.type"] = stack_trace.partition(":")[0]

    return log_entry


In [None]:
# Target index and its explicit mapping.
index_name = "application-logs"

# One (field name, Elasticsearch field type) pair per document field.
mapping = {
    "mappings": {
        "properties": {
            field: {"type": field_type}
            for field, field_type in (
                ("@timestamp", "date"),
                ("log.level", "keyword"),
                ("message", "text"),
                ("service.name", "keyword"),
                ("host.name", "keyword"),
                ("host.environment", "keyword"),
                ("trace.id", "keyword"),
                ("span.id", "keyword"),
                ("process.pid", "integer"),
                ("process.thread.name", "keyword"),
                ("http.request.method", "keyword"),
                ("http.response.status_code", "integer"),
                ("http.request.duration_ms", "integer"),
                ("client.ip", "ip"),
                ("user_agent.original", "text"),
                ("error.stack_trace", "text"),
                ("error.type", "keyword"),
            )
        }
    }
}

# Start from a clean slate: drop any previous copy of the index, then recreate it.
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
    print(f"Deleted existing index: {index_name}")

es.indices.create(index=index_name, body=mapping)
print(f"Created index: {index_name}")


In [None]:
# Generate logs for the past 7 days
def generate_and_index_logs(num_logs=10000):
    """Generate synthetic log entries over the past 7 days and bulk-index them.

    Entries are built with generate_log_entry() for a randomly chosen service
    instance and sent to ``index_name`` through ``helpers.bulk`` in batches of
    1000, with a progress line printed after each full batch.

    Timestamps are produced from a timezone-aware UTC clock. A naive local
    timestamp would be serialised without an offset, which Elasticsearch reads
    as UTC, shifting every entry by the machine's UTC offset.

    Args:
        num_logs: Number of log entries to generate and index.
    """
    # Local import: the notebook's import cell only brings in datetime/timedelta.
    from datetime import timezone

    print(f"Generating {num_logs} log entries...")

    batch_size = 1000
    # generate_log_entry() adds up to 3600 s + 999 ms on top of the base time,
    # so the base window must end that much earlier to keep every entry in the past.
    max_entry_jitter_seconds = 3601

    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(days=7)
    # Loop-invariant: largest base offset (in seconds) that cannot spill past end_time.
    max_base_offset = int((end_time - start_time).total_seconds()) - max_entry_jitter_seconds

    actions = []
    for i in range(num_logs):
        # Random base time within the past 7 days
        base_time = start_time + timedelta(seconds=random.randint(0, max_base_offset))

        service_info = random.choice(services)
        log_entry = generate_log_entry(base_time, service_info)

        actions.append({
            "_index": index_name,
            "_source": log_entry
        })

        # Bulk index in batches
        if len(actions) >= batch_size:
            helpers.bulk(es, actions)
            print(f"Indexed {i + 1} logs...")
            actions = []

    # Index whatever is left over from the last, partial batch
    if actions:
        helpers.bulk(es, actions)

    print(f"Successfully indexed {num_logs} log entries!")

# Generate 10,000 log entries
generate_and_index_logs(10000)


In [None]:
# Make the freshly indexed documents searchable, then report the total.
es.indices.refresh(index=index_name)
count = es.count(index=index_name)["count"]
print(f"\nTotal documents in '{index_name}': {count}")

# One aggregation-only search (no hits returned) that buckets the documents
# by log level and by service.
agg_result = es.search(
    index=index_name,
    body={
        "size": 0,
        "aggs": {
            "by_level": {"terms": {"field": "log.level"}},
            "by_service": {"terms": {"field": "service.name", "size": 20}},
        },
    },
)

# Print each distribution under its own heading.
for heading, agg_name in (
    ("\nLog level distribution:", "by_level"),
    ("\nService distribution:", "by_service"),
):
    print(heading)
    for bucket in agg_result["aggregations"][agg_name]["buckets"]:
        print(f"  {bucket['key']}: {bucket['doc_count']}")


In [None]:
# Print the five most recent ERROR/FATAL entries (great for LLM analysis)
print("\n" + "=" * 80)
print("SAMPLE ERROR LOGS (perfect for LLM analysis):")
print("=" * 80)

error_logs = es.search(
    index=index_name,
    body={
        "query": {"terms": {"log.level": ["ERROR", "FATAL"]}},
        "size": 5,
        "sort": [{"@timestamp": "desc"}],
    },
)

for hit in error_logs["hits"]["hits"]:
    source = hit["_source"]
    print(f"\n[{source['log.level']}] {source['service.name']} @ {source['@timestamp']}")
    print(f"  Message: {source['message']}")
    # error.type exists only on entries that were given a stack trace.
    if "error.type" in source:
        print(f"  Error Type: {source['error.type']}")

# Closing banner followed by usage hints, one printed line each.
for line in (
    "\n" + "=" * 80,
    "Data generation complete! Ready for LLM-powered log analysis.",
    "=" * 80,
    "\nExample elastic-script queries you can now run:",
    "  - LLM_SUMMARIZE(<error logs from payment-service>)",
    "  - LLM_CLASSIFY(<log message>, ['critical', 'warning', 'info'])",
    "  - LLM_EXTRACT(<stack trace>, ['exception_type', 'root_cause', 'affected_service'])",
):
    print(line)
