In [0]:
# Notebook input widgets, declared as (widget name, default value, label).
# Every widget is a free-text box; main() converts the values to numbers.
for _widget_name, _default_value, _widget_label in (
    ("model", "claude-sonnet-4", "Model"),
    ("cloud", "aws", "Cloud"),
    ("tpm_millions", "8.0", "TPM (Millions)"),
    ("input_tokens_per_request", "30000", "Input Tokens / Request"),
    ("output_tokens_per_request", "1200", "Output Tokens / Request"),
    ("caching_ratio_percent", "80.0", "Cache Ratio (%)"),
    ("discount_percent", "0", "Discount (%)"),
):
    dbutils.widgets.text(_widget_name, _default_value, _widget_label)

In [0]:
"""
Claude Bedrock Pricing Calculator
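Estimates the cost of running Claude models from a tokens-per-minute (TPM) budget and usage patterns.
Base rates are the AWS Bedrock prices; 'gcp' reuses them unchanged and 'azure' applies a 50% markup.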
"""

class ClaudeBedrockPricing:
    """Estimate steady-state (24/7) spend for Claude models from a TPM budget.

    The per-1K-token rate table lives in ``self.pricing``. Estimates are built
    by splitting a tokens-per-minute budget into requests, pricing the
    resulting input / cached-input / output tokens, and scaling the per-minute
    cost up to hour, day, 30-day month and 365-day year.
    """

    # Cloud identifiers accepted by get_pricing_for_cloud (case-insensitive).
    SUPPORTED_CLOUDS = ("aws", "gcp", "azure")
    # Multiplier applied to every base rate for Azure (50% markup).
    AZURE_MARKUP = 1.5
    # Month length in days used for every monthly projection.
    DAYS_PER_MONTH = 30

    def __init__(self):
        # Pricing per 1K tokens (as of January 2025)
        # Source: https://aws.amazon.com/bedrock/pricing/
        self.pricing = {
            "claude-sonnet-4": {
                "input_per_1k": 0.003,     # $3.00 per 1M tokens
                "output_per_1k": 0.015,    # $15.00 per 1M tokens
                "cache_write_per_1k": 0.00375,  # $3.75 per 1M tokens
                "cache_read_per_1k": 0.0003     # $0.30 per 1M tokens
            },
            "claude-3-7-sonnet": {
                "input_per_1k": 0.003,     # $3.00 per 1M tokens
                "output_per_1k": 0.015,    # $15.00 per 1M tokens
                "cache_write_per_1k": 0.00375,  # $3.75 per 1M tokens
                "cache_read_per_1k": 0.0003     # $0.30 per 1M tokens
            },
            "claude-opus-4": {
                "input_per_1k": 0.015,     # $15.00 per 1M tokens (5x Sonnet)
                "output_per_1k": 0.075,    # $75.00 per 1M tokens (5x Sonnet)
                "cache_write_per_1k": 0.01875,  # $18.75 per 1M tokens (5x Sonnet)
                "cache_read_per_1k": 0.0015     # $1.50 per 1M tokens (5x Sonnet)
            }
        }

    @staticmethod
    def _require_percent(name, value):
        """Raise ValueError unless ``value`` lies in the closed range 0-100."""
        # Written as a chained comparison so NaN is rejected as well.
        if not 0 <= value <= 100:
            raise ValueError(f"{name} must be between 0 and 100, got {value}")

    @staticmethod
    def _require_non_negative(name, value):
        """Raise ValueError unless ``value`` is zero or positive."""
        if not value >= 0:
            raise ValueError(f"{name} must be non-negative, got {value}")

    @classmethod
    def _per_month(cls, per_minute):
        """Scale a per-minute quantity to a 30-day month of 24/7 usage."""
        return per_minute * 60 * 24 * cls.DAYS_PER_MONTH

    def get_pricing_for_cloud(self, model, cloud):
        """
        Get pricing for a model on a specific cloud provider

        Args:
            model (str): Claude model name (a key of ``self.pricing``)
            cloud (str): Cloud provider ('aws', 'gcp', or 'azure'); matched
                case-insensitively, ignoring surrounding whitespace

        Returns:
            dict: A fresh copy of the per-1K-token rates for the model. AWS
                and GCP return the base rates; Azure returns every rate
                multiplied by ``AZURE_MARKUP``. ``self.pricing`` is never
                modified.

        Raises:
            ValueError: If the model or the cloud is not supported.
        """
        if model not in self.pricing:
            raise ValueError(f"Model '{model}' not supported. Available models: {list(self.pricing.keys())}")

        normalized_cloud = cloud.strip().lower()
        if normalized_cloud not in self.SUPPORTED_CLOUDS:
            raise ValueError(f"Cloud '{cloud}' not supported. Available clouds: aws, gcp, azure")

        base_pricing = self.pricing[model]
        if normalized_cloud == 'azure':
            return {key: rate * self.AZURE_MARKUP for key, rate in base_pricing.items()}
        return base_pricing.copy()

    def calculate_production_cost(self, model, cloud, tpm_millions, input_tokens_per_request,
                                output_tokens_per_request, caching_ratio_percent, discount_percent=0):
        """
        Calculate production cost based on TPM and usage patterns

        The TPM budget is divided by the tokens consumed per request to get a
        request rate; input tokens are then split into cache reads and
        regularly priced input according to ``caching_ratio_percent``.
        Cache-write cost is not part of the estimate: only cache reads,
        regular input and output tokens are priced.

        Args:
            model (str): Claude model name
            cloud (str): Cloud provider ('aws', 'gcp', or 'azure')
            tpm_millions (float): Total tokens per minute in millions (24/7 usage)
            input_tokens_per_request (int): Input tokens per request
            output_tokens_per_request (int): Output tokens per request
            caching_ratio_percent (float): Percentage of input tokens that benefit from caching (0-100)
            discount_percent (float): Percentage discount to apply to total monthly bill (0-100)

        Returns:
            dict: Comprehensive cost breakdown. The echoed inputs are returned
                as passed. ``cost_per_hour``/``day``/``month``/``year`` are
                pre-discount; only ``cost_per_day_after_discount`` and
                ``cost_per_month_after_discount`` have the discount applied.

        Raises:
            ValueError: If the model or cloud is unsupported, a percentage is
                outside 0-100, a token count or the TPM is negative, or a
                request consumes zero tokens in total.
        """
        pricing = self.get_pricing_for_cloud(model, cloud)

        # Reject inputs that would otherwise yield negative costs or a bare
        # ZeroDivisionError further down.
        self._require_non_negative("tpm_millions", tpm_millions)
        self._require_non_negative("input_tokens_per_request", input_tokens_per_request)
        self._require_non_negative("output_tokens_per_request", output_tokens_per_request)
        self._require_percent("caching_ratio_percent", caching_ratio_percent)
        self._require_percent("discount_percent", discount_percent)

        total_tokens_per_request = input_tokens_per_request + output_tokens_per_request
        if total_tokens_per_request == 0:
            raise ValueError(
                "input_tokens_per_request + output_tokens_per_request must be greater than 0"
            )

        caching_ratio = caching_ratio_percent / 100
        discount_ratio = discount_percent / 100

        # Calculate requests per minute
        tpm_actual = tpm_millions * 1_000_000  # Convert to actual tokens
        requests_per_minute = tpm_actual / total_tokens_per_request

        # Calculate token distribution
        input_tokens_per_minute = requests_per_minute * input_tokens_per_request
        output_tokens_per_minute = requests_per_minute * output_tokens_per_request

        # Split input into cached reads and regularly priced tokens. A ratio
        # of 0 naturally yields zero cache reads, so no special case is needed.
        cache_read_tokens_per_minute = input_tokens_per_minute * caching_ratio
        regular_input_tokens_per_minute = input_tokens_per_minute * (1 - caching_ratio)

        # Calculate costs per minute
        cache_read_cost_per_minute = (cache_read_tokens_per_minute / 1000) * pricing["cache_read_per_1k"]
        regular_input_cost_per_minute = (regular_input_tokens_per_minute / 1000) * pricing["input_per_1k"]
        output_cost_per_minute = (output_tokens_per_minute / 1000) * pricing["output_per_1k"]

        total_cost_per_minute = regular_input_cost_per_minute + output_cost_per_minute + cache_read_cost_per_minute

        # Calculate time-based costs (before discount)
        cost_per_hour = total_cost_per_minute * 60
        cost_per_day = cost_per_hour * 24
        cost_per_month = cost_per_day * self.DAYS_PER_MONTH
        cost_per_year = cost_per_day * 365

        # Apply discount to monthly and daily costs
        cost_per_day_after_discount = cost_per_day * (1 - discount_ratio)
        cost_per_month_after_discount = cost_per_month * (1 - discount_ratio)

        return {
            "model": model,
            "cloud": cloud,
            "tpm_millions": tpm_millions,
            "input_tokens_per_request": input_tokens_per_request,
            "output_tokens_per_request": output_tokens_per_request,
            "caching_ratio_percent": caching_ratio_percent,
            "discount_percent": discount_percent,
            "requests_per_minute": requests_per_minute,
            "requests_per_day": requests_per_minute * 60 * 24,
            "requests_per_month": self._per_month(requests_per_minute),
            # Token breakdown per minute
            "input_tokens_per_minute": input_tokens_per_minute,
            "output_tokens_per_minute": output_tokens_per_minute,
            "cache_read_tokens_per_minute": cache_read_tokens_per_minute,
            "regular_input_tokens_per_minute": regular_input_tokens_per_minute,
            # Cost breakdown per minute
            "regular_input_cost_per_minute": regular_input_cost_per_minute,
            "output_cost_per_minute": output_cost_per_minute,
            "cache_read_cost_per_minute": cache_read_cost_per_minute,
            "total_cost_per_minute": total_cost_per_minute,
            # Time-based costs (before discount)
            "cost_per_hour": cost_per_hour,
            "cost_per_day": cost_per_day,
            "cost_per_month": cost_per_month,
            "cost_per_year": cost_per_year,
            # Time-based costs (after discount)
            "cost_per_day_after_discount": cost_per_day_after_discount,
            "cost_per_month_after_discount": cost_per_month_after_discount,
            # Monthly token volumes
            "monthly_total_tokens": self._per_month(tpm_actual),
            "monthly_input_tokens": self._per_month(input_tokens_per_minute),
            "monthly_output_tokens": self._per_month(output_tokens_per_minute),
            "monthly_cache_read_tokens": self._per_month(cache_read_tokens_per_minute),
            "monthly_regular_input_tokens": self._per_month(regular_input_tokens_per_minute)
        }

    def compare_models_production(self, cloud, tpm_millions, input_tokens_per_request,
                                output_tokens_per_request, caching_ratio_percent, discount_percent=0):
        """
        Compare production costs across all Claude models

        Args:
            cloud (str): Cloud provider ('aws', 'gcp', or 'azure')
            tpm_millions (float): Total tokens per minute in millions
            input_tokens_per_request (int): Input tokens per request
            output_tokens_per_request (int): Output tokens per request
            caching_ratio_percent (float): Percentage of input tokens that benefit from caching
            discount_percent (float): Percentage discount to apply to total monthly bill

        Returns:
            list: One ``calculate_production_cost`` result per model in
                ``self.pricing``, sorted ascending by discounted monthly cost.
                Models with equal cost keep their ``self.pricing`` order.

        Raises:
            ValueError: Propagated from ``calculate_production_cost`` for
                invalid inputs.
        """
        comparisons = [
            self.calculate_production_cost(
                model, cloud, tpm_millions, input_tokens_per_request,
                output_tokens_per_request, caching_ratio_percent, discount_percent
            )
            for model in self.pricing
        ]

        # Sort by monthly cost (after discount)
        return sorted(comparisons, key=lambda x: x["cost_per_month_after_discount"])

    def print_pricing_table(self):
        """Print a formatted table of the base per-1K-token rates for all models"""
        print("Claude Bedrock Pricing (per 1K tokens)")
        print("=" * 80)
        print(f"{'Model':<20} {'Input':<10} {'Output':<10} {'Cache Write':<12} {'Cache Read':<12}")
        print("-" * 80)

        for model, pricing in self.pricing.items():
            print(f"{model:<20} ${pricing['input_per_1k']:<9.5f} ${pricing['output_per_1k']:<9.5f} "
                  f"${pricing['cache_write_per_1k']:<11.5f} ${pricing['cache_read_per_1k']:<11.5f}")

    def print_production_estimate(self, result):
        """Print the discounted daily and monthly cost from a ``calculate_production_cost`` result"""
        print(f"Daily Projected Cost: ${result['cost_per_day_after_discount']:,.2f}")
        print(f"Monthly Projected Cost: ${result['cost_per_month_after_discount']:,.2f}")


def main():
    """Read the notebook widgets, echo the chosen inputs, and print the cost estimate."""
    pricing_calculator = ClaudeBedrockPricing()

    # Widget values arrive as strings; convert the numeric ones up front.
    read_widget = dbutils.widgets.get
    model = read_widget("model")
    cloud = read_widget("cloud")
    tpm_millions = float(read_widget("tpm_millions"))
    input_tokens_per_request = int(read_widget("input_tokens_per_request"))
    output_tokens_per_request = int(read_widget("output_tokens_per_request"))
    caching_ratio_percent = float(read_widget("caching_ratio_percent"))
    discount_percent = float(read_widget("discount_percent"))

    # Echo the inputs, one "label + value" line each, in a fixed order.
    echoed_inputs = (
        ("Model: ", model),
        ("Cloud: ", cloud),
        ("TPM (M): ", tpm_millions),
        ("Input Tokens / request: ", input_tokens_per_request),
        ("Output Tokens / request: ", output_tokens_per_request),
        ("Cache Ratio: ", caching_ratio_percent),
        ("Projected Discount: ", discount_percent),
    )
    print("Calculating Projected Usage: ")
    for label, value in echoed_inputs:
        print(label + str(value))

    # Production cost estimate, including the requested discount.
    estimate = pricing_calculator.calculate_production_cost(
        model,
        cloud,
        tpm_millions,
        input_tokens_per_request,
        output_tokens_per_request,
        caching_ratio_percent,
        discount_percent,
    )
    pricing_calculator.print_production_estimate(estimate)


# Run the estimate only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Calculating Projected Usage: 
Model: claude-sonnet-4
Cloud: aws
TPM (M): 40.0
Input Tokens / request: 30000
Output Tokens / request: 800
Cache Ratio: 80.0
Projected Discount: 0.0
Daily Projected Cost: $69,568.83
Monthly Projected Cost: $2,087,064.94
