In [1]:
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname("src"), '..')))

In [16]:
import os
from src.llm_selector.core import LLMSelector
from src.config.settings import ModelConfig, DynamicLLMSelectorConfig
from src.llm_selector.model_registry import ModelRegistry, ModelConfig
from src.compression.openai import OpenaiCompressor
from src.compression.gemini import GeminiCompressor
from src.llm_selector.providers.openai import OpenAIProvider
from src.llm_selector.providers.gemini import GeminiProvider
from src.llm_selector.providers.anthropic import AnthropicProvider
from src.complextiy.gemini import GeminiComplexityAnalyzer
from src.complextiy.code import CodeComplexityAnalyzer
from src.compression.code import CodeCompressor

ImportError: cannot import name 'GeminiCompressor' from 'src.compression.gemini' (/Users/amirdor/Documents/LLMOpt/src/compression/gemini.py)

In [3]:
compressor = OpenaiCompressor()

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


In [4]:
import os
import yaml
from typing import Dict, Any

def load_yaml_files(directory: str) -> Dict[str, Any]:
    """
    Load all YAML files from a given directory and return a unified dictionary.
    
    :param directory: Path to the directory containing YAML files.
    :return: A dictionary containing the merged content of all YAML files.
    """
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Directory not found: {directory}")
    
    unified_data = {}
    
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".yaml"):
                file_path = os.path.join(root, file)
                with open(file_path, "r") as yaml_file:
                    try:
                        # Load the content of the YAML file
                        data = yaml.safe_load(yaml_file)
                        if isinstance(data, dict):
                            # Merge the loaded data into the unified dictionary
                            unified_data.update(data)
                        else:
                            print(f"Skipping {file_path}: Root element is not a dictionary.")
                    except yaml.YAMLError as e:
                        print(f"Error loading {file_path}: {e}")
    
    return unified_data


In [5]:
# Directory containing the YAML files
directory = "../src/config/pricing/"

# Load all YAML files
all_pricing_data = load_yaml_files(directory)

# Print the unified dictionary
print(all_pricing_data)

{'openai_pricing': {'gpt_3_5_turbo': {'input': 0.0005, 'output': 0.0015}, 'gpt_3_5_turbo_16k': {'input': 0.003, 'output': 0.006}, 'gpt_4': {'input': 0.01, 'output': 0.03}, 'gpt_4_32k': {'input': 0.03, 'output': 0.06}, 'gpt_4o': {'input': 0.0025, 'output': 0.01}, 'gpt_4o_mini': {'input': 0.00015, 'output': 0.0006}, 'gpt_4_turbo': {'input': 0.01, 'output': 0.03}, 'o1': {'input': 0.015, 'output': 0.06}, 'o1_mini': {'input': 0.003, 'output': 0.012}, 'gpt_4o_realtime_preview': {'input': 0.005, 'output': 0.02}, 'gpt_4o_mini_realtime_preview': {'input': 0.0006, 'output': 0.0024}}, 'anthropic_pricing': {'claude_3_5_sonnet': {'context_window': 200000, 'input_cost_per_mtok': 3.0, 'output_cost_per_mtok': 15.0, 'prompt_caching_write': 3.75, 'prompt_caching_read': 0.3, 'notes': 'Most intelligent model, 50% discount with Batches API'}, 'claude_3_5_haiku': {'context_window': 200000, 'input_cost_per_mtok': 0.8, 'output_cost_per_mtok': 4.0, 'prompt_caching_write': 1.0, 'prompt_caching_read': 0.08, 'not

In [6]:
# Create custom configuration
custom_config = DynamicLLMSelectorConfig(
    models={
        # Gemini models
        "gemini-1.5-flash": ModelConfig(
            name="gemini-1.5-flash",
            provider="gemini",
            complexity_threshold=20.0,
            cost_per_1k_tokens_input=all_pricing_data["gemini_pricing"]["gemini_1_5_flash"]["input_pricing"]["below_128k"],
            cost_per_1k_tokens_output=all_pricing_data["gemini_pricing"]["gemini_1_5_flash"]["output_pricing"]["below_128k"],
            model_instance=GeminiProvider("gemini-1.5-flash")
        ),
        "gemini-1.5-flash-8b": ModelConfig(
            name="gemini-1.5-flash-8b",
            provider="gemini",
            complexity_threshold=10.0,
            cost_per_1k_tokens_input=all_pricing_data["gemini_pricing"]["gemini_1_5_flash_8b"]["input_pricing"]["below_128k"],
            cost_per_1k_tokens_output=all_pricing_data["gemini_pricing"]["gemini_1_5_flash_8b"]["output_pricing"]["below_128k"],
            model_instance=GeminiProvider("gemini-1.5-flash-8b")
        ),
        # "gemini-1.5-pro": ModelConfig(
        #     name="gemini-1.5-pro",
        #     provider="gemini",
        #     complexity_threshold=50.0,
        #     cost_per_1k_tokens_input=all_pricing_data["gemini_pricing"]["gemini_1_5_pro"]["input_pricing"]["below_128k"],
        #     cost_per_1k_tokens_output=all_pricing_data["gemini_pricing"]["gemini_1_5_pro"]["output_pricing"]["below_128k"],
        #     model_instance=GeminiProvider("gemini-1.5-pro")
        # ),

        # OpenAI models
        "gpt-3.5-turbo": ModelConfig(
            name="gpt-3.5-turbo",
            provider="openai",
            complexity_threshold=40.0,
            cost_per_1k_tokens_input=all_pricing_data["openai_pricing"]["gpt_3_5_turbo"]["input"],
            cost_per_1k_tokens_output=all_pricing_data["openai_pricing"]["gpt_3_5_turbo"]["output"],
            model_instance=OpenAIProvider("gpt-3.5-turbo")
        ),
        "gpt-4o-mini": ModelConfig(
            name="gpt-4o-mini",
            provider="openai",
            complexity_threshold=55.0,
            cost_per_1k_tokens_input=all_pricing_data["openai_pricing"]["gpt_4o_mini"]["input"],
            cost_per_1k_tokens_output=all_pricing_data["openai_pricing"]["gpt_4o_mini"]["output"],
            model_instance=OpenAIProvider("gpt-4o-mini")
        ),
        "gpt-4": ModelConfig(
            name="gpt-4",
            provider="openai",
            complexity_threshold=75.0,
            cost_per_1k_tokens_input=all_pricing_data["openai_pricing"]["gpt_4"]["input"],
            cost_per_1k_tokens_output=all_pricing_data["openai_pricing"]["gpt_4"]["output"],
            model_instance=OpenAIProvider("gpt-4")
        ),
        # "gpt-4-turbo": ModelConfig(
        #     name="gpt-4-turbo",
        #     provider="openai",
        #     complexity_threshold=80.0,
        #     cost_per_1k_tokens_input=all_pricing_data["openai_pricing"]["gpt_4_turbo"]["input"],
        #     cost_per_1k_tokens_output=all_pricing_data["openai_pricing"]["gpt_4_turbo"]["output"],
        #     model_instance=OpenAIProvider("gpt-4-turbo")
        # ),

        # # Anthropic models
        # "claude-3.5-sonnet": ModelConfig(
        #     name="claude-3.5-sonnet",
        #     provider="anthropic",
        #     complexity_threshold=60.0,
        #     cost_per_1k_tokens_input=all_pricing_data["anthropic_pricing"]["claude_3_5_sonnet"]["input_cost_per_mtok"] / 1000,
        #     cost_per_1k_tokens_output=all_pricing_data["anthropic_pricing"]["claude_3_5_sonnet"]["output_cost_per_mtok"] / 1000,
        #     model_instance=AnthropicProvider("claude-3.5-sonnet")
        # ),
        # "claude-3.5-haiku": ModelConfig(
        #     name="claude-3.5-haiku",
        #     provider="anthropic",
        #     complexity_threshold=45.0,
        #     cost_per_1k_tokens_input=all_pricing_data["anthropic_pricing"]["claude_3_5_haiku"]["input_cost_per_mtok"] / 1000,
        #     cost_per_1k_tokens_output=all_pricing_data["anthropic_pricing"]["claude_3_5_haiku"]["output_cost_per_mtok"] / 1000,
        #     model_instance=AnthropicProvider("claude-3.5-haiku")
        # ),
        # "claude-3-opus": ModelConfig(
        #     name="claude-3-opus",
        #     provider="anthropic",
        #     complexity_threshold=85.0,
        #     cost_per_1k_tokens_input=all_pricing_data["anthropic_pricing"]["claude_3_opus"]["input_cost_per_mtok"] / 1000,
        #     cost_per_1k_tokens_output=all_pricing_data["anthropic_pricing"]["claude_3_opus"]["output_cost_per_mtok"] / 1000,
        #     model_instance=AnthropicProvider("claude-3-opus")
        # )
    }
)


In [7]:
# Initialize the model registry
model_registry = ModelRegistry()
model_registry.register_models(custom_config.get_models_for_registry())

In [8]:
model_registry.get_sorted_models()

[ModelConfig(name='gemini-1.5-flash-8b', provider='gemini', complexity_threshold=10.0, cost_per_1k_tokens_input=0.0375, cost_per_1k_tokens_output=0.15, api_key=None, model_instance=<src.llm_selector.providers.gemini.GeminiProvider object at 0x166dd2920>),
 ModelConfig(name='gemini-1.5-flash', provider='gemini', complexity_threshold=20.0, cost_per_1k_tokens_input=0.075, cost_per_1k_tokens_output=0.3, api_key=None, model_instance=<src.llm_selector.providers.gemini.GeminiProvider object at 0x166dd1fc0>),
 ModelConfig(name='gpt-3.5-turbo', provider='openai', complexity_threshold=40.0, cost_per_1k_tokens_input=0.0005, cost_per_1k_tokens_output=0.0015, api_key='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', model_instance=<src.llm_selector.providers.openai.OpenAIProvider object at 0x166dd2980>),
 ModelConfig(name='gpt-4o-mini', provider='openai', complexity_threshold=55.0, 

In [9]:
# Initialize LLM Selector with custom configuration
llm_selector = LLMSelector(model_registry=model_registry, compression=compressor)

In [10]:
# Complex prompts with varying complexity
prompts = [
    "Hello, how are you?",
    "Explain the basics of machine learning",
    "Provide a comprehensive analysis of quantum computing's impact on cryptography"
]

In [11]:
for prompt in prompts:
    print(f"\nPrompt: {prompt}")
    details = llm_selector.get_complexity_details(prompt)
    print("Complexity Details:", details)
    
    # Generate response
    response = llm_selector.generate_response(prompt)
    print("Response:", response)

2025-01-24 20:54:47,329 - INFO - Prompt complexity: 10.208825906469123
2025-01-24 20:54:47,330 - INFO - Selected model: gemini-1.5-flash
2025-01-24 20:54:47,330 - INFO - Prompt complexity: 10.208825906469123
2025-01-24 20:54:47,330 - INFO - Selected model: gemini-1.5-flash



Prompt: Hello, how are you?
Complexity Details: {'prompt': 'Hello, how are you?', 'complexity_score': 10.208825906469123, 'selected_model': 'gemini-1.5-flash', 'details': {'overall_complexity': 10.208825906469123, 'token_complexity': 41.53677461028802, 'linguistic_complexity': 0.0, 'structural_complexity': 2, 'token_count': 6}}


2025-01-24 20:54:50,093 - INFO - Response generated using gemini-1.5-flash, response: {'response': 'I am doing well, thank you for asking!  How are you today?\n', 'usage': {'input_tokens': 7, 'output_tokens': 17, 'total_tokens': 24}}
2025-01-24 20:54:50,095 - INFO - Added input cost: $0.0005, output cost: $0.0051, total: $0.0056. Running total: input $0.0005, output $0.0051, total $0.0056
2025-01-24 20:54:50,097 - INFO - Prompt complexity: 15.773579999999995
2025-01-24 20:54:50,098 - INFO - Selected model: gemini-1.5-flash
2025-01-24 20:54:50,100 - INFO - Prompt complexity: 15.773579999999995
2025-01-24 20:54:50,101 - INFO - Selected model: gemini-1.5-flash


Response: I am doing well, thank you for asking!  How are you today?


Prompt: Explain the basics of machine learning
Complexity Details: {'prompt': 'Explain the basics of machine learning', 'complexity_score': 15.773579999999995, 'selected_model': 'gemini-1.5-flash', 'details': {'overall_complexity': 15.773579999999995, 'token_complexity': 52.285714285714285, 'linguistic_complexity': 3.0484999999999927, 'structural_complexity': 2, 'token_count': 7}}


2025-01-24 20:54:56,513 - INFO - Response generated using gemini-1.5-flash, response: {'response': 'Machine learning (ML) is a branch of artificial intelligence (AI) that focuses on enabling computer systems to learn from data without being explicitly programmed.  Instead of relying on hard-coded rules, ML algorithms identify patterns, make predictions, and improve their performance over time based on the data they are exposed to.\n\nHere\'s a breakdown of the basics:\n\n**1. Core Idea: Learning from Data**\n\nThe fundamental principle is that machines can learn from data, just like humans do.  This learning process allows them to perform tasks without specific instructions for every scenario.  For example, instead of programming a system to recognize a cat in every possible image, you feed it thousands of cat images and let it learn the features that define a cat.\n\n**2. Types of Machine Learning:**\n\nThere are three main categories of ML:\n\n* **Supervised Learning:** The algorithm

Response: Machine learning (ML) is a branch of artificial intelligence (AI) that focuses on enabling computer systems to learn from data without being explicitly programmed.  Instead of relying on hard-coded rules, ML algorithms identify patterns, make predictions, and improve their performance over time based on the data they are exposed to.

Here's a breakdown of the basics:

**1. Core Idea: Learning from Data**

The fundamental principle is that machines can learn from data, just like humans do.  This learning process allows them to perform tasks without specific instructions for every scenario.  For example, instead of programming a system to recognize a cat in every possible image, you feed it thousands of cat images and let it learn the features that define a cat.

**2. Types of Machine Learning:**

There are three main categories of ML:

* **Supervised Learning:** The algorithm is trained on a labeled dataset.  This means the data includes both the input features and the desired

2025-01-24 20:55:10,044 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-01-24 20:55:10,049 - INFO - Response generated using gpt-4o-mini, response: {'response': "Quantum computing is poised to have a profound impact on cryptography, which is the backbone of secure communication in the digital age. The unique properties of quantum mechanics allow quantum computers to perform certain calculations much more efficiently than classical computers, leading to significant implications for various cryptographic protocols. Below is a comprehensive analysis of this impact:\n\n### 1. Quantum Computing Fundamentals\n\n**Quantum Bits (Qubits):**\n- Unlike classical bits that can be either 0 or 1, qubits can exist in superpositions of states, allowing quantum computers to process a vast amount of information simultaneously.\n\n**Quantum Algorithms:**\n- The most notable algorithms impacting cryptography include:\n  - **Shor’s Algorithm:** Efficiently fact

Response: Quantum computing is poised to have a profound impact on cryptography, which is the backbone of secure communication in the digital age. The unique properties of quantum mechanics allow quantum computers to perform certain calculations much more efficiently than classical computers, leading to significant implications for various cryptographic protocols. Below is a comprehensive analysis of this impact:

### 1. Quantum Computing Fundamentals

**Quantum Bits (Qubits):**
- Unlike classical bits that can be either 0 or 1, qubits can exist in superpositions of states, allowing quantum computers to process a vast amount of information simultaneously.

**Quantum Algorithms:**
- The most notable algorithms impacting cryptography include:
  - **Shor’s Algorithm:** Efficiently factors large integers, threatening RSA and ECC (Elliptic Curve Cryptography).
  - **Grover’s Algorithm:** Provides a quadratic speedup for unstructured search problems, impacting symmetric cryptographic algorit

In [14]:
llm_selector.cost_tracker.get_cost_breakdown()

{'input_cost': 0.0011, 'output_cost': 0.2337, 'total_cost': 0.2347}