# Energy Measurement

In [7]:
# Initialize dependencies and load environment
import pandas as pd, openai, os, json, time, requests
from dotenv import load_dotenv
from codecarbon import EmissionsTracker

load_dotenv()

True

In [8]:
# Load the prompt set. The 'processed' column (0/1) is added on first use and
# written back so that later runs can skip prompts flagged as done.
data = pd.read_json("data/clean_prompts.jsonl", lines=True)
if 'processed' not in data.columns:
    data['processed'] = 0
    data.to_json("data/clean_prompts.jsonl", orient='records', lines=True)

# Saved measurements carry the model name reported by the API (for example
# "gpt-4o-mini", "llama-3.1-8b-instant", "mistral-large-latest"), whereas the
# processing loop looks pairs up by provider key ("openai", "llama",
# "mistral"). Without this mapping, measurements from earlier runs are never
# recognised and partially finished prompts are measured again.
# NOTE(review): prefixes are derived from the model names hard-coded in the
# call_* functions; extend this table if other models are added.
_PROVIDER_BY_MODEL_PREFIX = {"gpt": "openai", "llama": "llama", "mistral": "mistral"}


def _provider_key(model):
    """Return the provider key for a recorded model name, or None if unknown."""
    name = str(model).lower()
    for prefix, provider in _PROVIDER_BY_MODEL_PREFIX.items():
        if name.startswith(prefix):
            return provider
    return None


# Load existing energy data to avoid duplicates
processed_combinations = set()
try:
    existing_energy = pd.read_json("data/energy_measurements.jsonl", lines=True)
    for prompt, model in zip(existing_energy['prompt'], existing_energy['model']):
        # Keep the raw pair (covers rows whose "model" already is a provider
        # key) and add the provider-keyed pair used by the processing loop.
        processed_combinations.add((prompt, model))
        provider = _provider_key(model)
        if provider is not None:
            processed_combinations.add((prompt, provider))
except FileNotFoundError:
    # No measurements have been recorded yet; start with an empty set.
    pass
except (OSError, ValueError, KeyError) as e:
    # Unreadable or malformed file: fall back to an empty set as before, but
    # say so, because every pair will then be measured again.
    print(f"Could not load existing energy data ({e}); no combinations will be skipped")
    processed_combinations = set()

In [9]:
# Register every provider whose API key is present in the environment.
# `clients` maps provider key -> OpenAI client object (openai) or raw API key
# string (llama, mistral); `models_to_test` lists the provider keys in order.
load_dotenv(override=True)
clients = {}
models_to_test = []

openai_key = os.getenv("OPENAI_API_KEY")
if openai_key:
    clients['openai'] = openai.OpenAI(api_key=openai_key)
    models_to_test.append("openai")

groq_key = os.getenv("GROQ_API_KEY")
if groq_key:
    # The Groq key is stored under the 'llama' provider key.
    clients['llama'] = groq_key
    models_to_test.append("llama")

mistral_key = os.getenv("MISTRAL_API_KEY")
if mistral_key:
    clients['mistral'] = mistral_key
    models_to_test.append("mistral")


In [10]:
# Measure energy consumption and performance metrics
def track_performance(model_name, prompt, api_call_func):
    """Run one API call under a CodeCarbon tracker and return its metrics.

    Args:
        model_name: Provider key; used as the "model" value when the response
            does not report a model name itself.
        prompt: Prompt text handed to ``api_call_func``.
        api_call_func: Callable taking the prompt and returning a
            ``(response_dict, time_to_first_token_seconds)`` tuple. The
            response dict is read with OpenAI-style keys ("usage", "choices",
            "model").

    Returns:
        dict with the keys "prompt", "model", "timestamp", "duration",
        "time_to_first_token", "prompt_tokens", "completion_tokens",
        "total_tokens", "tokens_per_second", "energy_consumed_wh" and
        "response". "energy_consumed_wh" is None when the tracker exposes no
        energy figure.

    Raises:
        Any exception raised by ``api_call_func``; the tracker is stopped
        before it propagates.
    """
    tracker = EmissionsTracker(log_level="ERROR", save_to_file=False)
    tracker.start()
    start_time = time.time()
    try:
        response_data, time_to_first_token = api_call_func(prompt)
    finally:
        # Always stop the tracker, otherwise each failed call leaves a
        # running tracker behind.
        duration = time.time() - start_time
        tracker.stop()

    # stop() returns emissions in kg CO2eq, not energy. The energy figure
    # (kWh) lives on the emissions data object populated by stop().
    # NOTE(review): this is whatever CodeCarbon measures on the machine
    # running this code during the call — confirm that is the intended metric.
    emissions_data = getattr(tracker, "final_emissions_data", None)
    energy_kwh = getattr(emissions_data, "energy_consumed", None)
    energy_consumed_wh = round(energy_kwh * 1000, 4) if energy_kwh is not None else None

    # `or {}` / `or [{}]` also cover keys that are present but null or empty.
    usage = response_data.get("usage") or {}
    prompt_tokens = usage.get("prompt_tokens", usage.get("input_tokens", 0))
    completion_tokens = usage.get("completion_tokens", usage.get("output_tokens", 0))
    total_tokens = usage.get("total_tokens", prompt_tokens + completion_tokens)
    choices = response_data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    content = message.get("content") or ""

    return {
        "prompt": prompt,
        "model": response_data.get("model", model_name),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "duration": round(duration, 2),
        "time_to_first_token": round(time_to_first_token, 3),
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
        "tokens_per_second": round(total_tokens / duration, 1) if duration > 0 else 0,
        "energy_consumed_wh": energy_consumed_wh,
        "response": content
    }


In [11]:
# API call functions for different models with retry logic
import random

def api_call_with_retry(api_func, max_retries=3, base_delay=1):
    """Invoke ``api_func`` until it succeeds or the attempts run out.

    After a failed attempt the function sleeps for
    ``base_delay * 2**attempt`` seconds plus up to one second of random
    jitter, then tries again. The exception raised by the last permitted
    attempt is re-raised to the caller.

    Returns:
        The value returned by ``api_func()``, or ``None`` when
        ``max_retries`` permits no attempt at all.
    """
    last_attempt = max_retries - 1

    for attempt in range(max_retries):
        try:
            outcome = api_func()
        except Exception as e:
            if attempt == last_attempt:
                print(f"\nFinal attempt failed: {str(e)}")
                raise e

            # Exponential backoff with jitter before the next attempt.
            jitter = random.uniform(0, 1)
            delay = base_delay * (2 ** attempt) + jitter
            print(f"\nAttempt {attempt + 1} failed: {str(e)}. Retrying in {delay:.1f}s...")
            time.sleep(delay)
        else:
            return outcome

    return None

def call_openai(prompt):
    """Send ``prompt`` to gpt-4o-mini through the OpenAI client, with retries.

    Returns a ``(response_dict, elapsed_seconds)`` tuple. The dict holds the
    "usage", "choices" and "model" keys copied from the client response;
    ``elapsed_seconds`` is measured from just before the request of the
    successful attempt.
    """
    def _request_once():
        began = time.time()
        completion = clients['openai'].chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=50,
            temperature=0.3,
        )

        # Copy the fields of interest out of the client object into a plain dict.
        token_usage = {
            "prompt_tokens": completion.usage.prompt_tokens,
            "completion_tokens": completion.usage.completion_tokens,
            "total_tokens": completion.usage.total_tokens,
        }
        messages = []
        for choice in completion.choices:
            messages.append({"message": {"content": choice.message.content}})

        payload = {"usage": token_usage, "choices": messages, "model": completion.model}
        return payload, time.time() - began

    return api_call_with_retry(_request_once)

def call_llama(prompt):
    """Send ``prompt`` to llama-3.1-8b-instant on Groq's chat endpoint, with retries.

    Returns:
        ``(response_json, elapsed_seconds)`` where ``response_json`` is the
        decoded JSON body and ``elapsed_seconds`` is measured from just
        before the request of the successful attempt.

    Raises:
        Exception: when the final attempt gets a non-200 status or a body
            containing an "error" entry.
    """
    def _call():
        start_time = time.time()
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {clients['llama']}"},
            json={"model": "llama-3.1-8b-instant", "messages": [{"role": "user", "content": prompt}], 
                  "max_tokens": 50, "temperature": 0.3},
            timeout=30
        )
        if response.status_code != 200:
            raise Exception(f"API Error: {response.status_code} - {response.text}")
        resp_json = response.json()
        # Same guard as call_mistral: an error body delivered with status 200
        # must fail (and be retried) instead of being recorded as an empty,
        # zero-token measurement.
        if "error" in resp_json:
            raise Exception(f"API Error: {resp_json['error']}")
        return resp_json, time.time() - start_time
    
    return api_call_with_retry(_call)

def call_mistral(prompt):
    """Send ``prompt`` to mistral-large-latest on Mistral's chat endpoint, with retries.

    Returns a ``(response_json, elapsed_seconds)`` tuple. A non-200 status or
    a body containing an "error" entry raises ``Exception``, which the retry
    wrapper treats as a failed attempt.
    """
    endpoint = "https://api.mistral.ai/v1/chat/completions"

    def _request_once():
        start_time = time.time()
        auth_headers = {"Authorization": f"Bearer {clients['mistral']}"}
        request_body = {
            "model": "mistral-large-latest",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 50,
            "temperature": 0.3,
        }
        response = requests.post(endpoint, headers=auth_headers, json=request_body, timeout=60)

        if response.status_code != 200:
            raise Exception(f"API Error: {response.status_code} - {response.text}")

        resp_json = response.json()
        if "error" in resp_json:
            raise Exception(f"API Error: {resp_json['error']}")

        return resp_json, time.time() - start_time

    return api_call_with_retry(_request_once)


In [None]:
# Process prompts and save results incrementally
def run_performance_tests():
    """Measure every unprocessed prompt against each configured model.

    For each row of ``data`` whose 'processed' flag is 0, every provider in
    ``models_to_test`` that has no entry in ``processed_combinations`` is
    called through ``track_performance``. Each result is appended to
    data/energy_measurements.jsonl as soon as it is available, so an
    interrupted run keeps what it already measured. Once all providers are
    covered for a prompt, the row is flagged processed and
    data/clean_prompts.jsonl is rewritten.

    Reads and mutates the module-level ``data`` and
    ``processed_combinations`` in place (no rebinding, hence no ``global``).
    A failing provider is reported and skipped; its prompt stays unprocessed.
    """
    unprocessed = data[data['processed'] == 0]
    if unprocessed.empty:
        return

    api_calls = {"openai": call_openai, "llama": call_llama, "mistral": call_mistral}
    last_len = 0

    def _status(message, newline=False):
        """Overwrite the current console line, blanking leftovers of a longer one."""
        nonlocal last_len
        padding = " " * max(0, last_len - len(message))
        print(f"\r{message}{padding}", end="\n" if newline else "", flush=True)
        last_len = 0 if newline else len(message)

    def _mark_processed(row_index):
        """Flag the row as done and persist the prompt file for resumability."""
        data.loc[row_index, 'processed'] = 1
        data.to_json("data/clean_prompts.jsonl", orient='records', lines=True)

    # NOTE(review): `idx + 1` in the messages assumes an integer row index.
    for idx, row in unprocessed.iterrows():
        prompt_text = row['prompt_text']
        models_to_process = [
            model for model in models_to_test
            if (prompt_text, model) not in processed_combinations
        ]

        if not models_to_process:
            _status(f"Prompt {idx + 1} already complete", newline=True)
            # Every provider is already covered, so flag the row; otherwise it
            # would be re-examined (and re-reported) on every run.
            _mark_processed(idx)
            continue

        completed = 0
        for model_name in models_to_process:
            _status(f"Processing prompt {idx + 1} with {model_name}...")
            try:
                result = track_performance(model_name, prompt_text, api_calls[model_name])
            except Exception as e:
                _status(f"✗ {model_name} failed: {e}", newline=True)
                continue

            # Persist before recording the pair as done, so the in-memory set
            # never claims a measurement that is not on disk.
            with open("data/energy_measurements.jsonl", "a", encoding="utf-8") as f:
                f.write(json.dumps(result) + "\n")
            processed_combinations.add((prompt_text, model_name))
            completed += 1

        _status(f"Prompt {idx + 1} complete: {completed}/{len(models_to_process)} models")

        if all((prompt_text, model) in processed_combinations for model in models_to_test):
            _mark_processed(idx)

    # Finish the last in-place status line.
    print()

if models_to_test:
    run_performance_tests()


Prompt 1473 complete: 3/3 modelsral...Prompt 1490 already complete
Prompt 1491 already complete
Prompt 1643 complete: 3/3 modelsral...Prompt 1646 already complete
Prompt 1859 complete: 3/3 modelsral...Prompt 1870 already complete
Prompt 1959 complete: 3/3 modelsral...Prompt 1960 already complete
Prompt 2029 complete: 3/3 modelsral...Prompt 2030 already complete
Processing prompt 2122 with llama.....