In [1]:
import openai
import time
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

# Configure OpenAI client
client = openai.OpenAI(
    api_key="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"  # Replace with your API key
)

def test_latency(prompt, model="gpt-3.5-turbo", n_trials=10):
    """
    Test LLM latency for a given prompt and model
    
    Args:
        prompt (str): The prompt to test
        model (str): The model to test
        n_trials (int): Number of trials to run
        
    Returns:
        dict: Dictionary containing latency metrics
    """
    latencies = []
    token_counts = []
    
    for _ in tqdm(range(n_trials), desc=f"Testing {model}"):
        start_time = time.time()
        
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        end_time = time.time()
        latency = (end_time - start_time) * 1000  # Convert to milliseconds
        
        latencies.append(latency)
        token_counts.append(response.usage.total_tokens)
        
        # Add small delay between requests to avoid rate limits
        time.sleep(1)
    
    return {
        'latencies': latencies,
        'token_counts': token_counts,
        'mean_latency': np.mean(latencies),
        'std_latency': np.std(latencies),
        'p50_latency': np.percentile(latencies, 50),
        'p90_latency': np.percentile(latencies, 90),
        'p99_latency': np.percentile(latencies, 99),
        'mean_tokens': np.mean(token_counts)
    }

# Define test prompts of varying lengths
test_prompts = {
    'smart': 'Evaluate this goal using the SMART framework and return a 0-100 rating for each dimension of the smart framework: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”',  
's': 'Evaluate this goal using the SMART framework and return a 0-100 rating of how specific this goal is: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”  ',
    'm': 'Evaluate this goal using the SMART framework and return a 0-100 rating of how measurable this goal is: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”',  
'a': 'Evaluate this goal using the SMART framework and return a 0-100 rating of how achieveable this goal is: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”  ',
'r': 'Evaluate this goal using the SMART framework and return a 0-100 rating of how relevent this goal is: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”  ',
't': 'Evaluate this goal using the SMART framework and return a 0-100 rating of how time bound this goal is: “Grow the number of monthly users of Techfirm’s mobile app by optimizing our app-store listing and creating targeted social media campaigns.”  
}

    

# Define models to test
models_to_test = ["gpt-3.5-turbo", "gpt-4"]

# Run tests
results = []
for model in models_to_test:
    for prompt_name, prompt in test_prompts.items():
        print(f"\nTesting {model} with {prompt_name} prompt...")
        metrics = test_latency(prompt, model=model)
        
        results.append({
            'model': model,
            'prompt_type': prompt_name,
            'mean_latency': metrics['mean_latency'],
            'p50_latency': metrics['p50_latency'],
            'p90_latency': metrics['p90_latency'],
            'p99_latency': metrics['p99_latency'],
            'std_latency': metrics['std_latency'],
            'mean_tokens': metrics['mean_tokens']
        })

# Create results DataFrame
results_df = pd.DataFrame(results)

# Plot results
plt.figure(figsize=(12, 6))
sns.barplot(data=results_df, x='prompt_type', y='mean_latency', hue='model')
plt.title('Mean Latency by Model and Prompt Type')
plt.xlabel('Prompt Type')
plt.ylabel('Latency (ms)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Print detailed metrics
print("\nDetailed Latency Metrics (milliseconds):")
print(results_df.round(2).to_string(index=False))

# Save results to CSV with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_df.to_csv(f'llm_latency_results_{timestamp}.csv', index=False)

SyntaxError: unterminated string literal (detected at line 67) (206534220.py, line 67)