In [1]:
%run ../utils/init_env.py

In [2]:
from typing import Dict, List, Optional

import requests

# Configuration: URL of the completions endpoint that prompts are POSTed to.
LMSTUDIO_API_URL = "http://localhost:1234/v1/completions"  # Adjust port if needed

In [2]:
def send_prompt_to_lmstudio(
    prompt: str,
    system_prompt: str = "",
    max_tokens: int = 500,
    timeout: float = 300.0,
) -> Optional[str]:
    """
    Send a single prompt to LMStudio with an optional system prompt and return the response.

    Args:
        prompt: The user prompt text.
        system_prompt: Optional text placed before the prompt, separated by a
            blank line. Ignored when empty.
        max_tokens: Value sent as ``max_tokens`` in the request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the caller.

    Returns:
        The completion text with surrounding whitespace stripped, or ``None``
        if the request failed or the response did not have the expected
        ``choices[0].text`` shape. Errors are printed, not raised.
    """
    headers = {
        "Content-Type": "application/json"
    }

    # Combine system prompt and user prompt
    full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt

    payload = {
        "prompt": full_prompt,
        "max_tokens": max_tokens,
        "temperature": 0.7,  # Adjust as needed
        "top_p": 1.0
    }

    try:
        response = requests.post(
            LMSTUDIO_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error connecting to LMStudio: {e}")
        return None

    # A 2xx reply can still carry a body that is not JSON or lacks the
    # expected fields; treat that as a failed call rather than crashing.
    try:
        return response.json()["choices"][0]["text"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        print(f"Unexpected response from LMStudio: {e!r}")
        return None

def evaluate_prompts(prompts: List[str], expected_responses: List[str], system_prompt: str = "") -> Dict:
    """
    Evaluate multiple prompts with an optional system prompt and compare with expected responses.

    Each prompt is sent via ``send_prompt_to_lmstudio`` and the reply is
    compared to the corresponding expected response (case-insensitive,
    ignoring surrounding whitespace).

    Args:
        prompts: Prompts to send, in order.
        expected_responses: Expected reply for each prompt; must have the same
            length as ``prompts``.
        system_prompt: Optional system prompt forwarded with every request.

    Returns:
        A dict with keys:
            ``total_prompts``      - number of prompts supplied,
            ``successful_matches`` - number of replies equal to the expectation,
            ``failed_requests``    - number of prompts for which no reply was obtained,
            ``responses``          - per-prompt details (only for prompts that got a reply),
            ``system_prompt_used`` - the system prompt passed in,
            ``accuracy``           - matches / total_prompts * 100 (0.0 when
                                     there are no prompts). Failed requests
                                     stay in the denominator.

    Raises:
        ValueError: If ``prompts`` and ``expected_responses`` differ in length.
    """
    # Validate up front so no requests are sent for inconsistent input.
    if len(prompts) != len(expected_responses):
        raise ValueError("Number of prompts must match number of expected responses")

    results = {
        "total_prompts": len(prompts),
        "successful_matches": 0,
        "failed_requests": 0,
        "responses": [],
        "system_prompt_used": system_prompt,
    }

    for prompt, expected in zip(prompts, expected_responses):
        # Send prompt to LLM with system prompt
        actual_response = send_prompt_to_lmstudio(prompt, system_prompt)

        if actual_response is None:
            # Record the failure instead of dropping it silently.
            results["failed_requests"] += 1
            continue

        # Simple comparison (case-insensitive exact match). The reply is
        # already stripped by the sender, so strip the expectation too;
        # otherwise padded expectations could never match.
        is_match = actual_response.lower() == expected.strip().lower()
        if is_match:
            results["successful_matches"] += 1

        # Store detailed results
        results["responses"].append({
            "prompt": prompt,
            "expected": expected,
            "actual": actual_response,
            "match": is_match,
        })

    # Calculate accuracy; guard against division by zero for empty input.
    total = results["total_prompts"]
    results["accuracy"] = results["successful_matches"] / total * 100 if total else 0.0
    return results