In [None]:
import json

# Convert the synthetic test dataset into the record layout used for
# evaluation: the question and reference answer are copied over, while
# "response" and "retrieved_contexts" are left as empty placeholders.

# Read as UTF-8 explicitly; the platform default encoding is not guaranteed
# to be UTF-8 and could mis-decode non-ASCII text in the dataset.
with open('synthetic-test-dataset.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Build one structured record per dataset entry. Missing keys fall back to
# an empty string. "retrieved_contexts" is an empty list (not an empty
# string) so its type matches the list of context strings that the API-run
# step in this file stores under the same key.
new_entries = [
    {
        "user_input": entry.get("user_input", ""),
        "reference": entry.get("reference", ""),
        "response": "",
        "retrieved_contexts": [],
    }
    for entry in data
]

# ensure_ascii=False emits non-ASCII characters verbatim, so the output file
# must be opened as UTF-8 to avoid encoding errors on non-UTF-8 locales.
with open('synthetic-test-dataset_structured.json', 'w', encoding='utf-8') as f:
    json.dump(new_entries, f, ensure_ascii=False, indent=2)

print(
    f"Wrote {len(new_entries)} entries to synthetic-test-dataset_structured.json")

In [None]:
import requests
import uuid
import os
from dotenv import load_dotenv
import json

# Populate the process environment from a local .env file, if one exists.
load_dotenv()

# Chat-completions endpoint under test (swap in your own deployment URL).
API_URL = "https://ibkunirag-exp-swedencentral-func-dbbsd8g2c3b3adce.swedencentral-01.azurewebsites.net/api/chat/completions"
# Function key for the endpoint, taken from the CASABLANCA environment variable.
API_KEY = os.environ.get("CASABLANCA")

# Abort early when the key is absent or empty; every request needs it.
if API_KEY is None or API_KEY == "":
    raise ValueError("CASABLANCA not found in .env file.")

# System prompt sent as the first message of every conversation.
# NOTE(review): the prompt text contains typos ("employeed", "clearify",
# "reponse"). They are kept verbatim because editing the prompt changes the
# model input and therefore the evaluation results.
_prompt_sentences = (
    "You are an internal support-knowledge-Worker employeed at CASABLANCA hotelsoftware and try to answer the queries from support-workers using the provided context below. ",
    "Always spell the company name like that: CASABLANCA hotelsoftware to be inline with the branding. ",
    "You embody a helpful, knowledgeable, and friendly co-worker who understands both the intricacies of our internal processes and the spirit of our company culture. ",
    "Only Answer the question if there is relevant information in the Context and don't make up any information. ",
    "If the question from the user is not fully specified ask to clearify (always). If you use citations f.e. doc1 always include the url. ",
    "Always format your reponse in Markdown. ",
)
system_prompt = "".join(_prompt_sentences)

# Send every dataset question to the chat endpoint and record the answer
# together with the text of the retrieved context documents. Each run is
# written to its own file under json-temp/ and all runs are saved together
# at the end.

# Upper bound on how many dataset entries are sent to the API in one run.
MAX_ENTRIES = 250
# Seconds to wait for the endpoint per request. Without a timeout a stalled
# connection would block the whole run indefinitely.
REQUEST_TIMEOUT_SECONDS = 120

# Load the synthetic test dataset. UTF-8 is stated explicitly because the
# platform default encoding is not guaranteed to be UTF-8.
with open('synthetic-test-dataset.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Only take the first MAX_ENTRIES entries
data = data[:MAX_ENTRIES]

results = []
headers = {"x-functions-key": API_KEY}

# Ensure json-temp directory exists
os.makedirs('json-temp', exist_ok=True)

for idx, entry in enumerate(data):
    user_input = entry.get("user_input", "")
    payload = {
        "messages": [
            {
                "id": str(uuid.uuid4()),
                "role": "system",
                "content": system_prompt
            },
            {
                "id": str(uuid.uuid4()),
                "role": "user",
                "content": user_input
            }
        ],
        "stream": False
    }

    # Start from an "empty" record; it is filled in on success and gets an
    # "error" key on any failure, so every entry yields exactly one record.
    run_result = {
        "user_input": user_input,
        "reference": entry.get("reference", ""),
        "response": "",
        "retrieved_contexts": [],
    }

    try:
        response = requests.post(
            API_URL,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are recorded per entry instead of
        # aborting the run and losing the results collected so far.
        run_result["error"] = f"request failed: {exc}"
    else:
        if response.status_code == 200:
            try:
                result = response.json()
            except ValueError as exc:
                # A 200 response whose body is not valid JSON.
                run_result["error"] = f"invalid JSON in response: {exc}"
            else:
                if not isinstance(result, dict):
                    result = {}
                # Extract the answer; tolerate missing or null fields.
                choices = result.get("choices") or []
                if choices:
                    message = choices[0].get("message") or {}
                    run_result["response"] = message.get("content", "")
                # Extract the text of each retrieved document; a missing or
                # null "context"/"Documents" yields an empty list.
                documents = (result.get("context") or {}).get("Documents") or []
                run_result["retrieved_contexts"] = [
                    doc.get("Text", "") for doc in documents
                ]
        else:
            # Non-200 responses keep the HTTP status code as the error value.
            run_result["error"] = response.status_code

    # Log each run's output in a separate file (UTF-8, because
    # ensure_ascii=False writes non-ASCII characters verbatim).
    with open(f'json-temp/run_{idx+1}.json', 'w', encoding='utf-8') as temp_f:
        json.dump(run_result, temp_f, ensure_ascii=False, indent=2)
    results.append(run_result)

# Save all results to a file
with open('synthetic-test-dataset_api_results-gpt-4-mini.json', 'w',
          encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print(f"Completed {len(results)} API calls and saved results.")

