In [9]:
import json
import time
import argparse
import sys
from openai import OpenAI

# Credentials are not embedded in the notebook: with no api_key argument the
# OpenAI client takes the key from the OPENAI_API_KEY environment variable and
# fails at construction time if it is missing.
client = OpenAI()


# Fine-tuned model identifiers, one per domain dataset.
model_privacyqa = "ft:gpt-4o-mini-2024-07-18:personal:privacyqa:BEUwmeGn" #tuned on 500 samples
model_cancerqa = "ft:gpt-4o-mini-2024-07-18:personal:cancerqa:BOS35I1P" #tuned on 500 samples
model_medqa = "ft:gpt-4.1-mini-2025-04-14:personal:medqa:BTJ2tab8" #tuned on 2000 samples
model_civilqa = "ft:gpt-4.1-mini-2025-04-14:personal:civilqa:BRh8o3a6" #tuned on 2000 samples

# Non-fine-tuned model used as the comparison point.
model_baseline = "gpt-4o-mini"

# JSONL file holding the prompts that are sent to every model.
input_file = "logicqa_400_prompts.jsonl"
# One output file per model run. NOTE(review): `output_file` is not referenced
# by the visible cells, and the "logicq_meda"/"logicq_civila" spellings look
# like typos; the paths are left unchanged because other code may rely on them.
output_file = "civilqa_generated_responses.jsonl"
output_file_b = "logicqa_generated_responses_b.jsonl" #vs baseline model
output_file_c = "logicq_meda_generated_responses.jsonl"
output_file_d = "logicq_civila_generated_responses.jsonl"


def count_words(text: str) -> int:
    """Return how many whitespace-separated tokens ``text`` contains."""
    # split() with no separator already discards leading/trailing whitespace
    # and collapses runs of whitespace, so no explicit strip is required.
    tokens = text.split()
    return len(tokens)

def process_jsonl(path: str):
    """Print word-count statistics for the message contents of a JSONL file.

    Every non-blank line is parsed as JSON. Two record shapes are recognised:

    * ``{"messages": [{"role": ..., "content": ...}, ...]}`` - the ``content``
      of every message dict in the list is counted;
    * ``{"role": ..., "content": ...}`` - the single ``content`` is counted.

    Lines that are not valid JSON are reported on stderr and skipped. Records
    of any other shape are skipped silently. ``content`` values that are not
    strings (for example JSON ``null``) cannot be word-counted; they are
    skipped and their total is reported once on stderr.

    Prints the number of counted messages, the average word count and the
    highest word count to stdout. If nothing was counted, prints a notice to
    stderr instead.

    Args:
        path: Path of the JSONL file, read as UTF-8.

    Returns:
        None.
    """
    total_words = 0
    max_words = 0
    message_count = 0
    skipped_non_text = 0

    with open(path, 'r', encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                obj = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Warning: skipping invalid JSON on line {lineno}: {e}", file=sys.stderr)
                continue

            if not isinstance(obj, dict):
                # no recognizable content
                continue

            # Extract all "content" fields
            messages = obj.get('messages')
            if isinstance(messages, list):
                # e.g. {"messages":[{"role":"user","content":"..."}, ...]}
                contents = [
                    msg['content']
                    for msg in messages
                    if isinstance(msg, dict) and 'content' in msg
                ]
            elif 'content' in obj:
                # e.g. {"role":"assistant","content":"..."}
                contents = [obj['content']]
            else:
                # no recognizable content
                continue

            # Count words per message
            for text in contents:
                if not isinstance(text, str):
                    # A non-string content would raise inside count_words and
                    # abort the whole scan; skip it and keep going.
                    skipped_non_text += 1
                    continue
                words = count_words(text)
                total_words += words
                message_count += 1
                max_words = max(max_words, words)

    if skipped_non_text:
        print(f"Warning: skipped {skipped_non_text} non-text 'content' value(s).", file=sys.stderr)

    if message_count == 0:
        print("No messages with 'content' found in the file.", file=sys.stderr)
        return

    average = total_words / message_count
    print(f"Processed {message_count} messages.")
    print(f"Average word count: {average:.2f}")
    print(f"Highest word count: {max_words}")

def generate_multiple_responses(prompt, model_id, num_samples=5):
    """Request several completions for one prompt and return them serialised.

    A single chat-completion call is made through the module-level ``client``
    with a fixed system message, ``temperature=1.0``, ``max_tokens=1000`` and
    ``n=num_samples``.

    Args:
        prompt: Text sent as the user message.
        model_id: Model identifier passed to the API.
        num_samples: Number of choices requested in the one call.

    Returns:
        A list holding ``choice.model_dump_json()`` for every returned choice
        (presumably one JSON string per choice - the whole choice object, not
        only the message text).
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=model_id,
        messages=conversation,
        temperature=1.0,
        max_tokens=1000,
        n=num_samples,
    )
    return [choice.model_dump_json() for choice in completion.choices]


def process_prompts(input_path, output_path, model):
    """Generate completions for each prompt of a JSONL file and save them.

    Every non-blank line of ``input_path`` is parsed as JSON. The prompt is the
    ``content`` of the first message whose ``role`` is ``"user"``. For each
    non-empty prompt, ``generate_multiple_responses`` is called and one line
    ``{"prompt": ..., "completions": [...]}`` is written to ``output_path``.

    Blank lines, lines that are not valid JSON (reported on stderr), records
    that are not a dict with a ``messages`` list, and records without a user
    prompt are skipped, so one bad line does not abort the run. If generating
    or writing a record fails, the error is printed, the prompt is dropped (no
    retry) and the loop pauses briefly before continuing.

    Args:
        input_path: JSONL file of ``{"messages": [...]}`` records, read as UTF-8.
        output_path: Destination JSONL file; overwritten if it already exists.
        model: Model identifier forwarded to ``generate_multiple_responses``.

    Returns:
        None.
    """
    with open(input_path, 'r', encoding='utf-8') as infile, \
            open(output_path, 'w', encoding='utf-8') as outfile:
        for lineno, line in enumerate(infile, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Warning: skipping invalid JSON on line {lineno}: {e}", file=sys.stderr)
                continue

            if not isinstance(data, dict):
                continue
            messages = data.get("messages", [])
            if not isinstance(messages, list):
                continue

            # The first user message is the prompt.
            prompt = ""
            for msg in messages:
                if isinstance(msg, dict) and msg.get("role") == "user":
                    prompt = msg.get("content", "")
                    break
            if not prompt:
                continue

            try:
                completions = generate_multiple_responses(prompt, model)
                output_data = {
                    "prompt": prompt,
                    "completions": completions
                }
                outfile.write(json.dumps(output_data) + "\n")
            except Exception as e:
                print(f"Error processing prompt: {prompt}\n{e}")
                time.sleep(.9)  # Brief pause in case of rate limits

# Optional: print word-count statistics for the prompt file first.
# process_jsonl(input_file)

# One generation run per model, each writing to its own output file.
process_prompts(input_path=input_file, output_path=output_file_b, model=model_baseline)
process_prompts(input_path=input_file, output_path=output_file_c, model=model_medqa)
process_prompts(input_path=input_file, output_path=output_file_d, model=model_civilqa)



In [3]:
# Sanity check: number of lines (one per answered prompt) in the baseline output file.
with open(output_file_b, 'r') as f:
    print(len(f.readlines()))
     

400
