In [111]:
import pandas as pd

filtered_qs = pd.read_csv('../outputs/filtered_qs.csv', index_col =0)
filtered_qs = filtered_qs.loc[['q24684']]

In [112]:
for i, row in filtered_qs.iterrows():
    foo=row[['text', 'option_1', 'option_2', 'option_3', 'option_4']].dropna().to_list()
    break

foo

['Do you carry condoms?', 'Always', 'Sometimes', 'Rarely', 'Never']

In [113]:
import json 

output_file = 'batchinput.jsonl'

with open(output_file, 'w') as f:
    for i, row in filtered_qs.iterrows():
        row_dict = {
            "custom_id": f"request-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4.1-mini",
                "messages": [
                    {"role": "system", "content": "You are a data labeler. The data comes from the OkCupid dataset from Kirkegaard et. al. Questions and their multiple choice answers are provided. Your job is to rate the questions on a scale from 1-5 in terms of how useful you think the question is for identifying the user's demographic. For example, beliefs about sex before marriage, cheating on a test may say a lot about a user's cultural background. But their preference for wine vs. beer may just be more random. But remember: users answer questions optionally. So many of the questions are sexual in nature; in these cases, don't assume that they hold valuable information about the user's views on sexuality but that more conservative users probabily skipped them and the questions aren't important. \n\n Begin your response with the score so that our python script can scrape it from the beginning."},
                    {"role": "user", "content": f"{row[['text', 'option_1', 'option_2', 'option_3', 'option_4']].dropna().to_list()}"}
                ],
                "max_tokens": 1000
            }
        }
        f.write(json.dumps(row_dict) + '\n')


In [114]:
from openai import OpenAI
import os

with open(os.path.expanduser("~/openai_apikey"), "r") as f:
    api_key = f.read().strip()

from openai import OpenAI
client = OpenAI(api_key=api_key)

batch_input_file = client.files.create(
    file=open("batchinput.jsonl", "rb"),
    purpose="batch"
)

print(batch_input_file)

FileObject(id='file-SpwW5YPhQBQ3pU67kfb9HY', bytes=1143, created_at=1745699807, filename='batchinput.jsonl', object='file', purpose='batch', status='processed', status_details=None, expires_at=None)


In [115]:
batch_input_file_id = batch_input_file.id
batch = client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
        "description": "question scores, 4.1-mini"
    }
)
print(batch)

Batch(id='batch_680d43e10e0481909b755bf862d65a73', completion_window='24h', created_at=1745699809, endpoint='/v1/chat/completions', input_file_id='file-SpwW5YPhQBQ3pU67kfb9HY', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1745786209, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'question scores, 4.1-mini'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


In [140]:
batch_retrieved = client.batches.retrieve(batch.id)
print(batch_retrieved.status)

in_progress


In [100]:
file_response = client.files.content(batch_retrieved.output_file_id)
print(file_response.text)

{"id": "batch_req_680d2740d8f48190849d0b68285d5c4a", "custom_id": "request-q48", "response": {"status_code": 200, "request_id": "af94779253e1a508283c5d6d711ac799", "body": {"id": "chatcmpl-BQeczWuDIbNQMFDOw70Z3JXKkvasP", "object": "chat.completion", "created": 1745692401, "model": "gpt-4.1-mini-2025-04-14", "choices": [{"index": 0, "message": {"role": "assistant", "content": "2", "refusal": null, "annotations": []}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 143, "completion_tokens": 2, "total_tokens": 145, "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, "completion_tokens_details": {"reasoning_tokens": 0, "audio_tokens": 0, "accepted_prediction_tokens": 0, "rejected_prediction_tokens": 0}}, "service_tier": "default", "system_fingerprint": "fp_38647f5e19"}}, "error": null}
{"id": "batch_req_680d2740e9d08190bd079f7e65369050", "custom_id": "request-q46", "response": {"status_code": 200, "request_id": "6f6e4e5800c4d7819e6200934a562311", "body

In [101]:
lines = [json.loads(line) for line in file_response.iter_lines()]

In [102]:
import re 
scores = [line['response']['body']['choices'][0]['message']['content'] for line in lines]
scores = [int(re.search(r'\d', s).group()) for s in scores if re.search(r'\d+', s)]
scores

[2,
 2,
 4,
 2,
 2,
 4,
 3,
 4,
 4,
 4,
 2,
 2,
 4,
 1,
 5,
 5,
 2,
 4,
 3,
 2,
 3,
 4,
 2,
 5,
 4,
 5,
 5,
 2,
 3,
 4,
 1,
 4,
 2,
 3,
 4,
 1,
 3,
 2,
 2,
 5,
 3,
 5,
 5,
 4,
 3,
 3,
 3,
 2,
 4,
 4,
 1,
 2,
 2,
 2,
 3,
 2,
 2,
 3,
 3,
 4,
 2,
 4,
 3,
 3,
 4,
 2,
 4,
 2,
 2,
 4,
 1,
 3,
 3,
 3,
 4,
 4,
 1,
 2,
 3,
 5,
 4,
 3,
 3,
 4,
 3,
 2,
 2,
 2,
 4,
 3,
 2,
 2,
 3,
 4,
 2,
 2,
 3,
 2,
 4,
 3,
 2,
 2,
 2,
 4,
 2,
 3,
 4,
 2,
 1,
 2,
 1,
 1,
 3,
 3,
 2,
 3,
 2,
 4,
 4,
 2,
 3,
 4,
 2,
 2,
 3,
 2,
 1,
 2,
 2,
 2,
 3,
 2,
 4,
 2,
 2,
 2,
 2,
 3,
 4,
 5,
 3,
 3,
 2,
 4,
 4,
 4,
 5,
 5,
 2,
 3,
 3,
 2,
 3,
 3,
 4,
 3,
 3,
 4,
 4,
 3,
 2,
 1,
 4,
 3,
 1,
 2,
 4,
 2,
 3,
 2,
 3,
 1,
 3,
 5,
 2,
 2,
 3,
 4,
 4,
 1,
 2,
 4,
 4,
 5,
 3,
 3,
 4,
 1,
 2,
 3,
 2,
 2,
 4,
 3,
 2,
 4,
 4,
 1,
 4,
 5,
 2,
 3,
 2,
 3,
 2,
 2,
 2,
 2,
 3,
 3,
 1,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 2,
 4,
 3,
 3,
 5,
 5,
 3,
 4,
 4,
 2,
 4,
 4,
 3,
 3,
 3,
 2,
 4,
 3,
 1,
 3,
 3,
 2,
 1,
 2,
 3,
 4,
 5,
 4,
 3,
 2,
 2,


In [103]:
filtered_qs['scores']=scores

In [104]:
filtered_qs['idx'] = range(len(scores))

In [107]:
filtered_qs.sample(10)[['text', 'option_1', 'option_2', 'scores']]

Unnamed: 0,text,option_1,option_2,scores
q46563,Assume you have a homosexual friend who is the...,Yes.,No.,4
q24684,Do you carry condoms?,Always,Sometimes,3
q12503,How often should your significant other buy yo...,Very frequently.,From time to time.,2
q1040,Receiving anal sex?,I like it / I think I might like it,I don't like it / I don't think I would like it,4
q23954,Do you ever feel the need to get really drunk?,Often,Sometimes,3
q1052,How often do you get angry?,Very often,Sometimes,2
q18594,"Do superficial people, who place a high emphas...",Yes,No,2
q22021,Is your ideal match well-off financially (or a...,"Yes, it's very important.",It's somewhat important.,4
q212813,Which best describes your political beliefs?,Liberal / Left-wing,Centrist,5
q81307,Have you ever had sex with a person within the...,Yes.,No.,4
