In [1]:
import pandas as pd
import json

In [3]:
# Read the combined batch CSV into a DataFrame; the bare last expression
# makes the notebook display the frame's column labels.
batch_csv_path = './claude_vs_all_raw/combined_batch.csv'
df = pd.read_csv(batch_csv_path)
df.columns

Index(['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
       'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
       'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
       'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',
       'AssignmentId', 'WorkerId', 'AssignmentStatus', 'AcceptTime',
       'SubmitTime', 'AutoApprovalTime', 'ApprovalTime', 'RejectionTime',
       'RequesterFeedback', 'WorkTimeInSeconds', 'LifetimeApprovalRate',
       'Last30DaysApprovalRate', 'Last7DaysApprovalRate', 'Input.data',
       'Answer.results', 'Answer.timeTaken', 'Approve', 'Reject'],
      dtype='object')

In [5]:
# Decode the JSON text stored in each 'Input.data' cell into Python objects.
df['data'] = df['Input.data'].map(json.loads)

In [6]:
# Decode the JSON text stored in each 'Answer.results' cell into Python objects.
df['results'] = df['Answer.results'].map(json.loads)

In [9]:
# Flatten the parsed rows into one record per usable pairwise comparison in
# which response A belongs to 'claude'.
# NOTE(review): comparisons where 'claude' is response B are skipped entirely;
# confirm that this is intended.
comparisons_data = []
for data, results in zip(df['data'], df['results']):
    question = data['question']
    question_id = question['question_id']
    question_text = question['text']
    responses = data['responses']

    # Simplify model ids in place (this mutates the dicts held in df['data']):
    # keep only the text before the first ':' and strip a trailing '-turbo'.
    for response in responses:
        response['model_id'] = response['model_id'].partition(':')[0].removesuffix('-turbo')

    for comp in results['pairwise']:
        # All three keys are read up front, before any filtering.
        idx_a, idx_b, score = comp['responseAIdx'], comp['responseBIdx'], comp['value']

        if responses[idx_a]['model_id'] != 'claude':
            continue

        other_model = responses[idx_b]['model_id']
        if score not in (1, 2, 3):
            # Anything other than 1, 2 or 3 is reported and left out.
            print('None value for', question_id, other_model)
            continue

        comparisons_data.append({
            'question_id': question_id,
            'text': question_text,
            'model_a': 'claude',
            'model_b': other_model,
            'score': score,
        })

In [14]:
# Build the comparison table with an explicit column list so the frame keeps
# its schema even when no comparison records were collected. An empty list
# would otherwise produce a column-less frame, and attribute lookups such as
# `comps_df.model_a` would fail.
comps_df = pd.DataFrame(
    comparisons_data,
    columns=['question_id', 'text', 'model_a', 'model_b', 'score'],
)

In [16]:
# Write one JSON-lines file per (model_a, model_b) pairing found in comps_df.
modelsA = comps_df['model_a'].unique()
modelsB = comps_df['model_b'].unique()

# Every ordered pairing of distinct models; a model is never paired with itself.
model_pairs = [
    (model_a, model_b)
    for model_a in modelsA
    for model_b in modelsB
    if model_a != model_b
]

for modelA, modelB in model_pairs:
    # Keep only the rows for this pairing and the three exported columns.
    pair_mask = (comps_df['model_a'] == modelA) & (comps_df['model_b'] == modelB)
    filtered_df = comps_df.loc[pair_mask, ['question_id', 'text', 'score']]

    # One record per line, written to the parent directory.
    name = f'answer_{modelA}-vs-answer_{modelB}-human-reviewer_threeclass.jsonl'
    filtered_df.to_json(f'../{name}', orient='records', lines=True)