In [1]:
import sqlite3
import os
import json

# Location of the SQLite database file. The path is relative, so it resolves
# against the notebook's working directory. The functions below hand it to
# sqlite3.connect() and query tables named principle<N>.
db_path = './DBs/principles.db'


In [2]:
def create_chat_prompt(res1, res2, pid=1):
    """Build the chat messages asking a model to grade two texts.

    Args:
        res1: Text shown to the model after the ``###Text 1###`` marker.
        res2: Text shown to the model after the ``###Text 2###`` marker.
        pid: Accepted but not used by this function.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        instruction first, then the user message carrying both texts.
    """
    instructions = """Determine if the texts are correct. Return 00 if both incorrect, 01 if only text 2 is correct, 10 if only text 1 is correct, and 11 if both correct. Do not provide justification, only give a two-character response."""

    messages = [dict(role="system", content=instructions)]
    # The line break and the leading spaces before the second marker are part
    # of the prompt text itself, so that line must stay exactly as written.
    messages.append(dict(role="user", content=f"""###Text 1### {res1}
                    ###Text 2### {res2}"""))
    return messages

In [3]:

def get_all_rows(db_file=None, skipped_principles=(3, 14, 19, 22, 23)):
    """Collect every row of the selected ``principle<N>`` tables.

    Tables ``principle1`` through ``principle26`` are read in ascending
    order with ``SELECT *`` and their rows are concatenated into one list.

    Args:
        db_file: Path of the SQLite database. When omitted, the
            notebook-level ``db_path`` is used.
        skipped_principles: Principle numbers whose tables are not read.
            The default is the exclusion list this notebook has always used;
            the reason for excluding them is not recorded here.

    Returns:
        A list of row tuples, grouped by table in ascending table number.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried (for
            example when one of the expected tables does not exist).
    """
    conn = sqlite3.connect(db_path if db_file is None else db_file)
    try:
        c = conn.cursor()
        rows = []
        for i in range(1, 27):
            if i in skipped_principles:
                continue
            # Table names cannot be bound as SQL parameters; this one is built
            # solely from the loop integer, so plain formatting is safe.
            c.execute(f'SELECT * FROM principle{i}')
            rows.extend(c.fetchall())
        return rows
    finally:
        # Release the connection even when one of the SELECTs fails.
        conn.close()

In [4]:
# Model name placed in every request body; it also prefixes the artifact
# file names used by the cells below.
model = 'gpt-4'

filename = f"{model}_requests2.jsonl"
results_filename = filename.replace(".jsonl", "_results.jsonl")

# Clear artifacts of a previous run. Each file is checked on its own: a run
# that wrote the request file but never produced results would otherwise make
# this cell fail with FileNotFoundError on the results file.
for stale_path in (filename, results_filename):
    if os.path.exists(stale_path):
        os.remove(stale_path)

rows = get_all_rows()
# Columns 4 and 5 are sent as Text 1 and Text 2. Presumably column 4 is
# `response_with_principle`, since that is the column the Text 1 payload is
# later matched against - TODO confirm against the table schema.
jobs = [create_chat_prompt(row[4], row[5]) for row in rows]

# One JSON object per line (JSONL): {"model": ..., "messages": [...]}
with open(filename, "w") as f:
    for job in jobs:
        f.write(json.dumps({"model": model, "messages": job}) + "\n")

In [5]:
!python api_request_parallel_processor.py --requests_filepath 'gpt-4_requests2.jsonl'

INFO:root:Starting request #0
INFO:root:Starting request #1
INFO:root:Starting request #2
INFO:root:Starting request #3
INFO:root:Starting request #4
INFO:root:Starting request #5
INFO:root:Starting request #6
INFO:root:Starting request #7
INFO:root:Starting request #8
INFO:root:Starting request #9
INFO:root:Starting request #10
INFO:root:Starting request #11
INFO:root:Starting request #12
INFO:root:Starting request #13
INFO:root:Starting request #14
INFO:root:Starting request #15
INFO:root:Starting request #16
INFO:root:Starting request #17
INFO:root:Starting request #18
INFO:root:Starting request #19
INFO:root:Starting request #20
INFO:root:Starting request #21
INFO:root:Starting request #22
INFO:root:Starting request #23
INFO:root:Starting request #24
INFO:root:Starting request #25
INFO:root:Starting request #26
INFO:root:Starting request #27
INFO:root:Starting request #28
INFO:root:Starting request #29
INFO:root:Starting request #30
INFO:root:Starting request #31
INFO:root:Starting

In [None]:
# Re-order the API results so they follow the order of the request file.
# Each result line is expected to be a list whose first element echoes the
# request body; requests and results are paired on the user-message text.
file1_path = f"{model}_requests2.jsonl"
file2_path = f"{model}_requests2_results.jsonl"

with open(file1_path, 'r') as file:
    file1_entries = [json.loads(line) for line in file]

with open(file2_path, 'r') as file:
    file2_entries = [json.loads(line) for line in file]

if len(file1_entries) != len(file2_entries):
    raise ValueError('The length of the request and response files are not same!')

# Index results by prompt once instead of rescanning the result list for
# every request. setdefault keeps the first result seen for a prompt, which
# is the entry the previous nested scan would have picked.
results_by_prompt = {}
for entry2 in file2_entries:
    results_by_prompt.setdefault(entry2[0]['messages'][1]['content'], entry2)

ordered_file2_entries = []
unmatched_requests = []
for i, entry1 in enumerate(file1_entries):
    match = results_by_prompt.get(entry1['messages'][1]['content'])
    if match is None:
        unmatched_requests.append(i)
    else:
        ordered_file2_entries.append(match)

# A request without a result is left out of the ordered file; report it
# rather than letting the file come out silently shorter.
if unmatched_requests:
    print(f"Warning: no result found for request(s) {unmatched_requests}; they are absent from the ordered file")

# Save the ordered entries to a new JSONL file
ordered_file2_path = f"{model}_ordered_response2.jsonl"
with open(ordered_file2_path, 'w') as file:
    for entry in ordered_file2_entries:
        file.write(json.dumps(entry) + '\n')

print(f"Ordered file saved to {ordered_file2_path}")


FileNotFoundError: [Errno 2] No such file or directory: 'gpt-4_requests2_results.jsonl'

### Store correctness verdicts in database

In [None]:
def extract_res(text):
    """Return the Text 1 payload embedded in a grading prompt.

    The payload is everything between the ``'###Text 1### '`` marker and the
    next ``'###Text 2### '`` marker. It is returned unmodified, so any
    whitespace that precedes the second marker is still attached; callers
    strip it when comparing.

    Args:
        text: User-message content of a grading request.

    Returns:
        The substring between the two markers.

    Raises:
        ValueError: If either marker is missing. Slicing on a failed search
            would otherwise return an unrelated fragment of ``text``.
    """
    start_marker = '###Text 1### '
    end_marker = '###Text 2### '

    _, found_start, remainder = text.partition(start_marker)
    if not found_start:
        raise ValueError(f"marker {start_marker!r} not found in prompt text")

    payload, found_end, _ = remainder.partition(end_marker)
    if not found_end:
        raise ValueError(f"marker {end_marker!r} not found after {start_marker!r}")

    return payload

def update_correctness(response1, machine_preference, db_file=None, model_name=None):
    """Store a model verdict on the row whose response matches ``response1``.

    Tables ``principle1`` through ``principle26`` are scanned in order. The
    first row whose ``response_with_principle`` equals ``response1`` (after
    stripping spaces and newlines from both ends of each) gets its
    ``correctness_<model>`` column set to ``machine_preference``; the scan
    stops there.

    Args:
        response1: Response text to look up.
        machine_preference: Value written to the correctness column.
        db_file: Path of the SQLite database. When omitted, the
            notebook-level ``db_path`` is used.
        model_name: Suffix of the target column. When omitted, the
            notebook-level ``model`` is used.

    Returns:
        1 if a row was updated, 0 if no row matched.

    Raises:
        sqlite3.Error: If a table or the target column does not exist.
    """
    if db_file is None:
        db_file = db_path
    if model_name is None:
        model_name = model

    # Quote the column identifier: a model name such as "gpt-4" contains a
    # hyphen, and unquoted it would be parsed as a subtraction and rejected.
    column = '"correctness_' + str(model_name).replace('"', '""') + '"'
    wanted = response1.strip(' \n')

    conn = sqlite3.connect(db_file)
    try:
        c = conn.cursor()
        # NOTE(review): all 26 tables are scanned here, while get_all_rows
        # leaves out principles 3, 14, 19, 22 and 23 - confirm this is intended.
        for i in range(1, 27):
            table_name = f'principle{i}'
            c.execute(f'SELECT qid, response_with_principle FROM {table_name}')
            for qid, res in c.fetchall():
                # NULL or non-text responses cannot match; skip them instead
                # of failing on .strip().
                if not isinstance(res, str):
                    continue
                if res.strip(' \n') == wanted:
                    c.execute(
                        f'UPDATE {table_name} SET {column}=? WHERE qid=?',
                        (machine_preference, qid),
                    )
                    conn.commit()
                    return 1
        return 0
    finally:
        # Release the connection on every path, including query failures.
        conn.close()


In [None]:
# Write each model verdict from the ordered results file back to the database.
file_path = f"{model}_ordered_response2.jsonl"

with open(file_path, 'r') as file:
    file_entries = [json.loads(line) for line in file]

for i, entry in enumerate(file_entries):
    # entry[0] is the echoed request, entry[1] the API response.
    text = entry[0]['messages'][1]['content']
    try:
        correctness = entry[1]['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        # The response does not have the chat-completion shape (for example
        # an error payload instead of a completion). Store a sentinel so the
        # row can be graded by hand. Only lookup failures are caught, so
        # interrupts and unrelated bugs still surface.
        correctness = 'Error occured, manually put into chatgpt'
    response1 = extract_res(text)
    flag = update_correctness(response1, correctness)
    if flag == 0:
        print(f'Entry: {i} is not updated in database. Do it manually')
    
