In [1]:
# 240626

from anthropic import AnthropicVertex
import os
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor
from itertools import product
import json
from util_trans import *

REGION = 'us-east5'
PROJECT_ID = "phonic-impact-421908"
ENDPOINT = f"https://{REGION}-aiplatform.googleapis.com"

os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
client = AnthropicVertex(region=REGION, project_id=PROJECT_ID)

In [2]:
class ModelResponder:
    def __init__(self, model_path, ques_path_list, context_list, prompt_func_list=None, path=None):
        self.batch_size = 10
        self.model_path = model_path
        if path is not None:
            self.path = path
        else:
            self.path = os.path.basename(model_path)

        if not prompt_func_list:
            default_prompt_func = lambda inst,ques: f"{ques}"
            prompt_func_list = [default_prompt_func]
        self.prompt_func_list = prompt_func_list

        indexed_ques_paths = list(enumerate(ques_path_list))
        indexed_prompt_funcs = list(enumerate(self.prompt_func_list))
        indexed_contexts = list(enumerate(context_list))
        self.combinations = list(product(indexed_ques_paths, indexed_prompt_funcs, indexed_contexts))

    def process_and_update(self, ques_dict, context, prompt_func, pbar):
        max_retries = 50
        retries = 0
        while retries < max_retries:
            try:
                responses = client.messages.create(
                    messages=[{"role":"user","content": prompt_func(inst, ques_dict)}],
                    system=context,
                    model=model_path,
                    temperature=0,
                    max_tokens=1024
                )
                pbar.update(1)
                output = responses.to_dict()['content'][0]['text']
                try: result = ast.literal_eval(extract_substring(output))
                except: result = output_adjust(extract_substring(output))
                return result
            except Exception as e:
                if "Quota" in str(e):
                    time.sleep(60)
                elif "he candidate is likely blocked" in str(e):
                    retries += 50
                    print(f'Candidate blocked, skipping... {ques_dict['year']}-{ques_dict['session']}-{ques_dict['question_number']}"')
                else:
                    retries += 1
                    print(e)
                    print(f"Retrying '{ques_dict['year']}-{ques_dict['session']}-{ques_dict['question_number']}'... ({retries}/{max_retries})")
                    print(output)
                    print(result)
                    time.sleep(10)

    def process_files(self):
        for (ques_idx, ques_path), (prompt_idx, prompt_func), (context_idx, inst) in self.combinations:
            with open(ques_path) as f:
                exam = json.load(f)
            filename=f'output/{self.path} [f{prompt_idx+1}_p{context_idx+1}_q{ques_idx+1}].json'
            dataset = ExamDataset(exam, inst, prompt_func)
            dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)
            with tqdm(total=len(dataloader),desc=filename, leave=True, position=0) as pbar:
                for i, batch in enumerate(dataloader):
                    with ThreadPoolExecutor() as executor:
                        with tqdm(total=len(batch), desc=f"Batch {i}", leave=True, position=0) as batch_pbar:
                            results = list(executor.map(lambda ques: self.process_and_update(ques, inst, prompt_func, batch_pbar), batch))
                    if os.path.exists(filename):
                        with open(filename, 'r') as file:
                            resp = json.load(file)
                    else:
                        resp = []
                        os.makedirs(os.path.dirname(filename), exist_ok=True)
                    for result in results:
                        resp.append(result)
                    with open(filename, 'w', encoding='utf-8') as f:
                        json.dump(resp, f, indent=4, ensure_ascii=False)
                    pbar.update(1)

In [3]:
model_path = "claude-3-5-sonnet@20240620"
run = ModelResponder(model_path, ques_path_list, inst_list)
run.process_files()

Batch 0: 100%|██████████| 10/10 [00:05<00:00,  1.70it/s]          | 0/210 [00:00<?, ?it/s]
Batch 1: 100%|██████████| 10/10 [00:02<00:00,  3.56it/s]          | 1/210 [00:05<20:31,  5.89s/it]
Batch 2: 100%|██████████| 10/10 [00:02<00:00,  3.48it/s]          | 2/210 [00:08<14:09,  4.08s/it]
Batch 3: 100%|██████████| 10/10 [00:03<00:00,  3.22it/s]▏         | 3/210 [00:11<12:10,  3.53s/it]
Batch 4: 100%|██████████| 10/10 [00:03<00:00,  3.24it/s]▏         | 4/210 [00:14<11:32,  3.36s/it]
Batch 5: 100%|██████████| 10/10 [00:03<00:00,  2.90it/s]▏         | 5/210 [00:17<11:09,  3.27s/it]
Batch 6: 100%|██████████| 10/10 [00:03<00:00,  3.19it/s]▎         | 6/210 [00:21<11:19,  3.33s/it]
Batch 7: 100%|██████████| 10/10 [00:03<00:00,  2.90it/s]▎         | 7/210 [00:24<11:03,  3.27s/it]
Batch 8: 100%|██████████| 10/10 [00:03<00:00,  3.32it/s]▍         | 8/210 [00:27<11:12,  3.33s/it]
Batch 9: 100%|██████████| 10/10 [00:03<00:00,  3.10it/s]▍         | 9/210 [00:30<10:49,  3.23s/it]
Batch 10: 100%|███

invalid decimal literal (<unknown>, line 1)
Retrying '2022-1-60'... (1/50)
Here's the translation of the question into English while maintaining its original format:

{'year': 2022, 'session': 1, 'common_question_text': 'Read the following case and choose the most appropriate answer for each question.', 'common_data': 'The following figure shows the results of testing the contraction of vascular smooth muscle in vitro after isolating it from a white rat's blood vessel.\nThe effects of drug B and drug C on the vascular smooth muscle contraction response of A (epinephrine) are represented as dose-response curves. (Note: '10X' indicates a 10-fold concentration)\n<Data (confidential)>', 'question_number': 60, 'question_text': 'Which of the following correctly describes the action characteristics of C?', 'data': None, 'choices': ['① It shows an additive effect with A.', '② It shows a synergistic effect with A.', '③ It competitively antagonizes A.', '④ It non-competitively antagonizes A.', '

UnboundLocalError: cannot access local variable 'result' where it is not associated with a value

In [None]:
# model_path = "claude-3-opus@20240229"
# run = ModelResponder(model_path, ques_path_list, inst_list)
# run.process_files()