In [None]:
import sys

# Make modules in the parent of the working directory importable
# (presumably the project root -- confirm against the repo layout).
# Guarded so that re-running this cell does not keep appending duplicate
# '..' entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
# Load terminology
from base.load_hpo import hpo

In [None]:
# Load datasets
# Explicit imports (rather than `import *`) keep the notebook namespace clean
# and make it obvious where each loader comes from.
from base.load_dataset import (
    load_annotated_dataset,
    load_handpicked_dataset,
    load_test_dataset,
)

annotated_dataset = load_annotated_dataset()  # train + val
inference_dataset = load_test_dataset()
handpicked_dataset = load_handpicked_dataset()  # handpicked examples from train + val to cover tricky cases

In [None]:
# Load OpenAI client
from config.openai_config import openai_api_key
from prompting.openai_client import OpenAIClient

# Model identifier handed to the client wrapper.
openai_model_name = 'gpt-3.5-turbo-0613'
openai_client = OpenAIClient(openai_api_key, model=openai_model_name)

In [None]:
# Load prompting
from prompting.generate_messages import get_openai_messages
# Import only the prompt constants that are used below instead of `import *`,
# so the origin of each name is explicit.
from prompting.prompts import (
    ASSISTANT_MESSAGE_TABLE_HEADER,
    SYSTEM_MESSAGE,
    USER_MESSAGE_WRAPPER,
)

# Generate the messages to submit to the OpenAI API, one entry per item of
# the inference dataset.
# Optional: pass a few-shot dataset so that annotated examples are included
# as few-shot demonstrations.
inference_dataset_messages = get_openai_messages(
    inference_dataset,
    hpo,
    system_message=SYSTEM_MESSAGE,
    user_message_wrapper=USER_MESSAGE_WRAPPER,
    assistant_message_table_header=ASSISTANT_MESSAGE_TABLE_HEADER,
    few_shot_dataset=annotated_dataset,
    few_shot_k=10,
    few_shot_k_min=3,
    hand_picked_dataset=handpicked_dataset,
    include_response=False,
)

In [None]:
# Persist the generated message dataset in the cache.
from util.caching import save_json_to_cache

messages_cache_filename = 'inference_dataset_messages.json'
save_json_to_cache(messages_cache_filename, inference_dataset_messages)

In [None]:
# Test inference: request a response for the first message set only and
# print it.
first_message_set = inference_dataset_messages[0]
test_response = openai_client.get_response(first_message_set)
print(test_response)

In [None]:
# Run inference over every generated message set, collecting one record per
# set with the observation text and the client's response.
from tqdm import tqdm

inference_responses = []
for messages in tqdm(inference_dataset_messages):
    # The content of the last message in each set is stored as the observation.
    observation = messages[-1]['content']
    response = openai_client.get_response(messages)
    inference_responses.append(dict(observation=observation, response=response))

In [None]:
# Persist the collected responses for the dataset in the cache.
responses_cache_filename = 'inference_responses.json'
save_json_to_cache(responses_cache_filename, inference_responses)

In [None]:
# Load responses as a dataset object
import os
from base.init_dataset import init_dataset_from_openai_responses
from util.caching import CACHE_DIR

# Path of the cached responses file inside CACHE_DIR.
responses_cache_path = os.path.join(CACHE_DIR, 'inference_responses.json')
response_dataset = init_dataset_from_openai_responses(responses_cache_path)

In [None]:
# Normalization: replace each observation's terms with their normalized form.
from matching.normalization import normalize_term

for observation in tqdm(response_dataset.observations):
    observation.terms = list(map(normalize_term, observation.terms))

In [None]:
# Write predictions as a TSV file inside OUTPUT_DIR.
from config.config import OUTPUT_DIR

predictions_path = os.path.join(OUTPUT_DIR, 'BioCreativeVIII3_TestSetPreds.tsv')
response_dataset.write_to_tsv(predictions_path)