In [1]:
import json
import os
from glob import glob

from langchain.callbacks import get_openai_callback
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from tqdm.notebook import tqdm

from src.conditionalqa.code_prompt import CodePrompt, RationaleType
from src.conditionalqa.evaluation import evaluate
from src.conditionalqa.oracle_retriever import get_summarized_doc
from src.utils import get_current_time, print_chain_stats

In [2]:
# EXPERIMENT SETTINGS -- EDIT THESE VARIABLES BEFORE RUNNING

# Model / deployment name handed to the LLM client.
llm_name = "gpt-3.5-turbo-16k-0613"

# Set to True to write outputs, metadata and results to disk.
save_results = False

# Demonstration counts forwarded to CodePrompt.
num_translation_examples = 4
num_interpreter_examples = 4

# Slice of the dev set to evaluate: [start_idx, end_idx).
# 285 examples is the full dev set; raise start_idx only to skip examples.
start_idx = 0
num_dev_examples = 285
end_idx = start_idx + num_dev_examples

In [None]:
# Use this cell if you run against the Azure OpenAI Service.
# Fill in the four settings below first. As written, these assignments
# overwrite whatever values are already present in the process environment.
os.environ["OPENAI_API_TYPE"] = ""
os.environ["OPENAI_API_VERSION"] = ""
os.environ["OPENAI_API_BASE"] = ""
os.environ["OPENAI_API_KEY"] = ""

llm = AzureChatOpenAI(
    deployment_name=llm_name,
    temperature=0.0,
    max_retries=3,
    # `timeout` is the alias of `request_timeout` in LangChain's OpenAI chat
    # models, so passing both (30 and 180 seconds) gave one setting two
    # conflicting values. Only the alias value is kept.
    # TODO confirm 180 s is the intended per-request limit.
    timeout=60 * 3,
)

In [3]:
# Use this cell if you run against the OpenAI API directly.
# Paste the key below; do not commit the notebook with a real key in it.
openai_api_key = ""
llm = ChatOpenAI(
    api_key=openai_api_key,
    model=llm_name,
    temperature=0.0,
    max_retries=3,
    # `timeout` is the alias of `request_timeout` in LangChain's OpenAI chat
    # models, so passing both (30 and 180 seconds) gave one setting two
    # conflicting values. Only the alias value is kept.
    # TODO confirm 180 s is the intended per-request limit.
    timeout=60 * 3,
)

In [4]:
# Load the ConditionalQA train/dev splits and the source documents.
# The files are opened as UTF-8 explicitly: the default text encoding of
# open() depends on the platform locale, which can fail on or garble
# non-ASCII characters in the JSON files.
with open("data/CondQA/train.json", encoding="utf-8") as f:
    train = json.load(f)
with open("data/CondQA/dev.json", encoding="utf-8") as f:
    dev = json.load(f)

with open("data/CondQA/documents.json", encoding="utf-8") as f:
    documents = json.load(f)

# Index the documents by their "url" field for lookup by URL.
url2doc = {d["url"]: d for d in documents}

In [5]:
# Load the demonstrations for each component of the chain of prompts.
# The files are opened as UTF-8 explicitly: the default text encoding of
# open() depends on the platform locale, which can fail on or garble
# non-ASCII characters in the JSON files.
with open("data/CondQA/ICL/doc2code/examples.json", encoding="utf-8") as f:
    doc2code_examples = json.load(f)

with open("data/CondQA/ICL/question2code/examples.json", encoding="utf-8") as f:
    question2code_examples = json.load(f)

with open(
    "data/CondQA/ICL/QAcode2answer/span_examples.json", encoding="utf-8"
) as f:
    qacode2span_answer_examples = json.load(f)

with open(
    "data/CondQA/ICL/QAcode2answer/yes_no_examples.json", encoding="utf-8"
) as f:
    qacode2yn_answer_examples = json.load(f)

In [None]:
## RUN THIS. DO NOT CHANGE
# Runs the CodePrompt chain over the selected dev examples, evaluates the
# predicted answers, optionally saves everything to disk and prints a summary.

# The output folder encodes the model, the demonstration counts and the
# evaluated dev slice.
output_path = os.path.join(
    "outputs/ConditionalQA/ICL/CodePrompt/",
    llm_name,
    f"transl_icl{num_translation_examples}_interp_icl{num_interpreter_examples}",
    f"dev{start_idx}_{end_idx}",
    "w_txt_rationales",
)

# By default seed = 0. When save_results is True it is set to the number of
# entries already present in the output folder, so each saved run gets its
# own run_<n> folder and its own seed.
seed = 0
if save_results:
    # creating the base folder
    os.makedirs(output_path, exist_ok=True)
    # creating the run folder
    num_runs = len(glob(os.path.join(output_path, "*")))
    seed = num_runs
    output_path = os.path.join(output_path, f"run_{num_runs}")
    os.makedirs(output_path, exist_ok=True)
    print(f"Output path: {output_path}")

model = CodePrompt(
    llm,
    question2code_examples,
    doc2code_examples,
    qacode2yn_answer_examples,
    qacode2span_answer_examples,
    use_rationales=RationaleType.TEXT,
    num_translation_examples=num_translation_examples,
    num_interpreter_examples=num_interpreter_examples,
    seed=seed,
)

# Single definition of the evaluated slice. The original spelled it once as
# `start_idx + num_dev_examples` (inference) and once as `end_idx`
# (evaluation and folder name); using one slice keeps predictions, references
# and the output path in agreement even if `end_idx` is edited by hand.
dev_subset = dev[start_idx:end_idx]

list_answers = []
# Per-example usage statistics; the keys match the attribute names read from
# the OpenAI callback handler below.
openai_metadata = {
    "completion_tokens": [],
    "total_cost": [],
    "total_tokens": [],
    "prompt_tokens": [],
}
pbar = tqdm(dev_subset)
for x in pbar:
    question = x["scenario"] + ". " + x["question"]
    summarized_doc = get_summarized_doc(x, url2doc)
    # The callback accumulates token counts and cost for the calls made
    # inside this `with` block, i.e. for this example only.
    with get_openai_callback() as cb:
        answer, inter_results = model(question, x["question"], summarized_doc)
        for key in openai_metadata:
            openai_metadata[key].append(getattr(cb, key))
    list_answers.append(
        {"id": x["id"], "answers": answer, "inter_results": inter_results}
    )
    pbar.set_description(
        f"Current total cost: {sum(openai_metadata['total_cost']):.2f}"
    )

results = evaluate(
    {d["id"]: d["answers"] for d in list_answers},
    dev_subset,
)

# Taken once so the saved timestamp and the printed one are identical.
timestamp = get_current_time()

if save_results:
    # store outputs
    with open(
        os.path.join(output_path, "output.json"), "w", encoding="utf-8"
    ) as f:
        json.dump(list_answers, f)
    # store openai metadata
    with open(
        os.path.join(output_path, "openai_metadata.json"), "w", encoding="utf-8"
    ) as f:
        json.dump(openai_metadata, f, indent=4)
    # store results
    with open(
        os.path.join(output_path, "results.json"), "w", encoding="utf-8"
    ) as f:
        json.dump(results, f, indent=4)
    # store timestamp
    with open(
        os.path.join(output_path, "timestamp.txt"), "w", encoding="utf-8"
    ) as f:
        f.write(timestamp)

print("## OpenAI Metadata ##")
print_chain_stats(openai_metadata)
print("\n\n## Results ##")
print(json.dumps(results, indent=4))
print("\n\n## Timestamp ##")
print(timestamp)