In [12]:
import os
import json

In [13]:
import openai
from time import sleep
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

# Never hardcode the credential in the notebook: read it from the environment.
# Falls back to an empty string (the previous value) when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")


In [14]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def evaluate(input, instruction=None, fs_prompt=None, 
             verbose=False):
    """Send one prompt to the chat model and return its reply with the token usage.

    The prompt is built by ``generate_prompt`` from ``input`` plus either
    ``instruction`` or ``fs_prompt``. The call is wrapped in a tenacity retry:
    any exception triggers a randomised exponential backoff (min=1, max=60),
    giving up after 6 attempts.

    Args:
        input: Text to be rewritten by the model.
        instruction: Optional task instruction (zero-shot prompting).
        fs_prompt: Optional pre-built few-shot prompt.
        verbose: When true, print the full prompt before sending it.

    Returns:
        A ``(text, total_tokens)`` tuple: the stripped text of all returned
        choices concatenated together, and ``response['usage']['total_tokens']``.
    """
    prompt = generate_prompt(input, instruction, fs_prompt)
    if verbose:
        print("****PROMPT:****", prompt, "****END PROMPT****", sep="\n")

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0301",
        messages=[{'role': 'user', 'content': prompt}],
        temperature=0.7,
        top_p=0.9,
    )

    # Each choice is stripped individually, then glued together without a separator.
    answer = ''.join(choice.message.content.strip() for choice in response.choices)
    return answer.strip(), response['usage']['total_tokens']

def construct_few_shot_prompt(train_file_path, n=10):
    """Build a few-shot prompt from the first ``n`` records of a training file.

    The file must contain a JSON list of objects with the keys
    ``"instruction"``, ``"input"`` and ``"output"``. The resulting prompt is:

        <first instruction>
        For example:
        <input 1>
        <output 1>

        ...
        For the following sentence, <last instruction, lower-cased>

    Args:
        train_file_path: Path to the JSON training file.
        n: Number of leading records to use as demonstrations (a slice bound,
            so ``True`` behaves like ``1``).

    Returns:
        The prompt string with surrounding whitespace stripped.

    Raises:
        ValueError: If no demonstrations are available (empty file or ``n == 0``).
            Previously this surfaced as an ``UnboundLocalError``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(train_file_path, "r", encoding="utf-8") as f:
        # Takes the FIRST n records (deterministic), not a random sample.
        train_samples = json.load(f)[:n]

    if not train_samples:
        raise ValueError(
            f"No few-shot examples available from {train_file_path!r} with n={n!r}"
        )

    header = f"{train_samples[0]['instruction']}\nFor example:\n"
    examples = "".join(
        f"{row['input']}\n{row['output']}\n\n" for row in train_samples
    )
    # The closing line reuses the instruction of the last demonstration.
    closing = f"For the following sentence, {train_samples[-1]['instruction'].lower()}"
    return f"{header}{examples}{closing}".strip()

def generate_prompt(input, instruction=None, fs_prompt=None):
    """Combine the input text with an instruction or a few-shot prompt.

    Args:
        input: The text the model should operate on; must be non-empty.
        instruction: Task instruction. When given, it takes precedence over
            ``fs_prompt`` and the prompt is ``"<instruction>\\n<input>"``.
        fs_prompt: Few-shot prompt. Used only when no instruction is given;
            it is followed by a separator line and then the input.

    Returns:
        The assembled prompt string.

    Raises:
        ValueError: If ``input`` is empty, or neither ``instruction`` nor
            ``fs_prompt`` is provided. Previously the function silently
            returned ``None`` in these cases.
    """
    if not input:
        raise ValueError("generate_prompt requires a non-empty input")
    if instruction:
        return f"{instruction}\n{input}"
    if fs_prompt:
        # The separator line made of 8 spaces is kept exactly as before so the
        # prompts sent to the model are unchanged.
        return f"{fs_prompt}\n        \n{input}"
    raise ValueError("generate_prompt requires either an instruction or an fs_prompt")

def run_inference(input_json, out_dir,
                   model_name, 
                   n_shots = True, train_file_path=None,
                   verbose=False, stop_idx=10000):
    """Run the chat model over a test set and save the predictions as JSON.

    Args:
        input_json: Path to a JSON list of records with ``"instruction"`` and
            ``"input"`` keys.
        out_dir: Existing directory that receives
            ``{model_name}_test_output.json``.
        model_name: Prefix of the output file name.
        n_shots: Number of few-shot demonstrations. ``0`` selects zero-shot
            prompting with each record's own instruction; any other value is
            passed to ``construct_few_shot_prompt`` as ``n`` (the default
            ``True`` therefore behaves like ``1``).
        train_file_path: Training JSON used to build the few-shot prompt;
            required unless ``n_shots == 0``.
        verbose: Forwarded to ``evaluate`` to print each full prompt.
        stop_idx: Last record index (inclusive) to process.

    Returns:
        A list of dicts with keys ``"instruction"``, ``"input"``, ``"pred"``
        and ``"id"``, one per processed record.
    """
    with open(input_json, 'r', encoding='utf-8') as fp:
        test_file = json.load(fp)

    # The few-shot prompt does not depend on the test record, so build it once
    # instead of re-reading the training file on every iteration.
    fs_prompt = None
    if n_shots != 0:
        fs_prompt = construct_few_shot_prompt(train_file_path, n=n_shots)

    output = []
    total_tokens = 0
    for i, line in enumerate(test_file):
        if i > stop_idx:
            break

        print(f"INSTANCE {i}, percentage: {i/len(test_file)}")
        print(f"INSTRUCTION: {line['instruction']}\nINPUT: {line['input']}\n")

        # evaluate() always returns (text, total_tokens); unpack it in both
        # modes so "pred" is the text and token usage is tracked either way.
        if n_shots == 0:
            pred, tokens = evaluate(input=line['input'], instruction=line['instruction'],
                                    verbose=verbose)
        else:
            pred, tokens = evaluate(input=line['input'], fs_prompt=fs_prompt,
                                    verbose=verbose)
        total_tokens += tokens
        # NOTE(review): 0.002 looks like a USD price per 1K tokens - confirm
        # against current pricing for the model in use.
        print(f"CURRENT_TOTAL_COST: {(total_tokens/1000)*0.002}\n")

        output.append({"instruction": line['instruction'],
                       "input": line['input'],
                       "pred": pred,
                       "id": i})

        print("RESPONSE:\n", pred)
        print("-"*50)

        # Checkpoint every 10 records so an interrupted run keeps partial results.
        if i % 10 == 0:
            _save_predictions(output, out_dir, model_name)

    # final save
    _save_predictions(output, out_dir, model_name)
    return output


def _save_predictions(output, out_dir, model_name):
    """Write ``output`` to ``{out_dir}/{model_name}_test_output.json`` (overwriting)."""
    with open(f"{out_dir}/{model_name}_test_output.json", 'w', encoding='utf-8') as fp:
        json.dump(output, fp, indent=4)

In [None]:
# Informal -> formal: the test set, the few-shot training set and the output
# file all live in the same task directory. Fail early if anything is missing.
out_dir = "../data/gyafc_w_ICHF_alpaca/informal_to_formal"
assert os.path.isdir(out_dir)
input_json = f"{out_dir}/test.json"
assert os.path.isfile(input_json)
train_file_path = f"{out_dir}/train.json"
assert os.path.isfile(train_file_path)

# Quick manual check of the assembled prompt (kept disabled):
# fs_prompt = construct_few_shot_prompt(train_file_path, n=3)
# print(generate_prompt(input="test", fs_prompt=fs_prompt))

# One-shot run; predictions are written to {out_dir}/chatGPT_test_output.json.
run_inference(
    input_json=input_json,
    out_dir=out_dir,
    model_name="chatGPT",
    n_shots=1,
    train_file_path=train_file_path,
    verbose=False,
    stop_idx=10000,
)

In [None]:
# Formal -> informal: same setup as the opposite direction, pointed at the
# formal_to_informal task directory. Fail early if anything is missing.
out_dir = "../data/gyafc_w_ICHF_alpaca/formal_to_informal"
assert os.path.isdir(out_dir)
input_json = f"{out_dir}/test.json"
assert os.path.isfile(input_json)
train_file_path = f"{out_dir}/train.json"
assert os.path.isfile(train_file_path)

# One-shot run; predictions are written to {out_dir}/chatGPT_test_output.json.
run_inference(
    input_json=input_json,
    out_dir=out_dir,
    model_name="chatGPT",
    n_shots=1,
    train_file_path=train_file_path,
    verbose=False,
    stop_idx=10000,
)