In [1]:
import os
import sys


def enter_project_root():
    """Make the project root the working directory and expose ``./src`` to imports.

    The notebook is expected to be launched from a sub-directory one level
    below the project root, so relative paths such as ``data/...`` only
    resolve after moving up once.

    The function is safe to call repeatedly: when ``./src`` is already visible
    from the current directory the move is skipped, and ``./src`` is added to
    ``sys.path`` at most once. Without these guards every re-run of the cell
    would climb one directory higher and silently break all relative paths.
    """
    # A visible ./src means we are already at the project root.
    if not os.path.isdir('./src'):
        os.chdir('../')
    # Register the source directory for imports (e.g. llm_research, presumably
    # located there) without piling up duplicate entries.
    if './src' not in sys.path:
        sys.path.append('./src')


enter_project_root()

In [2]:
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

from llm_research.model import Prompt
from llm_research.model import OpenAILLM

## Set up the prompt

In [3]:
# System message: states the model's role and the translation rules it must follow.
# The text contains no {placeholders}, so the resulting template takes no input variables.
# NOTE(review): "refering" below is a typo for "referring"; it is left untouched
# because this text is saved as the prompt itself and editing it alters the model input.
system_template = """\
You are an experienced expert in translating English addresses to Traditional Chinese.
Your task is to translate the English address to Traditional Chinese using Json format.

"Note: Do not include the country and postal code in your response."
"Note: Use '臺' instead of '台' whenever possible; for example, '臺北市' is preferable to '台北市'."
"Note: Translate '-' to '之'; for example, 'NO.42-3' should be translated as '42之3號', not '42-3號'."
"Note: If the address is not in Taiwan, translate it as -1, refering to the 5th example."\
"""
# Wrap the raw text in a PromptTemplate and write it to disk as JSON; the same
# path is later passed to Prompt(...).
system_prompt_template = PromptTemplate.from_template(template=system_template)
system_prompt_template.save(file_path='data/raw/prompt/system.json')


# Human message: the per-request part of the prompt. It exposes three
# placeholders that must be supplied at format time:
#   {instructions}, {owner_address}, {output_instructions}
# NOTE(review): the last sentence asks the model to escape single quotes, yet
# \' is not a valid JSON escape sequence — confirm this matches how the
# response is parsed downstream.
human_template = """\
{instructions}
Translate the following address in Traditional Chinese:
{owner_address}
Output Instructions:
{output_instructions}
Besides, don't forget to escape a single quote in your response json string.\
"""
# Persist the template as JSON next to the system prompt; the same path is
# later passed to Prompt(...).
human_prompt_template = PromptTemplate.from_template(template=human_template)
human_prompt_template.save(file_path='data/raw/prompt/human.json')

## Set up the prompt, the model, and the output data class

In [4]:
# Structured answer expected back from the model: one field holding the
# translated address.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — pydantic copies a model's docstring into its schema description,
# which could change any output instructions derived from the schema. Confirm
# before adding one.
class LLMResponse(BaseModel):
    translated_address: str = Field(
        description="the translated address in Traditional Chinese",
    )


# Combine the response schema with the two prompt files written to disk
# earlier in this notebook.
prompt = Prompt(
    LLMResponse,
    'data/raw/prompt/system.json',
    'data/raw/prompt/human.json',
)

# temperature=0. requests the least random sampling; timeout=120 is presumably
# in seconds (unit not visible here — confirm in OpenAILLM).
model = OpenAILLM(
    model="gpt-4-1106-preview",
    temperature=0.,
    timeout=120,
    verbose=True,
)

## Query LLM

In [12]:
# Label the upcoming requests with an experiment name and a run name
# (where these labels are recorded is not visible here).
model.init_request(
    experiment_name='translated_address',
    run_name='gpt-4-1106-formal',
)

# Long-running step: the recorded output shows 1800 items taking about 2h15m.
# Judging by the file names, the second argument is the JSONL of addresses to
# translate and the third the few-shot examples — confirm against
# OpenAILLM.request_batch.
model.request_batch(
    prompt,
    'data/processed/address_translate.jsonl',
    'data/raw/fewshot_examples.jsonl',
)

100%|██████████| 1800/1800 [2:15:04<00:00,  4.50s/it]  


In [11]:
# Counterpart to model.init_request(...); presumably finalizes the run — confirm in OpenAILLM.
# NOTE(review): the saved execution counts are out of order (the batch cell is
# In [12], this cell In [11]); restart the kernel and run all cells top to
# bottom before relying on the recorded outputs.
model.end_request()