In [1]:
import os


def enter_project_root(marker='examples/english_address_to_chinese'):
    """Change into the project root unless the kernel is already there.

    Every path used later in this notebook is written relative to the
    directory that contains ``examples/english_address_to_chinese``. A bare
    ``os.chdir('../../')`` climbs two more levels each time the cell is
    re-run, so the move is skipped once ``marker`` already resolves from the
    current working directory.

    Args:
        marker: Relative directory that is expected to exist once the current
            working directory is the project root.
    """
    if not os.path.isdir(marker):
        # NOTE(review): assumes the kernel starts two levels below the
        # project root (the notebook's own folder) -- confirm.
        os.chdir('../../')


enter_project_root()

In [2]:
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

from llm_research.model import Prompt


# Where the system prompt is written; the same path is handed to `Prompt` later.
system_prompt_path = 'examples/english_address_to_chinese/prompt/system.json'

# System message text: three lines joined by "\n", with no trailing newline.
system_template = (
    "You are an experienced expert in translating English addresses to Traditional Chinese.\n"
    "Your task is to translate the English address to Traditional Chinese using Json format.\n"
    '"Notice: Do not include the country and postal code in your response".'
)

# Build the template object and write it to disk.
system_prompt_template = PromptTemplate.from_template(system_template)
system_prompt_template.save(system_prompt_path)


# Where the human prompt is written; the same path is handed to `Prompt` later.
human_prompt_path = 'examples/english_address_to_chinese/prompt/human.json'

# Human message text. Placeholders: {instructions}, {owner_address},
# {output_instructions}. The text ends with a newline.
human_template = (
    "{instructions}\n"
    "Translate the following address in Traditional Chinese:\n"
    "{owner_address}\n"
    "Output Instructions:\n"
    "{output_instructions}\n"
    "Besides, don't forget to escape a single quote in your response json string.\n"
)

# Build the template object and write it to disk.
human_prompt_template = PromptTemplate.from_template(human_template)
human_prompt_template.save(human_prompt_path)


# Response schema with a single string field.
# NOTE(review): deliberately documented with comments rather than a class
# docstring -- a docstring would presumably surface in the pydantic schema
# description and so alter the text sent to the model; confirm before adding.
class LLMResponse(BaseModel):
    translated_address: str = Field(
        description="the translated address in Traditional Chinese",
    )


# Tie the response schema to the system and human prompt files by path.
prompt = Prompt(LLMResponse, system_prompt_path, human_prompt_path)

In [3]:
from llm_research.model.utils import read_jsonl

# query data: load the records and show them as the cell output
query_data_path = 'examples/english_address_to_chinese/data.jsonl'
read_jsonl(query_data_path)

[{'owner_id': 240220054,
  'owner_address': 'NO.56, LN. 125, XINXING ST., YANGMEI DIST.,  TAOYUAN CITY 326, TAIWAN (R.O.C.)'},
 {'owner_id': 240220055,
  'owner_address': '6F., NO.39, FUHE ST., SANCHONG DIST., NEW TAIPEI  CITY 241, TAIWAN (R.O.C.)'},
 {'owner_id': 240220097,
  'owner_address': 'NO. 1, LN. 200,  JUGUANG RD., BANQIAO DIST., NEW  TAIPEI CITY 22042, TAIWAN (R.O.C.)'},
 {'owner_id': 240220115,
  'owner_address': 'NO. 343, XISHI RD., FENGYUAN, TAICHUNG COUNTY,  TAIWAN, R.O.C'},
 {'owner_id': 240220115,
  'owner_address': 'NO.42-3, SEC. 3, YATAN RD., TANZI DIST., TAICHUNG CITY,  TAIWAN (R.O.C.)'},
 {'owner_id': 240220115,
  'owner_address': '16F-3, NO.89, SHIZHENG RD., XITUN DIST., TAICHUNG CITY,  TAIWAN (R.O.C.)'},
 {'owner_id': 240220116,
  'owner_address': 'NO. 343, XISHI RD., FENGYUAN DISTRICT, TAICHUNG CITY,  TAIWAN, R.O.C'},
 {'owner_id': 240220118,
  'owner_address': 'NO. 6, ALY. 5, LN. 172,  CHANG’AN W. RD., DATONG DIST.,  TAIPEI CITY 10349, TAIWAN (R.O.C.)'},
 {'owne

In [4]:
# fewshot examples: each record pairs an `owner_address` with its `translated_address`
fewshot_examples_path = 'examples/english_address_to_chinese/fewshot_examples.jsonl'
read_jsonl(fewshot_examples_path)

[{'owner_address': 'NO.56, LN. 125, XINXING ST., YANGMEI DIST.,  TAOYUAN CITY 326, TAIWAN (R.O.C.)',
  'translated_address': '桃園市楊梅區新興街125巷56號'},
 {'owner_address': '6F., NO.39, FUHE ST., SANCHONG DIST., NEW TAIPEI  CITY 241, TAIWAN (R.O.C.)',
  'translated_address': '新北市三重區福和街39號6樓'},
 {'owner_address': 'NO. 1, LN. 200,  JUGUANG RD., BANQIAO DIST., NEW  TAIPEI CITY 22042, TAIWAN (R.O.C.)',
  'translated_address': '新北市板橋區莒光路200巷1號'}]

In [5]:
from llm_research.model import OpenAILLM

# temperature=0. for the least random output; timeout=120 is passed through to
# the client (presumably seconds -- confirm against OpenAILLM).
model = OpenAILLM(model="gpt-3.5-turbo-1106", temperature=0., timeout=120, verbose=True)

# Open the request session under the given experiment / run names.
model.init_request(experiment_name='10-test', run_name='chatgpt3.5')
try:
    # Send every record of the query file through the prompt, with the
    # few-shot file supplied as the examples.
    model.request_batch(
        prompt,
        'examples/english_address_to_chinese/data.jsonl',
        'examples/english_address_to_chinese/fewshot_examples.jsonl'
    )
finally:
    # Always close what init_request opened, even when a request fails part
    # way through; otherwise a re-run of this cell starts from a dangling
    # session (presumably an active MLflow run -- confirm).
    model.end_request()

2024/01/26 12:11:38 INFO mlflow.tracking.fluent: Experiment with name '10-test' does not exist. Creating a new experiment.
100%|██████████| 10/10 [00:48<00:00,  4.83s/it]
