In [1]:
import os, sys

# Move to the project root (the parent of the notebook directory) and make
# ./src importable. The guard makes the cell safe to re-run: without it,
# every re-execution would climb one more directory level.
if '_PROJECT_ROOT' not in globals():
    os.chdir('../')
    _PROJECT_ROOT = os.getcwd()

# Register src as an absolute path so the entry keeps resolving even if the
# working directory changes later, and skip it when it is already present so
# repeated runs do not pile up duplicate sys.path entries.
_SRC_DIR = os.path.join(_PROJECT_ROOT, 'src')
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

In [2]:
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

from llm_research.model import Prompt
from llm_research.model import OpenAILLM

## Preprocess

In [3]:
from src.process import csv2jsonl, split_jsonl

# NOTE(review): the raw file name is spelled "traslate" while the processed
# one is spelled "translate" — confirm the raw name matches the file on disk.
RAW_ADDRESS_CSV = 'data/raw/address_traslate.csv'
ADDRESS_JSONL = 'data/processed/address_translate.jsonl'

# Step 1: turn the raw CSV into a JSONL file (write=True presumably persists
# the result to ADDRESS_JSONL — verify against src.process).
csv2jsonl(RAW_ADDRESS_CSV, ADDRESS_JSONL, write=True)

# Step 2: split the JSONL file; n=100 presumably sets the size of the split
# used by the query cells below — TODO confirm in src.process.
split_jsonl(ADDRESS_JSONL, n=100)

## Set up the prompt

In [2]:
# Where the serialized prompt templates are written.
SYSTEM_PROMPT_PATH = 'data/raw/prompt/system.json'
HUMAN_PROMPT_PATH = 'data/raw/prompt/human.json'

# System message: fixes the role and the global constraints of the task.
# Lines are joined with "\n" and there is no trailing newline.
system_template = "\n".join([
    "You are an experienced expert in translating English addresses to Traditional Chinese.",
    "Your task is to translate the English address to Traditional Chinese using Json format.",
    '"Notice: Do not include the country and postal code in your response".',
])
system_prompt_template = PromptTemplate.from_template(system_template)
system_prompt_template.save(SYSTEM_PROMPT_PATH)


# Human message: the {instructions}, {owner_address} and {output_instructions}
# placeholders are left for the caller to fill in at request time.
human_template = "\n".join([
    "{instructions}",
    "Translate the following address in Traditional Chinese:",
    "{owner_address}",
    "Output Instructions:",
    "{output_instructions}",
    "Besides, don't forget to escape a single quote in your response json string.",
])
human_prompt_template = PromptTemplate.from_template(human_template)
human_prompt_template.save(HUMAN_PROMPT_PATH)

## Set up the prompt object, the model, and the output data class

In [3]:
# Structured output expected back from the LLM: a single translated address.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model's docstring into its schema, which could change the format
# instructions derived from this class — NOTE(review): confirm how Prompt
# consumes the schema before adding one.
class LLMResponse(BaseModel):
    translated_address: str = Field(
        description="the translated address in Traditional Chinese",
    )


# Build the prompt from the response schema plus the two saved template files
# (system template first, human template second).
prompt = Prompt(
    LLMResponse,
    'data/raw/prompt/system.json',
    'data/raw/prompt/human.json',
)

# Model settings gathered in one place so they are easy to read and tweak.
llm_settings = {
    "model": "gpt-4-1106-preview",
    "temperature": 0.,  # zero temperature to keep the output as repeatable as possible
    "timeout": 120,     # presumably seconds per request — TODO confirm in OpenAILLM
    "verbose": True,
}
model = OpenAILLM(**llm_settings)

## Query LLM

In [3]:
# Run the first batch under the 'test-100' experiment. The second and third
# arguments look like the input records and the few-shot examples, judging by
# the file names — verify against OpenAILLM.request_batch.
model.init_request(experiment_name='test-100', run_name='gpt-4-1106-warmup')
try:
    model.request_batch(
        prompt,
        'data/processed/test_100.jsonl',
        'data/raw/fewshot_examples.jsonl'
    )
finally:
    # Always close the request, even when the batch fails part-way (timeout,
    # API error, interrupt), so a failed run is not left open and does not
    # leak into the next init_request call.
    model.end_request()

2024/01/30 16:14:29 INFO mlflow.tracking.fluent: Experiment with name 'test-100' does not exist. Creating a new experiment.
100%|██████████████████████████████████████████| 100/100 [09:56<00:00,  5.96s/it]


In [None]:
# Second pass over the 'remained' file, logged as a separate run of the same
# experiment. NOTE(review): test_100_remained.jsonl is not created anywhere
# in the visible cells — confirm where it comes from.
# The model is re-created with the same settings as before so this cell can
# be run on its own.
model = OpenAILLM(model="gpt-4-1106-preview", temperature=0., timeout=120, verbose=True)
model.init_request(experiment_name='test-100', run_name='gpt-4-1106-remained')
try:
    model.request_batch(
        prompt,
        'data/processed/test_100_remained.jsonl',
        'data/raw/fewshot_examples.jsonl'
    )
finally:
    # Always close the request, even when the batch fails part-way, so a
    # failed run is not left open.
    model.end_request()

## Merge the result