In [1]:
from langchain_core.pydantic_v1 import BaseModel, Field
class Triple(BaseModel):
    subject : str = Field(description="subject")

In [2]:
from langchain_experimental.llms import LMFormatEnforcer
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser,OutputFixingParser
from langchain.prompts import PromptTemplate

In [9]:
from transformers import AutoConfig, AutoModelForCausalLM, GPT2TokenizerFast,  AutoTokenizer
from transformers import pipeline
from transformers import BitsAndBytesConfig
import torch

In [28]:
auto_config = AutoConfig.from_pretrained("beomi/Llama-3-Open-Ko-8B", trust_remote_code = True)


nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4")

model = AutoModelForCausalLM.from_pretrained(
    "beomi/Llama-3-Open-Ko-8B",
    trust_remote_code = True, 
    device_map="auto",
    cache_dir = '/data',
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
)

Loading checkpoint shards: 100%|██████████| 6/6 [00:10<00:00,  1.70s/it]


In [37]:
tokenizer = AutoTokenizer.from_pretrained("beomi/Llama-3-Open-Ko-8B", use_fast=True, trust_remote_code = True, padding_side='left', truncation_side='left')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left' # to prevent errors with FA
tokenizer.truncation_side = 'left' # to prevent cutting off last generation

llm = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=128, pad_token_id=model.config.eos_token_id,
)   

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [38]:
chat = [
    {'role':'system', 'content': "You are a helpful, respectful and honest assistant."},
    {"role": "user", "content": "{instruction}\n{format_instructions}\ntext:```{query}```\n"},    
]

In [39]:
PROMPT_TEMPLATE = PromptTemplate(  ## TODO : change template automatically
    template=tokenizer.apply_chat_template(chat, tokenize=False),
    input_variables=["instruction", "format_instructions", "query"],
)

In [40]:
data = ["My name is dokyoon."]

In [41]:
prompt_and_model = PROMPT_TEMPLATE | llm

In [42]:
prompts = PROMPT_TEMPLATE.format(
        instruction="Please extract the subject from the following text.", 
        format_instructions=Triple.schema(), #  parser.get_format_instructions(), 
        query=data)
    
lm_format_enforcer = LMFormatEnforcer(
    json_schema=Triple.schema(), pipeline=llm
)


In [43]:
results = lm_format_enforcer.generate(data)

In [48]:
parser = PydanticOutputParser(pydantic_object=Triple)

In [49]:
for generation in results.generations:
    parsed_output = parser.invoke(generation[0].text)

In [50]:
print(parsed_output)

subject='from'
