In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, PeftConfig

# Remaining imports for this setup cell, grouped with the import lines above
# instead of being scattered between statements.
from peft import PeftModel
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline

# Informational only: the model placement below is fixed by `device_map`.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

print(f'device: {device}')

# Load the base model weights in 8-bit via bitsandbytes.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

# Local checkpoint directories: base model and the LoRA adapter trained on it.
model_path = '../Meta-Llama-3-8B-Instruct'
peft_model_path = 'llama-3-8b-lora'

tokenizer = AutoTokenizer.from_pretrained(model_path)
# device_map={"": 0} maps the whole model onto device index 0.
model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=quant_config, device_map={"": 0})

# Reuse the EOS token for padding and mirror that choice into the model and
# generation configs so tokenizer and model agree on the pad id.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
# NOTE(review): use_cache=False / pretraining_tp=1 look like settings carried
# over from the fine-tuning script; confirm they are wanted for inference.
model.config.use_cache = False
model.config.pretraining_tp = 1
model.generation_config.pad_token_id = tokenizer.pad_token_id
model.config.pad_token_id = tokenizer.pad_token_id

# Kept so the adapter configuration stays available for inspection under the
# same name as before.
config = PeftConfig.from_pretrained(peft_model_path)

# Attach the *trained* adapter. `get_peft_model(model, config)` would only
# create a new, freshly initialised adapter from the config and never read the
# saved weights, leaving the fine-tune unused.
model = PeftModel.from_pretrained(model, peft_model_path)

# Text-generation pipeline returning only the newly generated text (prompt
# stripped), wrapped so it can be used as a LangChain LLM.
pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, max_new_tokens=1024, return_full_text=False)
llm = HuggingFacePipeline(pipeline=pipe)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


device: cuda:0


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM'

In [3]:
from langchain.agents.output_parsers import ReActSingleInputOutputParser


In [4]:
from chain_items import PromptValueToChatML, ChatTemplateApplier
from chain_items.static import generate_history_passthrough, generate_gemma2_chat_prompt_template, generate_tools  

# Tool list shared by the prompt template below and by the AgentExecutor cell.
tools = generate_tools()

# Single-step agent chain, composed left to right with the `|` operator and
# ending in the ReAct output parser.
# NOTE(review): the prompt-template helper is named for Gemma 2 while the
# loaded checkpoint is Llama 3 - confirm this pairing is intentional.
agent = (
    generate_history_passthrough()
    | generate_gemma2_chat_prompt_template(tools)
    | PromptValueToChatML()
    | ChatTemplateApplier(tokenizer=tokenizer)
    | llm
    | ReActSingleInputOutputParser()
)


In [5]:
# Smoke test: run the bare agent chain for one step, with no prior tool calls,
# and print whatever the output parser returns.
single_step_request = {
    'input': 'who is president of south korea now?',
    # 'input': 'please give me a address of myongji university yongin campus.',
    'intermediate_steps': [
        # (AgentAction('searxng', 'president of south korea', 'Thought: I should search for the recent web results to find the answer.\n\nAction: searxng\n\nAction Input: "current president of south korea"'), 'yoon seok yeol')
    ],
}

single_step_result = agent.invoke(single_step_request)
print(single_step_result)


input messages=[HumanMessage(content='Answer the following questions as best you can. You have access to the following tools:\n\nsearxng(*args, **kargs) - Search searxng for recent web results\n\nUse only one of following formats:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [searxng]\nAction Input: the input to the action\n\nor\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: who is president of south korea now?')]
tool='searxng' tool_input='current president of South Korea' log='Thought: I should check the recent web results to find the current president of South Korea.\n\nAction: searxng\n\nAction Input: "current president of South Korea"'


In [6]:
from langchain.agents import AgentExecutor

# Executor that drives the agent chain together with its tools.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,                    # echo each step of the run to the cell output
    handle_parsing_errors=True,
    max_iterations=3,                # upper bound on agent iterations per run
    return_intermediate_steps=True,  # include the intermediate steps in the result
)


In [7]:
# Run the full agent loop on one question and keep the result for inspection.
res = agent_executor.invoke(input={'input': 'what is reinforcement learning?'})

# The outer string is double-quoted because reusing the f-string's own quote
# character inside a replacement field is a SyntaxError before Python 3.12.
print(f"output: {res['output']}")




[1m> Entering new AgentExecutor chain...[0m
input messages=[HumanMessage(content='Answer the following questions as best you can. You have access to the following tools:\n\nsearxng(*args, **kargs) - Search searxng for recent web results\n\nUse only one of following formats:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [searxng]\nAction Input: the input to the action\n\nor\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: what is reinforcement learning?')]
[32;1m[1;3mThought: I should search for recent web results on "reinforcement learning" to get a better understanding of the concept.

Action: searxng

Action Input: "reinforcement learning"[0m[36;1m[1;3mReinforcement learning (RL) is a machine learning (ML) technique that trains software to make decisions to achieve the most optimal results. It mimics the trial-and-error learning process that human