In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# Local directory holding the Gemma-2 9B instruction-tuned checkpoint; the
# tokenizer and the model weights are both loaded from this one location.
MODEL_PATH = '../gemma-2-9b-it'

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

print(f'device: {device}')

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Map the whole model (the '' key) onto the selected device. The device map
# used to hard-code GPU index 0, which ignored `device` and therefore the CPU
# fallback chosen above.
# NOTE(review): float16 is kept from the original; confirm it is suitable when
# the model ends up on CPU.
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, torch_dtype=torch.float16, device_map={'': device})
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.pad_token_id = tokenizer.eos_token_id
# model.config.use_cache = False
# model.config.pretraining_tp = 1
# model.generation_config.pad_token_id = tokenizer.pad_token_id
# model.config.pad_token_id = tokenizer.pad_token_id

from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline

# Text-generation pipeline built on the model and tokenizer loaded above.
# Generation is capped at 1024 new tokens and `return_full_text=False` is set.
pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    return_full_text=False,
)

# LangChain wrapper so the pipeline can be used as a step in a chain.
llm = HuggingFacePipeline(pipeline=pipe)


device: cuda:0


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
from langchain.agents.output_parsers import ReActSingleInputOutputParser


In [3]:
from chain_items import PromptValueToChatML, ChatTemplateApplier
from chain_items.static import generate_history_passthrough, generate_gemma2_chat_prompt_template, generate_tools  

# Tools handed both to the prompt template and, later, to the executor.
tools = generate_tools()

# The agent is a left-to-right chain; each stage feeds the next one:
# history passthrough -> prompt template -> ChatML conversion ->
# tokenizer chat template -> LLM -> ReAct output parser.
agent = (
    generate_history_passthrough()
    | generate_gemma2_chat_prompt_template(tools)
    | PromptValueToChatML()
    | ChatTemplateApplier(tokenizer=tokenizer)
    | llm
    | ReActSingleInputOutputParser()
)


In [4]:
# Invoke the bare agent chain once (no executor loop) and print whatever the
# output parser returns. No intermediate steps are supplied.
single_step_request = {
    'input': 'who is president of south korea now?',
    # Alternative question to try:
    # 'input': 'please give me a address of myongji university yongin campus.',
    'intermediate_steps': [
        # Example of a previous step to feed back in (AgentAction is not
        # imported in the visible cells; import it before enabling this):
        # (AgentAction('searxng', 'president of south korea', 'Thought: I should search for the recent web results to find the answer.\n\nAction: searxng\n\nAction Input: "current president of south korea"'), 'yoon seok yeol')
    ],
}
print(agent.invoke(single_step_request))


input messages=[HumanMessage(content='Answer the following questions as best you can. You have access to the following tools:\n\nsearxng(*args, **kargs) - Search searxng for recent web results\n\nUse only one of following formats:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [searxng]\nAction Input: the input to the action\n\nor\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: who is president of south korea now?')]
tool='searxng' tool_input='president of south korea \n' log='Action: searxng\nAction Input: president of south korea \n'


In [5]:
from langchain.agents import AgentExecutor

# Executor wrapping the agent and its tools. It runs verbosely, handles
# parsing errors, stops after at most three iterations, and is asked to
# return the intermediate steps alongside the final output.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=3,
    return_intermediate_steps=True,
)


In [7]:
# Run the executor on a single question; the result is indexed by 'output' below.
res = agent_executor.invoke(input={'input': 'what is reinforcement learning?'})

# The outer f-string uses double quotes because reusing the enclosing quote
# character inside the replacement field is a SyntaxError before Python 3.12.
print(f"output: {res['output']}")




[1m> Entering new AgentExecutor chain...[0m
input messages=[HumanMessage(content='Answer the following questions as best you can. You have access to the following tools:\n\nsearxng(*args, **kargs) - Search searxng for recent web results\n\nUse only one of following formats:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [searxng]\nAction Input: the input to the action\n\nor\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: what is reinforcement learning?')]
[32;1m[1;3mThought: I need to search the web for information about reinforcement learning. 
Action: searxng
Action Input: reinforcement learning  
[0m[36;1m[1;3mReinforcement learning (RL) is a machine learning (ML) technique that trains software to make decisions to achieve the most optimal results. It mimics the trial-and-error learning process that humans use to achieve their goals.[0minput mes