# How to create a custom LangChain model with custom tools

- Not everyone can afford to pay for LLM APIs or run even fairly small open models on their own device, and most of the excellent tutorials and resources focus on those APIs, which makes learning LangChain and similar tools more difficult than it needs to be.

- The goal of this notebook is to give a basic framework in which you can use the relatively small but performant model gemma_2b_it with LangChain and augment it with your own custom tools.

- References from LangChain documentation:
[How to create Custom Langchain LLM](https://python.langchain.com/docs/modules/model_io/llms/custom_llm/)
How to create custom tools: [one](https://python.langchain.com/docs/use_cases/tool_use/prompting/),  [two](https://python.langchain.com/docs/modules/tools/custom_tools/)

In [None]:
#%%capture --no-stderr          # this is basically for silencing warnings
%pip install -U langchain_community tiktoken langchainhub chromadb langchain langgraph faiss-cpu

!pip install -q bitsandbytes accelerate 

In [None]:
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

from langchain_core.tools import tool
from langchain_core.output_parsers import JsonOutputParser
from langchain.tools.render import render_text_description
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Target device for tensors: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
#@title  for GPU

# Identifier of the Gemma checkpoint passed to both `from_pretrained` calls below.
model_id = "google/gemma-2b-it"

# Quantization settings: 4-bit weights using the "nf4" quantization type with
# double quantization enabled, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the causal LM with the quantization config applied.
# NOTE(review): device_map="auto" presumably lets the library decide where the
# weights are placed — confirm against the transformers/accelerate docs.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

# NOTE(review): add_eos_token=True presumably makes the tokenizer append <eos>
# to every encoded prompt — confirm; code that decodes the generated ids has to
# account for that marker.
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)

# Optional: save the loaded model to a local directory and reload it from there.
### model.save_pretrained("./model")
### model = AutoModelForCausalLM.from_pretrained("./model")

In [None]:
# print( len( list( model.parameters() ) ) , model.__dict__['_modules']['model'] )
# for param in model.parameters():
#      print(type(param), param.size())

# list( model.parameters() )

In [None]:
# from transformers import pipeline

# pipeline = pipeline(
#         "text-generation",
#         model=model,
#         tokenizer=tokenizer
#     )

# def model_gen(user_input: str = '', pipeline=pipeline):

#     messages = [{"role": "user", "content": user_input},]
#     prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     outputs = pipeline(
#         prompt,
#         max_new_tokens=256,
#         do_sample=True,
#         temperature=0.7,
#         top_k=50,
#         top_p=0.95
#     )


#     text = outputs[0]["generated_text"][len(prompt):]
#     # from IPython.display import Markdown, display
#     # display(Markdown(text))
#     return text   #, print(prompt)


# user_input = "write python function for adding two numbers"
# model_gen(user_input)

In [6]:
# Create custom LangChain model from gemma 

import re
import json

class Gemma(LLM):
    """LangChain ``LLM`` wrapper around a locally loaded causal language model.

    The wrapper tokenizes the prompt, runs ``model.generate`` and returns the
    newly generated text. When that text contains a brace-delimited span, only
    the first such span is returned, JSON-encoded as a string, so that a
    downstream JSON parser hands the raw span on unchanged.
    """

    # Object exposing ``generate(**inputs, max_new_tokens=...)`` and ``device``.
    model: Any = None
    # Callable tokenizer that also exposes ``decode``.
    tokenizer: Any = None
    # Upper bound on newly generated tokens (forwarded as ``max_new_tokens``).
    n: Optional[int] = None

    def __init__(self, model, tokenizer, n):
        """Store the model, its tokenizer and the generation budget ``n``."""
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.n = n

    @property
    def _llm_type(self) -> str:
        """Return the identifier of this LLM implementation."""
        return "Gemma"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Fully rendered prompt text.
            stop: Must be ``None``; stop sequences are not supported.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``json.dumps`` of the first ``{...}`` span found in the completion
            (non-greedy, single line), or the stripped completion text when no
            such span exists.

        Raises:
            ValueError: If ``stop`` is provided.
        """
        # Fail fast: validate before paying for tokenization and generation,
        # and regardless of what the completion ends up containing.
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        # Send the inputs to wherever the model's weights live rather than
        # relying on a module-level device variable.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=self.n)

        # The generated sequence starts with the prompt ids; decode only what
        # follows them. This stays correct when generation is truncated before
        # an end-of-sequence token is produced, where searching the full text
        # for two "<eos>" markers would yield an empty answer.
        prompt_length = inputs["input_ids"].shape[-1]
        completion = self.tokenizer.decode(outputs[0][prompt_length:])
        eos_token = getattr(self.tokenizer, "eos_token", None) or "<eos>"
        answer_text = completion.split(eos_token, 1)[0].strip()

        # Non-greedy: the span ends at the FIRST "}", so a nested object comes
        # back without its outer closing brace. The JSON-string encoding of
        # the raw span is kept because downstream steps expect that shape.
        match = re.search(r'\{.*?\}', answer_text)
        if match:
            return json.dumps(match.group(0))

        return answer_text

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"n": self.n}

# Wrap the loaded model; n=256 is the cap passed to generate() as max_new_tokens.
llm = Gemma(model=model, tokenizer=tokenizer, n=256)

In [148]:
# Create custom tools:
# NOTE: even when a model is not explicitly trained for "function calling", we can still achieve that behavior,
#       because models never actually call functions: they only give us answers in a specific, predefined
#       structure. That structure matters only because it must match the structure of our functions' inputs,
#       so that we can use the answer to call those functions ourselves.
#       In other words, the LLM does not perform the function call (we do); we use the LLM to obtain
#       structured responses that become the inputs of our functions.

# NOTE(review): with langchain's @tool the docstring presumably doubles as the
# tool description shown to the model — confirm before rewording it.
@tool
def multiply(first_int: int, second_int: int) -> int:
    """Multiply two integers together."""
    return first_int * second_int

# NOTE(review): with langchain's @tool the docstring presumably doubles as the
# tool description shown to the model — confirm before rewording it.
@tool
def add(first_int: int, second_int: int) -> int:
    "Add two integers."
    return first_int + second_int

# NOTE(review): with langchain's @tool the docstring presumably doubles as the
# tool description shown to the model — confirm before rewording it.
@tool
def exponentiate(base: int, exponent: int) -> int:
    "Exponentiate the base to the exponent power."
    # A negative exponent makes ** return a float despite the int annotation.
    return base**exponent

# Tools offered to the model, plus a lookup from each tool's name to the tool.
tools = [add, exponentiate, multiply]
tool_map = {registered.name: registered for registered in tools}

# Render the tools as plain text instead of interpolating the list itself,
# which would embed each tool object's repr in the prompt.
rendered_tools = render_text_description(tools)
# The prompt below is later parsed as a template, where "{" / "}" introduce
# variables; double any braces in the tool text so they stay literal.
rendered_tools = rendered_tools.replace("{", "{{").replace("}", "}}")

system_prompt = f"""You are an assistant that has access to the following set of tools. Here are the names and descriptions for each tool:

{rendered_tools}

Given the user input, return the name and input of the tool to use. Return your response as a JSON blob with 'name' and 'arguments' keys."""

# System message carries the tool list and output format; the user message is
# filled from the "input" key at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("user", "{input}")]
)


import ast
def dictifier(string, tool_map=tool_map):
    """Parse the model's tool-call text into a ``{"name", "arguments"}`` dict.

    Args:
        string: Text of a Python/JSON-style dict literal with ``"name"`` and
            ``"arguments"`` keys. Trailing closing braces may be missing.
        tool_map: Mapping from tool name to tool object; the tool's ``args``
            supply the parameter names for positional arguments.

    Returns:
        The parsed dict. When ``"arguments"`` was given positionally (tuple or
        list) it is replaced by a dict keyed by the chosen tool's parameter
        names, in declaration order.

    Raises:
        KeyError: If ``"name"``/``"arguments"`` is absent or the tool is unknown.
        ValueError, SyntaxError: If the text is not a valid literal.
    """
    string = string.strip()

    # Append exactly as many "}" as are missing. Blindly adding one brace to
    # anything not ending in ")}" or "}}" would corrupt an already balanced
    # payload. (Braces inside string values are not accounted for.)
    missing_braces = string.count("{") - string.count("}")
    if missing_braces > 0:
        string += "}" * missing_braces

    # literal_eval only evaluates literals, so model output cannot run code.
    dictionary = ast.literal_eval(string)

    chosen_tool = tool_map[dictionary["name"]]

    arguments = dictionary["arguments"]
    if isinstance(arguments, (tuple, list)):
        # Positional arguments: pair them with the tool's parameter names.
        dictionary["arguments"] = dict(zip(chosen_tool.args, arguments))

    return dictionary

def tool_chain(dictionary, tool_map=tool_map):
    """Build the step that feeds the parsed arguments to the selected tool.

    Args:
        dictionary: Parsed tool call; its ``"name"`` entry selects the tool.
        tool_map: Mapping from tool name to tool object.

    Returns:
        The composition ``itemgetter("arguments") | <selected tool>``.

    Raises:
        KeyError: If ``"name"`` is absent or names an unknown tool.
    """
    selected = tool_map[dictionary["name"]]
    pick_arguments = itemgetter("arguments")
    return pick_arguments | selected

# Pipeline: render prompt -> generate -> parse JSON -> normalise the tool call
# -> run the chosen tool and attach its result under the "output" key.
chain = (
    prompt
    | llm
    | JsonOutputParser()
    | dictifier
    | RunnablePassthrough.assign(output=tool_chain)  ## or instead just do: | tool_chain
)
chain.invoke({"input": "what's thirteen exponent 4"})

{'name': 'exponentiate',
 'arguments': {'base': 13, 'exponent': 4},
 'output': 28561}