In [1]:
print(1)

1


# Use langchain tool

In [None]:
# I already did this somewhere. Find? 
# I think we did this with Kostya

In [None]:
from langchain import hub
from langchain_community.chat_models import ChatOpenAI
from langchain.agents import AgentExecutor, create_structured_chat_agent

prompt = hub.pull("hwchase17/structured-chat-agent")
model = ChatOpenAI()
tools = ...

agent = create_structured_chat_agent(model, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)

agent_executor.invoke({"input": "hi"})

# Using with chat history
from langchain_core.messages import AIMessage, HumanMessage
agent_executor.invoke(
    {
        "input": "what's my name?",
        "chat_history": [
            HumanMessage(content="hi! my name is bob"),
            AIMessage(content="Hello Bob! How can I assist you today?"),
        ],
    }
)

In [14]:
# tool 1
import random
def generate_random_number():
    """Generate a random number between 1 and 100"""
    return random.randint(1, 100)
tool_1 = generate_random_number

In [15]:
# tool 2 
def save_text_to_file(text: str, file_path: str):
    """Save text to a file. Only allow relative paths within pwd."""
    if file_path.startswith("/") or ".." in file_path:
        raise ValueError("Only relative paths are allowed")
    with open(file_path, "w") as file:
        file.write(text)
tool_2 = save_text_to_file

In [None]:
from typing import Type

from langchain_community.tools.office365.base import BaseTool
from langchain_core.pydantic_v1 import BaseModel


class Tool1Schema(BaseModel):
    """Input for Tool1."""


from nltk.corpus import wordnet as wn

adjs = list(wn.all_synsets(wn.ADJ))
nouns = list(wn.all_synsets(wn.NOUN))


def generate_random_collocation():
    """Generate a random pair of words - adj + noun
    use nltk corpus for this"""
    import random
    adj = random.choice(adjs)
    noun = random.choice(nouns)
    # Extract the first lemma name from each synset to get the actual words
    adj_word = adj.lemmas()[0].name()
    noun_word = noun.lemmas()[0].name()

    # Return the formatted string
    return f"{adj_word} {noun_word}"


class RandomCollocationTool(BaseTool):
    """generate a random pair adj + noun"""

    name: str = "Random Word Pair Generator"
    description: str = (
        """Use this tool to generate a random pair of words - an adjective and a noun."""
    )
    args_schema: Type[Tool1Schema] = Tool1Schema

    usage_count = 0
    results = []

    def _run(
            self,
    ) -> str:
        self.usage_count += 1
        result = tool_1()
        self.results.append(result)
        return result


In [10]:
import inspect
from pydantic import BaseModel, create_model
from typing import Any, Callable

def create_pydantic_model_from_func(func: Callable) -> Any:
    # Get the function signature
    signature = inspect.signature(func)
    # Extract function name for documentation purposes
    func_name = func.__name__
    
    # Prepare field definitions for the Pydantic model
    fields = {}
    
    for name, param in signature.parameters.items():
        
        # Type annotation handling (default to Any if not specified)
        field_type = param.annotation if param.annotation != inspect.Parameter.empty else Any
        
        # Check if there is a default value
        default = None
        if param.default != inspect.Parameter.empty:
            default = param.default

        # Add the field to the dictionary
        fields[name] = (field_type, default)
    
    # Create the Pydantic model dynamically
    return create_model(f'{func_name.title()}Model', **fields)

# Example usage
def my_function(a: int, b: str = 'default', c: float = 10.5):
    pass

# Create the Pydantic model
ToolSchema = create_pydantic_model_from_func(my_function)

# Show what the model looks like
print(ToolSchema.schema_json(indent=2))


{
  "properties": {
    "a": {
      "default": null,
      "title": "A",
      "type": "integer"
    },
    "b": {
      "default": "default",
      "title": "B",
      "type": "string"
    },
    "c": {
      "default": 10.5,
      "title": "C",
      "type": "number"
    }
  },
  "title": "My_FunctionModel",
  "type": "object"
}


In [11]:
t = ToolSchema(a=1, b='test', c=3.14)
t2 = ToolSchema(a=2)
t, t2

(My_FunctionModel(a=1, b='test', c=3.14),
 My_FunctionModel(a=2, b='default', c=10.5))

In [34]:
# now, let's write a method that generates a properly formatted langchain tool from a function
# if the necessary metadata is missing - generate it using ai and function code or just the signature
from src.lib import simple_query_openai
import inspect


PROMPT = f"""You're description generator. You generate descriptions for functions given their source code. You output the name and description as a simple json object.
Take into account original function name and dosctring, if present, but don't rely on them too much. Use the function signature and code to generate the description. YOU SHOULD OVERRIDE INCORRECT NAMES AND DESCRIPTIONS.
OUTPUT EXAMPLE:
{{
    "name": "function_name",
    "description": "function_description"
}}
"""
WARMUP_MESSAGES = [{"role": "user", "content": """
def sum_a_b(a: int, b: int) -> int:
    return a + b
"""}, {"role": "assistant", "content": """
{"name": "sum_two_numbers", "description": "A function that takes two numbers and returns their sum."}
"""}, {"role": "user", "content": """
def func(n):
   "This function does amaing magic and performs super complex math with number and guesses what you wanted to do!"
   res = 0
   for _ in range(n):
       res *= n
    return res
"""}, {"role": "assistant", "content": """
{"name": "factorial", "description": "A function that calculates the factorial of a number."}
"""}
                   ]

import json
def generate_tool_description(code,model="gpt-3-turbo",
                              num_retries=3):
    attempts = []
    for _ in range(num_retries):
        try:
            result = simple_query_openai(code, model=model, system=PROMPT, messages=WARMUP_MESSAGES)
            data = json.loads(result)
            if "name" not in data:
                raise ValueError("Name not found in the response:" + result)
            if "description" not in data:
                raise ValueError("Description not found in the response:" + result)
            return data
        except Exception as e:
            attempts.append(e)
    else:
        raise ValueError("Failed to generate a description for the function, attempts:", attempts)

def generate_tool_from_function(func):
    # todo: use cached metadata if available.
    #  where should it be located? in the same directory as the function?
    #  or in the current working directory?
    #  use environment variables to specify the location of the metadata cache.
    #  what is default?
    #  option 1: some static file / dir on the disk - e.g. ~/.langchain
    #  option 2: cur dir
    #  option 3: lib dir
    
    # get the function code
    tool_code = inspect.getsource(func)
    
    tool_name = func.__name__
    # todo: check if the func name is adequate for what it does.
    
    
        
    
    tool_docstring = inspect.getdoc(func)
    if tool_docstring is None:
        # todo: rewrite this, make this much smarter
        #  a) use the function signature to generate a docstring
        #  b) use the function code to generate a docstring
        tool_docstring = simple_query_openai(
            f"What does the function {func.__name__} do?")
    
    tool_description = None
    
    tool_schema = create_pydantic_model_from_func(func)
    
    class Tool(BaseTool):
        f"""{tool_docstring}"""
        
        name: str = func_name
        description: str = docstring
        args_schema: Type[tool_schema] = tool_schema
        
        def _run(self, *args, **kwargs): # todo: should i annotate the return type here?
            # todo: how do I pass the arguments to the function?
            return func(*args, **kwargs)
    
    return Tool


In [19]:
code = inspect.getsource(tool_1)
res1 = generate_tool_description(code)

In [32]:
# todo: now, test with a function witha really stupid name and wrong docstring
def stupid_function(a: int, b: int = 1, c: float = 10.5):
    """This function is a test function that does nothing."""
    # but in fact this should do something.
    # complex and meaningful stuff
    if a == 0:
        return b
    return stupid_function(b % a, a)
res2 = generate_tool_description(inspect.getsource(stupid_function), model="gpt-4-turbo")

In [33]:
res2

{'name': 'stupid_function',
 'description': 'A recursive function that calculates the greatest common divisor (GCD) using the Euclidean algorithm, but defaults to returning the second parameter if the first parameter is zero.'}

In [35]:
res3 = generate_tool_description(inspect.getsource(stupid_function), model="gpt-4-turbo")
res3

{'name': 'calculate_gcd',
 'description': 'A function that calculates the greatest common divisor (GCD) of two integers using Euclidean algorithm.'}