In [64]:
from langchain_core.language_models import BaseChatModel
from langchain_core.pydantic_v1 import root_validator, Field
from langchain_core.messages import BaseMessage, AIMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from typing import Any, Dict, List, Optional

from mlx_lm import load as mlx_load
from mlx_lm import generate as mlx_generate

class MLXChatModel (BaseChatModel) :
    mlx_path: str
    mlx_model: Any = Field(default = None, exclude = True)
    mlx_tokenizer: Any = Field(default = None, exclude = True)
    max_tokens: int = Field(default = 250)

    @property
    def _llm_type(self) -> str:
        return 'MLXChatModel'

    @root_validator
    def load_model(cls, values: Dict) -> Dict:
        
        # Loading the model and tokenizer with the input string
        model, tokenizer = mlx_load(path_or_hf_repo=values['mlx_path'])

        # Saving the variables back appropriately
        values['mlx_model'] = model
        values['mlx_tokenizer'] = tokenizer

        return values
    
    def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]]) -> ChatResult:
        prompt = '''
        You are a helpful AI assistant that helps people with their problems.
        
        '''

        for message in messages:
            prompt += f'\n\n{message.content}'

        mlx_response = mlx_generate(
            model = self.mlx_model,
            tokenizer = self.mlx_tokenizer,
            max_tokens = self.max_tokens,
            prompt = prompt
        )

        return ChatResult(generations= [ChatGeneration(message = AIMessage(content = mlx_response))])

In [65]:
mlx_llm = MLXChatModel(mlx_path = "/Users/austinlackey/.cache/huggingface/hub/models--mlx-community--llama2-13b-qnt4bit/snapshots/02e61fe8846d8b9c94d658227410ab98af56b063")

In [68]:
from langchain_core.tools import tool


@tool
def multiply(first_int: int, second_int: int) -> int:
    """Multiply two integers together."""
    return first_int * second_int

multiply.invoke({"first_int": 4, "second_int": 5})

20

In [69]:
from langchain.tools.render import render_text_description

rendered_tools = render_text_description([multiply])
rendered_tools
from langchain_core.prompts import ChatPromptTemplate

system_prompt = f"""You are an assistant that has access to the following set of tools. Here are the names and descriptions for each tool:

{rendered_tools}

Given the user input, return the name and input of the tool to use. Return your response as a JSON blob with 'name' and 'arguments' keys."""

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("user", "{input}")]
)

In [71]:
from langchain_core.output_parsers import JsonOutputParser

chain = prompt | mlx_llm
chain.invoke({"input": "what's thirteen times 4"})

AIMessage(content='?\n\nPlease provide the JSON response.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n', id='run-bb7943a9-9295-41d8-84d2-8efaf23146ba-0')

In [67]:
from langchain import hub

from langchain.agents import AgentExecutor, create_tool_calling_agent
prompt = hub.pull("hwchase17/openai-tools-agent")

agent = create_tool_calling_agent(mlx_llm, tools, prompt)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)



NotImplementedError: 

In [53]:
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

tools = [FieldCodeTool()]

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=mlx_llm,
    verbose=True,
    max_iterations=2,
    early_stopping_method='generate',
    memory=conversational_memory
)

In [55]:
print(agent.agent.llm_chain.prompt.messages[0].prompt.template)

Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful system that can help with a wide range of task

In [54]:
agent("The field name Nemo Bridge is not working. Can you help me with the field code?")



[1m> Entering new AgentExecutor chain...[0m


ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: 

Expected output:

A json blob with a single action, such as:

{
    "action": "Field Code Tool",
    "action_input": "Nemo Bridge"
}











































































































































































































