# Local Model Notebook loader
## This is for people who want to test LangChain or other agent/AGI-related code in a notebook


## ⚠️Llama-cpp users🦙⚠️
If you are using llama-cpp, you can skip down to the llama-cpp cell.

If your Llama model uses the GPU, then don't skip this section.
# Text-generation-webui related code
## Load Required Libraries and Modules
The first step is to load all the required libraries and modules:

In [None]:
!pip install langchain

In [2]:
import sys
sys.argv = [sys.argv[0]]
import importlib
import json
import math
import os
import re
import sys
import time
import traceback
from functools import partial
from pathlib import Path
from threading import Lock
sys.path.append(str(Path().resolve().parent / "modules"))

import modules.extensions as extensions_module
from modules import chat, presets, shared, training, ui, utils
from modules.extensions import apply_extensions
from modules.github import clone_or_pull_repository
from modules.html_generator import chat_html_wrapper
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model
from modules.text_generation import (generate_reply_wrapper,
                                     get_encoded_length, stop_everything_event)


import torch

# Select CUDA device 0 only when a GPU is actually visible. Calling
# torch.cuda.set_device() on a machine without CUDA raises and would abort
# the whole import cell.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
else:
    print("CUDA is not available; skipping torch.cuda.set_device(0).")

# Parameters and command-line flags

Enter your command-line arguments as you would when launching `server.py` (see the [complete list](https://github.com/oobabooga/text-generation-webui#basic-settings)).

Example: --auto-devices --wbits 4 --groupsize 128 --no-stream


In [3]:
from modules.shared import parser

def parse_input_string(input_string):
    """Parse a one-line flag string with the shared argument parser.

    Args:
        input_string: Flags typed on a single line, e.g.
            "--auto-devices --wbits 4".

    Returns:
        Whatever ``parser.parse_args`` returns for the split tokens.
    """
    # str.split() with no separator splits on runs of whitespace; quotes are
    # not interpreted, so a value containing spaces becomes several tokens.
    return parser.parse_args(input_string.split())

input_string = input('Enter args string: ')
shared.args = parse_input_string(input_string)

# Load custom settings from a JSON file: an existing --settings path wins,
# otherwise fall back to ./settings.json when that file is present.
settings_file = None
if shared.args.settings is not None and Path(shared.args.settings).exists():
    settings_file = Path(shared.args.settings)
elif Path('settings.json').exists():
    settings_file = Path('settings.json')

if settings_file is not None:
    print(f"Loading settings from {settings_file}...")
    # The context manager closes the handle even when the JSON is malformed
    with open(settings_file, 'r') as settings_handle:
        new_settings = json.load(settings_handle)
    for item in new_settings:
        shared.settings[item] = new_settings[item]

# Assigned after the file is applied, so this value overrides any 'seed' in it
shared.settings['seed'] = -1


# Choose your model

In [None]:
# Function to get available models
def get_available_models():
    """List the model names found directly under ``shared.args.model_dir``.

    When ``shared.args.flexgen`` is set, only entries whose name ends in
    ``-np`` are returned, with that suffix removed. Otherwise every entry whose
    name does not end in ``.txt``, ``-np``, ``.pt`` or ``.json`` is returned,
    with a trailing ``.pth`` extension removed.

    Returns:
        list[str]: The names, sorted case-insensitively.
    """
    entry_names = [entry.name for entry in Path(f'{shared.args.model_dir}/').glob('*')]
    if shared.args.flexgen:
        return sorted(
            (re.sub('-np$', '', name) for name in entry_names if name.endswith('-np')),
            key=str.lower,
        )
    # The dot is escaped so that only a literal ".pth" extension is stripped;
    # an unescaped dot also removes the tail of names such as "model-depth".
    return sorted(
        (re.sub(r'\.pth$', '', name) for name in entry_names
         if not name.endswith(('.txt', '-np', '.pt', '.json'))),
        key=str.lower,
    )

# Get the list of available models
available_models = get_available_models()

# Set the model name: an explicit --model flag wins, otherwise pick from disk
if shared.args.model is not None:
    shared.model_name = shared.args.model
else:
    if len(available_models) == 0:
        print('No models are available! Please download at least one.')
        sys.exit(0)
    elif len(available_models) == 1:
        i = 0
    else:
        print('The following models are available:\n')
        for position, model in enumerate(available_models, start=1):
            print(f'{position}. {model}')
        print(f'\nWhich one do you want to load? 1-{len(available_models)}\n')
        # Re-prompt until the answer is a number inside the menu range. Left
        # unchecked, "0" or a negative number indexes from the end of the list
        # and silently loads a model the user did not choose.
        while True:
            try:
                i = int(input()) - 1
            except ValueError:
                i = -1
            if 0 <= i < len(available_models):
                break
            print(f'Please enter a number between 1 and {len(available_models)}.')
        print()
    shared.model_name = available_models[i]


# Load Model and Tokenizer

In [None]:
# load_model() returns the model and its tokenizer; both are stored on `shared`
shared.model, shared.tokenizer = load_model(shared.model_name)

# Apply a LoRA only when one was requested on the command line
if shared.args.lora:
    add_lora_to_model(shared.args.lora)

# Short aliases used by the pipeline cell below. They are read after the LoRA
# step so they refer to whatever `shared` holds at that point.
base_model, tokenizer = shared.model, shared.tokenizer

# Create Text-Generation Pipeline
## We create a text-generation pipeline with the specified parameters:
Feel free to change these to best fit your model/usage


In [None]:
# Bring both factories into scope in this cell so that it runs on a fresh
# kernel: `pipeline` builds the transformers text-generation pipeline and
# `HuggingFacePipeline` is the LangChain LLM wrapper around such a pipeline.
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Create a text-generation pipeline with the specified parameters
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device=0,
    max_length=1200,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.1
)

# Wrap the pipeline so LangChain chains and agents can use it as `llm`
llm = HuggingFacePipeline(pipeline=pipe)


# The model is now loaded and can be used with langchain



# 🦙Llama-cpp users🦙
## If you are just using llama-cpp then follow these steps

The folder containing the `.bin` file should be located in the `models` folder.

Example: "./models/Alpaca-7B-ggml-4bit-LoRA-merged/ggml-model-q4_0.bin"
## Install and Import dependencies

In [None]:
!pip install llama-cpp-python
!pip install langchain

In [None]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain

# Select Model

In [None]:
import os

model_dir = "./models"

# get a list of all folders in the models directory (sorted so the numbering
# shown to the user is the same on every run)
model_folders = sorted(
    f for f in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, f))
)
if not model_folders:
    raise FileNotFoundError(f"No model folders found in {model_dir}")

# print the list of model names with their index starting at 1
for i, model_name in enumerate(model_folders):
    print(f"{i+1}. {model_name}")

# ask the user to select a model by number; reject out-of-range answers, since
# 0 or a negative number would otherwise index from the end of the list
selected_index = int(input("Enter the number of the model to select: ")) - 1
if not 0 <= selected_index < len(model_folders):
    raise ValueError(f"Selection must be between 1 and {len(model_folders)}.")
selected_model = model_folders[selected_index]

# check if the selected model contains a .bin file and save the path if it does
model_bin = None
for file in sorted(os.listdir(os.path.join(model_dir, selected_model))):
    if file.endswith(".bin"):
        model_bin = os.path.join(model_dir, selected_model, file)
        break

if model_bin:
    print(f"Selected model binary: {model_bin}")
else:
    # Stop here instead of handing model_path=None to LlamaCpp below
    raise FileNotFoundError("No .bin file found in selected model directory.")

llm = LlamaCpp(model_path=model_bin)

# Beginning of LangChain section
I stole some of the code from [this colab](https://colab.research.google.com/drive/1VOwJpcZqOXag-ZXi-52ibOx6L5Pw-YJi#scrollTo=nu-AmhDLEK0h) that goes with [this video](https://www.youtube.com/watch?v=LbT1yp6quS8) by Patrick Loeber. I recommend subscribing.

# Single Action Agent Google search


In [None]:
!pip install langchain
!pip install google-search-results

In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
# `HuggingFacePipeline` is LangChain's LLM wrapper. Importing transformers'
# `pipeline` under that name rebinds it, so a later `HuggingFacePipeline(pipeline=pipe)`
# would call transformers.pipeline instead of the wrapper.
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from typing import List, Union, Any
from langchain.schema import AgentAction, AgentFinish
import re
import os

In [None]:
# Load in some tools to use

# Keep a key that is already exported in the environment; otherwise ask for it
# without echoing, so the secret is never written into the notebook itself.
if not os.environ.get("SERPAPI_API_KEY"):
    from getpass import getpass
    os.environ["SERPAPI_API_KEY"] = getpass("SerpAPI API key: ")

In [None]:
# Tools the agent may call: a single "Search" tool backed by SerpAPIWrapper.run
search = SerpAPIWrapper()
tools = [
    Tool(
        func=search.run,
        name="Search",
        description="useful for when you need to answer questions about current events",
    ),
]

In [None]:
# Call the search wrapper directly, outside any agent, and print what it returns
query = "How many people live in Canada as of 2023?"
result = search.run(query)
print(result)


In [None]:
template = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request and explain what actions were used.

### Instruction:
Answer the following questions as best you can. Speak like a priate when you give the Final answer. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s

### Input:
{input}

### Response:
{agent_scratchpad}
"""

In [None]:
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that derives the tool listing and agent scratchpad.

    Callers pass ``input`` and ``intermediate_steps``; ``agent_scratchpad``,
    ``tools`` and ``tool_names`` are computed inside :meth:`format`.
    """

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format(self, **kwargs) -> str:
        """Render ``self.template`` with the supplied and derived variables.

        Raises:
            KeyError: If ``intermediate_steps`` is not among the keyword
                arguments.
        """
        # Each step is unpacked as an (action, observation) pair
        steps = kwargs.pop("intermediate_steps")
        # Replay every earlier action log and its observation, ending each with
        # a "Thought: " cue
        kwargs["agent_scratchpad"] = "".join(
            action.log + f"\nObservation: {observation}\nThought: "
            for action, observation in steps
        )
        # One "name: description" line per tool
        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)
        # Comma-separated tool names
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        return self.template.format(**kwargs)

# Replace the template variable with the new Alpaca Instruct template.
# `{agent_scratchpad}` has to appear in the template: str.format() silently
# ignores keyword arguments that the template never references, so without the
# placeholder the earlier actions and observations would never reach the model.
template = """
Please follow the steps below to answer the question using the available tools. Repeat the steps as necessary until you find a solution.

### Instruction:
Answer the question: {input}
You have access to the following tools: {tools}

### Steps:
1. Think about the question and the best tool to use.
2. Perform the action using the selected tool.
3. Observe the results of the action and provide the final answer.

### Response Format:
Thought: Your thought process.
Action: The name of the tool (one word only, from {tool_names}).
Action Input: The input you provide to the tool.
Observation: The results obtained from using the tool.
Final Answer: The answer to the question based on your observation.

### Response:
{agent_scratchpad}"""




prompt = CustomPromptTemplate(
    # Only the variables the caller passes in are declared here:
    # `intermediate_steps` is needed, while `agent_scratchpad`, `tools` and
    # `tool_names` are generated dynamically by the template class.
    input_variables=["input", "intermediate_steps"],
    tools=tools,
    template=template,
)


In [None]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into the agent's next step."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse ``llm_output`` into an ``AgentFinish`` or an ``AgentAction``.

        Text containing "Final Answer:" finishes the run, with everything after
        the last such marker as the output. Otherwise an "Action:" line
        followed by an "Action Input:" line names the tool and its input.

        Raises:
            ValueError: If neither form is found in ``llm_output``.
        """
        if "Final Answer:" in llm_output:
            return AgentFinish(
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )

        # The action input ends at the next newline OR at the end of the text.
        # Requiring a newline rejects output that stops right after the action
        # input, e.g. when generation is cut at a stop sequence.
        regex = r"Action: (.*?)\nAction Input: (.*?)(?:\n|$)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2).strip()

        return AgentAction(tool=action, tool_input=action_input, log=llm_output)

In [None]:
from langchain.agents import AgentOutputParser
from typing import Any

class RawOutputParser(AgentOutputParser):
    """Output parser that hands the LLM text back unchanged."""

    def parse(self, llm_output: str) -> Any:
        """Return ``llm_output`` as-is."""
        return llm_output

# No earlier cell defines `agent`, so build it here from objects that already
# exist at this point: `llm`, `prompt`, `tools` and CustomOutputParser.
agent = LLMSingleActionAgent(
    llm_chain=LLMChain(llm=llm, prompt=prompt),
    output_parser=CustomOutputParser(),
    # Stop before the model starts writing its own "Observation:" line
    stop=["\nObservation:"],
    allowed_tools=[tool.name for tool in tools],
)

# The executor is given the Tool list (`tools`), not the bare SerpAPIWrapper.
# NOTE(review): a 60-second limit was previously requested via `time_limit_secs`;
# if a run-time cap is still wanted, confirm which option the installed
# LangChain version supports before adding it.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)


In [None]:
# Parser instance that turns raw LLM text into AgentAction / AgentFinish objects
output_parser = CustomOutputParser()

In [None]:
# LLM chain consisting of the LLM and a prompt
# (`llm` and `prompt` are the objects assigned in earlier cells)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
from langchain.agents import AgentOutputParser
from typing import Any

class RawOutputParser(AgentOutputParser):
    """Output parser that hands the LLM text back unchanged."""

    def parse(self, llm_output: str) -> Any:
        """Return ``llm_output`` as-is."""
        return llm_output

# Build the agent here; `llm_chain` and `output_parser` come from the two
# cells just above, and no earlier cell defines `agent`.
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    # Stop before the model starts writing its own "Observation:" line
    stop=["\nObservation:"],
    allowed_tools=[tool.name for tool in tools],
)

# The executor is given the Tool list (`tools`), not the bare SerpAPIWrapper.
# NOTE(review): a 60-second limit was previously requested via `time_limit_secs`;
# if a run-time cap is still wanted, confirm which option the installed
# LangChain version supports before adding it.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [None]:
# Pair the agent with its tools; verbose=True is passed through to the executor
# NOTE(review): `agent` must already be defined when this cell runs — confirm
# which cell in this section creates it.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [None]:
# Ask the agent a sample question; the returned value is shown as the cell output
agent_executor.run("How many people live in canada as of 2023?")

## Set up environment

Do necessary imports, etc.

In [3]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import BaseChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
from langchain.chat_models import ChatOpenAI
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish, HumanMessage
import re

## Set up tool

Set up any tools the agent may want to use. This may be necessary to put in the prompt (so that the agent knows to use these tools).

In [2]:
# Tools the agent may call: a single "Search" tool backed by SerpAPIWrapper.run
search = SerpAPIWrapper()
tools = [
    Tool(
        func=search.run,
        name="Search",
        description="useful for when you need to answer questions about current events",
    ),
]

## Prompt Template

This instructs the agent on what to do. Generally, the template should incorporate:
    
- `tools`: which tools the agent has access and how and when to call them.
- `intermediate_steps`: These are tuples of previous (`AgentAction`, `Observation`) pairs. These are generally not passed directly to the model, but the prompt template formats them in a specific way.
- `input`: generic user input

In [6]:
# Set up the base template.
# `{tools}`, `{tool_names}`, `{input}` and `{agent_scratchpad}` are placeholders
# that the prompt template class fills in via str.format().
template = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s

Question: {input}
{agent_scratchpad}"""

In [7]:
# Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt template that derives the tool listing and agent scratchpad.

    Callers pass ``input`` and ``intermediate_steps``; ``agent_scratchpad``,
    ``tools`` and ``tool_names`` are computed inside :meth:`format_messages`.
    """

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """Render the template and wrap it in a single ``HumanMessage``.

        Returns:
            A one-element list holding the formatted prompt (the annotation
            previously claimed ``str``, which did not match the return value).

        Raises:
            KeyError: If ``intermediate_steps`` is not among the keyword
                arguments.
        """
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]

In [8]:
prompt = CustomPromptTemplate(
    # Only the variables the caller passes in are declared here:
    # `intermediate_steps` is needed, while `agent_scratchpad`, `tools` and
    # `tool_names` are generated dynamically by the template class.
    input_variables=["input", "intermediate_steps"],
    tools=tools,
    template=template,
)

## Output Parser

The output parser is responsible for parsing the LLM output into `AgentAction` and `AgentFinish`. This usually depends heavily on the prompt used.

This is where you can change the parsing to do retries, handle whitespace, etc

In [9]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into the agent's next step."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse ``llm_output`` into an ``AgentFinish`` or an ``AgentAction``.

        Raises:
            ValueError: If the text has neither a "Final Answer:" marker nor an
                "Action:" / "Action Input:" pair.
        """
        finish_marker = "Final Answer:"
        # The agent is done once the model has written a final answer
        if finish_marker in llm_output:
            # Return values are a dictionary with a single `output` key:
            # the text after the last marker
            answer = llm_output.split(finish_marker)[-1].strip()
            return AgentFinish(return_values={"output": answer}, log=llm_output)

        # Otherwise pull out the tool name and the tool input
        match = re.search(r"Action: (.*?)[\n]*Action Input:[\s]*(.*)", llm_output, re.DOTALL)
        if match is None:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        tool_name, raw_input = match.group(1), match.group(2)
        # Spaces are stripped first, then double quotes, from the tool input
        return AgentAction(
            tool=tool_name.strip(),
            tool_input=raw_input.strip(" ").strip('"'),
            log=llm_output,
        )

In [10]:
# Parser instance that turns raw LLM text into AgentAction / AgentFinish objects
output_parser = CustomOutputParser()

## Set up LLM

Choose the LLM you want to use!

In [12]:
# NOTE(review): this rebinds `llm`, replacing any local model assigned to the
# same name by an earlier cell; skip this cell to keep using the local model.
llm = ChatOpenAI(temperature=0)

## Define the stop sequence

This is important because it tells the LLM when to stop generation.

This depends heavily on the prompt and model you are using. Generally, you want this to be whatever token you use in the prompt to denote the start of an `Observation` (otherwise, the LLM may hallucinate an observation for you).

## Set up the Agent

We can now combine everything to set up our agent

In [13]:
# LLM chain consisting of the LLM and a prompt
# (`llm` and `prompt` are the objects assigned in earlier cells)
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [14]:
# Names of the tools the agent is allowed to call
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    allowed_tools=tool_names,
    # Generation stops at the start of an "Observation:" line
    stop=["\nObservation:"],
    output_parser=output_parser,
    llm_chain=llm_chain,
)

## Use the Agent

Now we can use it!

In [15]:
# Pair the agent with its tools; verbose=True is passed through to the executor
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [16]:
# Ask the agent a sample question; the returned value is shown as the cell output
agent_executor.run("How many people live in canada as of 2023?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Wot year be it now? That be important to know the answer.
Action: Search
Action Input: "current population canada 2023"[0m

Observation:[36;1m[1;3m38,649,283[0m[32;1m[1;3mAhoy! That be the correct year, but the answer be in regular numbers. 'Tis time to translate to pirate speak.
Action: Search
Action Input: "38,649,283 in pirate speak"[0m

Observation:[36;1m[1;3mBrush up on your “Pirate Talk” with these helpful pirate phrases. Aaaarrrrgggghhhh! Pirate catch phrase of grumbling or disgust. Ahoy! Hello! Ahoy, Matey, Hello ...[0m[32;1m[1;3mThat be not helpful, I'll just do the translation meself.
Final Answer: Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.[0m

[1m> Finished chain.[0m


'Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.'