# Local Model Notebook loader
## This is for people who want to test langchain or other agent/agi related code in a notebook


## ⚠️Llama-cpp users🦙⚠️
If you are using Llama-cpp you can skip down to the llama cpp cell

If your Llama model runs on the GPU, don't skip ahead.
# Text-generation-webui related code
## Load Required Libraries and Modules
The first step is to load all the required libraries and modules:

In [None]:
!pip install langchain

In [None]:
import sys
sys.argv = [sys.argv[0]]
import os
import re
import time
import json
from pathlib import Path
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
sys.path.append(str(Path().resolve().parent / "modules"))
from modules import api, chat, shared, training, ui
from modules.html_generator import chat_html_wrapper
from modules.LoRA import add_lora_to_model
from modules.models import load_model, load_soft_prompt
from modules.text_generation import generate_reply, stop_everything_event
import torch
torch.cuda.set_device(0)

# Parameters and command-line flags

Input your command-line arguments as you would when launching server.py ([complete list](https://github.com/oobabooga/text-generation-webui#basic-settings)).

Example: --auto-devices --wbits 4 --groupsize 128 --no-stream


In [None]:
from modules.shared import parser

def parse_input_string(input_string):
    """Parse a server.py-style argument string into an argparse namespace.

    The string is tokenized with shell-like quoting rules, so a value that
    contains spaces can be passed by quoting it
    (e.g. ``--model-dir "my models"``).

    Args:
        input_string: Flags exactly as they would be typed after
            ``python server.py`` on the command line.

    Returns:
        The result of ``parser.parse_args`` for the tokenized flags.

    Raises:
        ValueError: If the string contains an unterminated quote.
    """
    # Imported locally so this cell does not rely on another cell's imports.
    import os
    import shlex

    # POSIX tokenizing treats backslashes as escapes, which would mangle
    # Windows paths such as C:\models, so it is only used off Windows.
    posix = os.name != "nt"
    tokens = shlex.split(input_string, posix=posix)
    if not posix:
        # Non-POSIX mode leaves the surrounding quotes on a token; drop them.
        tokens = [
            token[1:-1]
            if len(token) >= 2 and token[0] == token[-1] and token[0] in "\"'"
            else token
            for token in tokens
        ]
    return parser.parse_args(tokens)

# Ask for the launch flags and store the parsed result on the shared module.
input_string = input('Enter args string: ')
shared.args = parse_input_string(input_string)

# Pick the settings file: the path in shared.args.settings if it exists,
# otherwise a settings.json in the current directory, otherwise none.
settings_file = None
if shared.args.settings is not None and Path(shared.args.settings).exists():
    settings_file = Path(shared.args.settings)
elif Path('settings.json').exists():
    settings_file = Path('settings.json')

if settings_file is not None:
    print(f"Loading settings from {settings_file}...")
    # read_text() opens and closes the file itself, so no handle is leaked.
    new_settings = json.loads(settings_file.read_text())
    # Values from the file override the entries already in shared.settings.
    for item, value in new_settings.items():
        shared.settings[item] = value

# NOTE(review): presumably -1 asks for a random seed on each generation — confirm.
shared.settings['seed'] = -1


# Choose your model

In [None]:
# Function to get available models
def get_available_models():
    """Return the model names found in the model directory, sorted case-insensitively.

    With ``shared.args.flexgen`` set, only entries whose name ends in ``-np``
    are listed, with that suffix removed. Otherwise every entry is listed
    except those ending in ``.txt``, ``-np``, ``.pt`` or ``.json``, and a
    trailing ``.pth`` extension is removed from the name.

    Returns:
        A list of names derived from the entries directly under
        ``shared.args.model_dir``.
    """
    entries = Path(f'{shared.args.model_dir}/').glob('*')
    if shared.args.flexgen:
        names = [re.sub(r'-np$', '', item.name) for item in entries if item.name.endswith('-np')]
    else:
        skipped_suffixes = ('.txt', '-np', '.pt', '.json')
        # The dot is escaped so only a literal ".pth" extension is stripped;
        # unescaped, it also truncated names that merely end in "pth".
        names = [re.sub(r'\.pth$', '', item.name) for item in entries if not item.name.endswith(skipped_suffixes)]
    return sorted(names, key=str.lower)

# Get the list of available models
available_models = get_available_models()

# Set the model name: an explicitly passed model wins; otherwise choose from
# the models found on disk.
if shared.args.model is not None:
    shared.model_name = shared.args.model
else:
    if not available_models:
        print('No models are available! Please download at least one.')
        sys.exit(0)
    elif len(available_models) == 1:
        # Only one candidate, so there is nothing to ask.
        i = 0
    else:
        print('The following models are available:\n')
        for number, model in enumerate(available_models, start=1):
            print(f'{number}. {model}')
        print(f'\nWhich one do you want to load? 1-{len(available_models)}\n')
        # Keep asking until a number from the menu is entered. Without this
        # check, 0 would become index -1 and silently pick the last model,
        # and non-numeric input would crash the cell.
        while True:
            try:
                i = int(input()) - 1
            except ValueError:
                i = -1
            if 0 <= i < len(available_models):
                break
            print(f'Please enter a number between 1 and {len(available_models)}.')
        print()
    shared.model_name = available_models[i]


# Load Model and Tokenizer

In [None]:
# Load the model and tokenizer
# Load the chosen model and its tokenizer onto the shared module.
shared.model, shared.tokenizer = load_model(shared.model_name)

# Apply the LoRA given in the arguments, if any.
if shared.args.lora:
    add_lora_to_model(shared.args.lora)

# Short aliases for the pipeline cell; read after the LoRA step so they refer
# to whatever the shared module holds at that point.
base_model, tokenizer = shared.model, shared.tokenizer

# Create Text-Generation Pipeline
## We create a text-generation pipeline with the specified parameters:
Feel free to change these to best fit your model/usage


In [None]:
# Create a text-generation pipeline with the specified parameters
# Generation settings handed to the pipeline; tune these for your model/usage.
# NOTE(review): temperature/top_p usually only matter when sampling is
# enabled — confirm whether do_sample must also be set for your model.
_generation_settings = {
    "max_length": 800,
    "temperature": 0.6,
    "top_p": 0.95,
    "repetition_penalty": 1.1,
}

# Wrap the already-loaded model and tokenizer in a text-generation pipeline
# on device 0.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device=0,
    **_generation_settings,
)

# Hand the pipeline to LangChain; the rest of the notebook uses `llm`.
llm = HuggingFacePipeline(pipeline=pipe)


# The model is now loaded and can be used with langchain



# 🦙Llama-cpp users🦙
## If you are just using llama-cpp then follow these steps

The folder containing the .bin file should be located in the models folder.

Example: "./models/Alpaca-7B-ggml-4bit-LoRA-merged/ggml-model-q4_0.bin"
## Install and Import dependencies

In [None]:
!pip install llama-cpp-python
!pip install langchain

In [None]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain

# Select Model

In [None]:
model_dir = "./models"
import os

# get a list of all folders in the models directory (sorted so the menu
# numbering is the same on every run)
model_folders = sorted(f for f in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, f)))
if not model_folders:
    # Nothing to choose from; stop here instead of prompting for an
    # impossible selection.
    raise FileNotFoundError(f"No model folders found in {model_dir}")

# print the list of model names with their index starting at 1
for i, model_name in enumerate(model_folders):
    print(f"{i+1}. {model_name}")

# ask the user to select a model by number, repeating until the number is
# on the menu (0 or a negative number would otherwise index from the end)
while True:
    try:
        selected_index = int(input("Enter the number of the model to select: ")) - 1
    except ValueError:
        selected_index = -1
    if 0 <= selected_index < len(model_folders):
        break
    print(f"Please enter a number between 1 and {len(model_folders)}.")
selected_model = model_folders[selected_index]

# check if the selected model contains a .bin file and save the path if it does
model_bin = None
for file in sorted(os.listdir(os.path.join(model_dir, selected_model))):
    if file.endswith(".bin"):
        model_bin = os.path.join(model_dir, selected_model, file)
        break

if model_bin is None:
    # Fail here with a clear message rather than passing None to LlamaCpp.
    raise FileNotFoundError("No .bin file found in selected model directory.")
print(f"Selected model binary: {model_bin}")

llm = LlamaCpp(model_path=model_bin)

# Beginning of Langchain section
I stole some of the code from [this colab](https://colab.research.google.com/drive/1VOwJpcZqOXag-ZXi-52ibOx6L5Pw-YJi#scrollTo=nu-AmhDLEK0h) that goes with [this video](https://www.youtube.com/watch?v=LbT1yp6quS8) by Patrick Loeber. I recommend subscribing.

## Your model variable - llm
'llm' is either an instance of the HuggingFacePipeline class, which wraps a text-generation pipeline created with the pipeline() function from the transformers library, or, if you followed the llama-cpp steps instead, an instance of the LlamaCpp class.

## install langchain

In [None]:
!pip install langchain

## import dependencies

In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, load_tools
from langchain.prompts import BaseChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain, LLMChain
from langchain.chat_models import ChatOpenAI
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish, HumanMessage
import re

## Test Your llm

In [None]:
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

## Prompt Templates

LangChain helps manage and improve how prompts are used.

When an app uses a language model, it doesn't directly send what the user types to the model. Instead, it creates a question or prompt based on what the user typed and sends that to the model. LangChain makes this process easier and better.

In [None]:
llm("Can Barack Obama have a conversation with George Washington?")

In [None]:
prompt = """Question: Can Barack Obama have a conversation with George Washington?

Let's think step by step.

Answer: """
llm(prompt)

In [None]:
from langchain import PromptTemplate

template = """Question: {question}

Let's think step by step.

Answer: """

prompt = PromptTemplate(template=template, input_variables=["question"])

In [None]:
prompt.format(question="Can Barack Obama have a conversation with George Washington?")

## Chains
Test how well different language models and prompts work together in multi-step processes

In [None]:
from langchain import LLMChain

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "Can Barack Obama have a conversation with George Washington?"

print(llm_chain.run(question))

## Agents and Tools

An agent is a language model that can make decisions, take actions, observe results, and repeat the process until the task is done. Agents can be very useful when used properly. Here are some concepts to know before using them:

- **Tool:** A tool is a function that does a specific job, such as searching on Google, looking up data in a database, running Python code, or using other chains.
- **LLM:** This stands for "language model" and it is what powers the agent.
- **Agent:** This is the specific agent that you want to use.
- **Tools:** You can find various tools at this link: https://python.langchain.com/en/latest/modules/agents/tools.html
- **Agent Types:** There are different types of agents that you can use. You can find more information on them here: https://python.langchain.com/en/latest/modules/agents/agents/agent_types.html


In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent

In [None]:
!pip install wikipedia

In [None]:
tools = load_tools(["wikipedia", "llm-math"], llm=llm)

In [None]:
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

In [None]:
agent.run("In what year was the film Departed with Leopnardo Dicaprio released? What is this year raised to the 0.43 power?")

# Langchain Wikipedia Searching
## This part is very hit or miss with local LLMs but works well with ChatGPT.
Below is another agent using the Wikipedia tool that I was able to get results from.

## Define which tools the agent can use to answer user queries
In this case Wikipedia. Install it in the next cell if you don't have it.

In [None]:
!pip install wikipedia

In [None]:
from langchain.utilities import WikipediaAPIWrapper
wikipedia = WikipediaAPIWrapper()
tools = [
    Tool(
        name = "Wikipedia",
        func=wikipedia.run,
        description="useful for when you need to look up information"
    )
]

## Set up the base template

In [None]:

template = """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: one word, Use only the name of a tool i.e "Wikipedia"
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
{agent_scratchpad}"""

## Create Prompt Template Class

In [None]:
class CustomPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt that fills a text template with tool info and prior agent steps."""

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """Render the template into a single human message.

        Args:
            **kwargs: Values for the template's placeholders. Must include
                ``intermediate_steps``, an iterable of
                ``(action, observation)`` pairs. That key is consumed here,
                and ``agent_scratchpad``, ``tools`` and ``tool_names`` are
                filled in from it and from ``self.tools``.

        Returns:
            A one-element list holding the formatted prompt as a
            ``HumanMessage``.

        Raises:
            KeyError: If ``intermediate_steps`` is not supplied, or the
                template names a placeholder that has no value.
        """
        # Get the intermediate steps (AgentAction, Observation tuples)
        intermediate_steps = kwargs.pop("intermediate_steps")
        # Replay each step as the action's log followed by its observation,
        # ending with an open "Thought: " for the model to continue from.
        thoughts = "".join(
            f"{action.log}\nObservation: {observation}\nThought: "
            for action, observation in intermediate_steps
        )
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]
    
prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"]
)

## Create Output Parser

In [None]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into the agent's next action or its final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Interpret one LLM completion.

        Args:
            llm_output: The text produced by the LLM for the current step.

        Returns:
            An ``AgentFinish`` carrying the text after the last
            ``"Final Answer:"`` marker when that marker is present; an
            ``AgentFinish`` carrying the whole output when no
            ``Action``/``Action Input`` pair can be found; otherwise an
            ``AgentAction`` naming the tool and its input.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )

        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            # The output does not follow the expected format; hand it back
            # as the answer instead of failing the run.
            return AgentFinish(
                return_values={"output": llm_output},
                log=llm_output,
            )
        action = match.group(1).strip()
        action_input = match.group(2)

        # Return the action and action input, with surrounding spaces and
        # double quotes removed from the input.
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
            
output_parser = CustomOutputParser()


## Create LLM chain consisting of the LLM and a prompt

In [None]:

llm_chain = LLMChain(llm=llm, prompt=prompt)

## Setup Agent with tools

In [None]:
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:"], 
    allowed_tools=tool_names
)

In [None]:
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

# Testing
## Ask the model a question and examine the output

In [None]:
# question = input("Question: ")
question = "whats the population of canada?"
agent_executor.run(question)

## Memory
Add State to Chains and Agents.

Memory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.

In [None]:
from langchain import ConversationChain
conversation = ConversationChain(llm=llm, verbose=True)

conversation.predict(input="Hi there!")

In [None]:
conversation.predict(input="Can we talk about AI?")

In [None]:
conversation.predict(input="I'm interested in Reinforcement Learning.")

## Document Loaders

If you want to use your own text data with a language model, you can use a document loader module


In [None]:
from langchain.document_loaders import NotionDirectoryLoader

loader = NotionDirectoryLoader("Notion_DB")

docs = loader.load()

## Indexes

Indexes refer to ways to structure documents so that LLMs can best interact with them. This module contains utility functions for working with documents

- Embeddings: An embedding is a numerical representation of a piece of information, for example, text, documents, images, audio, etc.
- Text Splitters: When you want to deal with long pieces of text, it is necessary to split up that text into chunks.
- Vectorstores: Vector databases store and index vector embeddings from NLP models to understand the meaning and context of strings of text, sentences, and whole documents for more accurate and relevant search results.

In [None]:
import requests

url = "https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
res = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving the error page as if it
# were the document text.
res.raise_for_status()
with open("state_of_the_union.txt", "w") as f:
    f.write(res.text)

In [None]:
# Document Loader
from langchain.document_loaders import TextLoader
loader = TextLoader('./state_of_the_union.txt')
documents = loader.load()

In [None]:
# Text Splitter
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [None]:
!pip install sentence_transformers

In [None]:
# Embeddings
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

#text = "This is a test document."
#query_result = embeddings.embed_query(text)
#doc_result = embeddings.embed_documents([text])

In [None]:
!pip install faiss-cpu

In [None]:
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS

db = FAISS.from_documents(docs, embeddings)

query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)

In [None]:
print(docs[0].page_content)

In [None]:
db.save_local("faiss_index")
new_db = FAISS.load_local("faiss_index", embeddings)
docs = new_db.similarity_search(query)
print(docs[0].page_content)