# Local Model Notebook loader
## This is for people who want to test langchain or other agent/agi related code in a notebook


## ⚠️Llama-cpp users🦙⚠️
If you are using llama-cpp, you can skip down to the llama-cpp section.

If your Llama model runs on the GPU, don't skip this section.
# Text-generation-webui related code
## Load Required Libraries and Modules
The first step is to load all the required libraries and modules:

In [None]:
!pip install langchain

In [13]:
import sys
# Reset argv to just the program name before the project modules are imported
# (presumably so their argument parser does not see the notebook kernel's own
# command-line arguments -- TODO confirm).
sys.argv = [sys.argv[0]]
import os
import re
import time
import json
from pathlib import Path
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
# Append "<parent of the current working directory>/modules" to the module
# search path before importing the project packages below.
sys.path.append(str(Path().resolve().parent / "modules"))
from modules import api, chat, shared, training, ui
from modules.html_generator import chat_html_wrapper
from modules.LoRA import add_lora_to_model
from modules.models import load_model, load_soft_prompt
from modules.text_generation import generate_reply, stop_everything_event
import torch
# Make CUDA device index 0 the current device for this process.
torch.cuda.set_device(0)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: C:\Users\admin\Documents\oobabooga-windows\installer_files\env\bin\cudart64_110.dll
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary C:\Users\admin\Documents\oobabooga-windows\installer_files\env\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll...


# Parameters and command-line flags

Enter your command-line arguments as you would when launching `server.py` (see the [complete list](https://github.com/oobabooga/text-generation-webui#basic-settings)).

Example: --auto-devices --wbits 4 --groupsize 128 --no-stream


In [14]:
from modules.shared import parser

def parse_input_string(input_string):
    """Parse a command-line style string with the shared argument parser.

    The string is tokenised like a shell command line: whitespace separates
    arguments, and single or double quotes group text that contains spaces
    (for example ``--model-dir "my models"``). Backslashes are kept literally
    so Windows paths pass through unchanged.

    Args:
        input_string: The flags as they would be typed after ``server.py``.

    Returns:
        The namespace returned by ``parser.parse_args``.

    Raises:
        ValueError: If a quote is opened but never closed.
    """
    import shlex  # local import keeps this cell self-contained

    lexer = shlex.shlex(input_string, posix=True)
    lexer.whitespace_split = True  # split on whitespace only
    lexer.commenters = ''          # '#' is ordinary text, not a comment
    lexer.escape = ''              # no escape character: backslashes stay literal
    return parser.parse_args(list(lexer))

# Ask for the launch flags and store the parsed result on the shared module.
input_string = input('Enter args string: ')
shared.args = parse_input_string(input_string)

# Load custom settings from a JSON file: an existing --settings path is used
# first, otherwise settings.json in the working directory if it exists.
settings_file = None
if shared.args.settings is not None and Path(shared.args.settings).exists():
    settings_file = Path(shared.args.settings)
elif Path('settings.json').exists():
    settings_file = Path('settings.json')

if settings_file is not None:
    print(f"Loading settings from {settings_file}...")
    # The context manager closes the file even if the JSON fails to parse.
    with open(settings_file, 'r') as settings_handle:
        new_settings = json.load(settings_handle)
    # Copy every loaded key over the existing shared settings.
    for item in new_settings:
        shared.settings[item] = new_settings[item]

# Always override the seed with -1 (presumably "pick a random seed" -- TODO confirm).
shared.settings['seed'] = -1


Enter args string: --auto-devices --wbits 4 --no-stream


# Choose your model

In [15]:
# Function to get available models
def get_available_models():
    """Return the model names found in ``shared.args.model_dir``.

    With ``shared.args.flexgen`` set, only entries whose name ends in ``-np``
    are listed, with that suffix removed. Otherwise every entry is listed
    except those ending in ``.txt``, ``-np``, ``.pt`` or ``.json``, and a
    trailing ``.pth`` extension is removed from the name.

    Returns:
        list[str]: The names sorted case-insensitively.
    """
    entry_names = [item.name for item in Path(f'{shared.args.model_dir}/').glob('*')]
    if shared.args.flexgen:
        names = [re.sub('-np$', '', name) for name in entry_names if name.endswith('-np')]
    else:
        excluded_suffixes = ('.txt', '-np', '.pt', '.json')
        # The dot is escaped so only a literal ".pth" extension is stripped;
        # an unescaped '.' would also eat the last letter of names like "depth".
        names = [
            re.sub(r'\.pth$', '', name)
            for name in entry_names
            if not name.endswith(excluded_suffixes)
        ]
    return sorted(names, key=str.lower)

# Get the list of available models
available_models = get_available_models()

# Set the model name: a --model flag wins, otherwise choose from the list
if shared.args.model is not None:
    shared.model_name = shared.args.model
else:
    if len(available_models) == 0:
        print('No models are available! Please download at least one.')
        sys.exit(0)
    elif len(available_models) == 1:
        # Only one candidate, so there is nothing to ask
        i = 0
    else:
        print('The following models are available:\n')
        for number, model in enumerate(available_models, start=1):
            print(f'{number}. {model}')
        print(f'\nWhich one do you want to load? 1-{len(available_models)}\n')
        # Re-prompt until the answer is a number inside the listed range.
        # Without this check, 0 or a negative number would silently select a
        # model from the end of the list through negative indexing.
        while True:
            try:
                i = int(input()) - 1
            except ValueError:
                i = -1
            if 0 <= i < len(available_models):
                break
            print(f'Please enter a number between 1 and {len(available_models)}.')
        print()
    shared.model_name = available_models[i]


The following models are available:

1. anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g
2. chavinlo_alpaca-native
3. gozfarb_oasst-llama13b-4bit-128g
4. llama-13b-ggml-q4_0
5. llama-30b-4bit-128g
6. llama-7b
7. MetaIX_GPT4-X-Alpaca-30B-Int4
8. vicuna-13b-GPTQ-4bit-128g

Which one do you want to load? 1-8

7



# Load Model and Tokenizer

In [16]:
# Load the selected model together with its tokenizer into the shared state
shared.model, shared.tokenizer = load_model(shared.model_name)

# Apply the LoRA named on the command line, if one was given
lora_name = shared.args.lora
if lora_name:
    add_lora_to_model(lora_name)

# Local aliases, read from shared only after the optional LoRA step
# (presumably add_lora_to_model may replace shared.model -- TODO confirm)
base_model, tokenizer = shared.model, shared.tokenizer

Loading MetaIX_GPT4-X-Alpaca-30B-Int4...
Found the following quantized model: models\MetaIX_GPT4-X-Alpaca-30B-Int4\gpt4-x-alpaca-30b-4bit.safetensors
Loading model ...


  with safe_open(filename, framework="pt", device=device) as f:
  return self.fget.__get__(instance, owner)()
  storage = cls(wrap_storage=untyped_storage)


Done.
Loaded the model in 27.48 seconds.


# Create Text-Generation Pipeline
## We create a text-generation pipeline with the specified parameters:
Feel free to change these to best fit your model/usage


In [17]:
# Create a text-generation pipeline with the specified parameters
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device=0,
    max_length=256,
    # Sampling must be switched on explicitly; without it generation is
    # greedy and the temperature/top_p values below are not applied.
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2
)

# Wrap the pipeline so it can be used as a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)


# The model is now loaded and can be used with langchain



# 🦙Llama-cpp users🦙
## If you are just using llama-cpp then follow these steps

The folder containing the .bin file should be located in the models folder.

Example: "./models/Alpaca-7B-ggml-4bit-LoRA-merged/ggml-model-q4_0.bin"
## Install and Import dependencies

In [None]:
!pip install llama-cpp-python
!pip install langchain

In [None]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain

# Select Model

In [None]:
model_dir = "./models"
import os

# get a list of all folders in the models directory
model_folders = [f for f in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, f))]
if not model_folders:
    # Nothing to choose from: stop here instead of prompting for an impossible choice
    raise FileNotFoundError(f"No model folders found in {model_dir}.")

# print the list of model names with their index starting at 1
for i, model_name in enumerate(model_folders):
    print(f"{i+1}. {model_name}")

# ask the user to select a model by number, re-prompting until the answer is
# inside the listed range (0 or a negative number would otherwise silently
# pick a folder from the end of the list through negative indexing)
while True:
    try:
        selected_index = int(input("Enter the number of the model to select: ")) - 1
    except ValueError:
        selected_index = -1
    if 0 <= selected_index < len(model_folders):
        break
    print(f"Please enter a number between 1 and {len(model_folders)}.")
selected_model = model_folders[selected_index]

# check if the selected model contains a .bin file and save the path of the
# first one found
model_bin = None
for file in os.listdir(os.path.join(model_dir, selected_model)):
    if file.endswith(".bin"):
        model_bin = os.path.join(model_dir, selected_model, file)
        break

if model_bin is None:
    # Fail with a clear message rather than handing model_path=None to LlamaCpp
    raise FileNotFoundError("No .bin file found in selected model directory.")
print(f"Selected model binary: {model_bin}")

llm = LlamaCpp(model_path=model_bin)

# Beginning of LangChain section
I stole some of the code from [this colab](https://colab.research.google.com/drive/1VOwJpcZqOXag-ZXi-52ibOx6L5Pw-YJi#scrollTo=nu-AmhDLEK0h) that goes with [this video](https://www.youtube.com/watch?v=LbT1yp6quS8) by Patrick Loeber. I recommend subscribing.