# `LlamaCpp` examples

This notebook contains usage examples of the llama-cpp-python support in guidance.
The first example is an RPG character JSON generator.
The second example takes a jsonformer schema as input and generates a JSON-like object that follows it.
The third example uses the chat mode of guidance.

In [1]:
import guidance

# Load the llama.cpp model that the first two examples run against.
# Note: this assumes llama-cpp-python>=0.1.55 is installed.
MODEL_PATH = "../../../../../models/gpt4-x-vicuna-13B-GGML/gpt4-x-vicuna-13B.ggmlv3.q5_0.bin"
TOKENIZER_NAME = "ehartford/WizardLM-7B-Uncensored"

llama = guidance.llms.LlamaCpp(
    model=MODEL_PATH,
    tokenizer=TOKENIZER_NAME,
    n_gpu_layers=10,
    n_threads=12,
)

# Alternative model/tokenizer pairings:
# model = "../ggml-v3-models/gpt4-x-vicuna-13B.ggmlv3.q5_1.bin"
# tokenizer = "TheBloke/gpt4-x-vicuna-13B-HF"

# or:
# model = "../ggml-v3-models/Manticore-13B.ggmlv3.q5_1.bin"
# tokenizer = "openaccess-ai-collective/manticore-13b"

llama.cpp: loading model from ../../../../../models/gpt4-x-vicuna-13B-GGML/gpt4-x-vicuna-13B.ggmlv3.q5_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32001
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 8 (mostly Q5_0)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 8535.27 MB
llama_model_load_internal: mem required  = 10583.27 MB (+ 1608.00 MB per state)
....................................................................................................
llama_init_from_file: kv self size  = 1600.00 MB


In [3]:
# 1st Example
# Option set handed to the template's `select 'weapon'` command.
valid_weapons = ["sword", "axe", "mace", "spear", "bow", "crossbow"]

# Prompt template: `id` and `description` are supplied by the caller,
# every other field is produced by a gen/select/geneach command.
character_template = """The following is a character profile for an RPG game in JSON format.
```json
{
    "id": "{{id}}",
    "description": "{{description}}",
    "name": "{{gen 'name' temperature=0.7}}",
    "age": {{gen 'age' pattern='[0-9]+' max_tokens=2 stop=','}},
    "armor": "{{#select 'armor'}}leather{{or}}chainmail{{or}}plate{{/select}}",
    "weapon": "{{select 'weapon' options=valid_weapons}}",
    "class": "{{gen 'class'}}",
    "mantra": "{{gen 'mantra' temperature=0.7}}",
    "strength": {{gen 'strength' pattern='[0-9]+' max_tokens=2 stop=','}},
    "items": [{{#geneach 'items' num_iterations=5 join=', '}}"{{gen 'this' temperature=0.7}}"{{/geneach}}]
}```"""
character_maker = guidance(character_template)

# Run the program once to produce a character.
char = character_maker(
    id="e1f491f7-7ab8-4dac-8c20-c92b5e7d883d",
    description="A quick and nimble fighter.",
    valid_weapons=valid_weapons,
    llm=llama,
    caching=False,
)

In [6]:
# 2nd Example
# define a recursive function that takes a jsonformer schema and returns a guidance program
def jsonformer2guidance(schema, key=None, indent=0):
    """Translate a jsonformer-style schema into guidance template text.

    Args:
        schema: dict with a 'type' entry that is one of 'object', 'array',
            'string', 'number' or 'boolean'. An 'object' also carries a
            'properties' dict; an 'array' carries an 'items' schema and
            optionally 'max_items'.
        key: name used for the generated variable inside the template.
            Needed for every type except 'object'.
        indent: nesting depth of an 'object'; each level adds two spaces.

    Returns:
        The template fragment (str) for the given schema.

    Raises:
        ValueError: if schema['type'] is not one of the supported types.
    """
    schema_type = schema['type']

    if schema_type == 'object':
        pad = "  " * indent
        out = pad + "{\n"
        for k, v in schema['properties'].items():
            out += "  "*(indent+1) + k + ": " + jsonformer2guidance(v, k, indent+1) + ",\n"
        out += pad + "}"
        return out

    if schema_type == 'array':
        # Only emit max_iterations when the schema caps the array length.
        extra_args = f" max_iterations={schema['max_items']}" if 'max_items' in schema else ""
        item_template = jsonformer2guidance(schema['items'], "this")
        return (
            "[{{#geneach '" + key + "' stop=']'" + extra_args + "}}"
            + "{{#unless @first}}, {{/unless}}"
            + item_template
            + "{{/geneach}}]"
        )

    if schema_type == 'string':
        return "\"{{gen '" + key + "' stop='\"'}}\""

    if schema_type == 'number':
        # The '+' quantifier lets the number span more than one character;
        # without it the pattern only admits a single digit or dot.
        return "{{gen '" + key + "' pattern='[0-9\\.]+' stop=','}}"

    if schema_type == 'boolean':
        return "{{#select '" + key + "'}}True{{or}}False{{/select}}"

    # Fail loudly instead of returning None, which would otherwise surface as
    # a confusing TypeError during string concatenation in the parent call.
    raise ValueError(f"Unsupported schema type: {schema_type!r}")

In [7]:
# Schema for the 2nd example: one object with four properties.
# Property order is kept as written because the template is built by
# iterating over this mapping.
person_properties = {
    "name": {"type": "string"},
    "age": {"type": "number"},
    "is_student": {"type": "boolean"},
    "courses": {"type": "array", "max_items": 6, "items": {"type": "string"}},
}
json_schema = {"type": "object", "properties": person_properties}



In [10]:
# Build the program from an instruction line followed by the schema-derived
# template, run it on the loaded model, and print the result.
prompt = "Generate a person's information based on the following schema:"
schema_template = jsonformer2guidance(json_schema)
program = guidance(f"{prompt}\n{schema_template}")
out = program(llm=llama, silent=True)
print(out)

Generate a person's information based on the following schema:
{
  name: "John Doe",
  age: 3,
  is_student: True,
  courses: ["Math", "Science", "English"],
}


In [14]:
# 3rd Example
# Switch `llama` to a chat model for the chat-mode example. Options are
# passed directly as keyword arguments; an earlier draft of this cell used
# a separate LlamaCppSettings object instead.
CHAT_MODEL_PATH = "../ggml-v3-models/Manticore-13B-Chat-Pyg.ggmlv3.q5_1.bin"
CHAT_TOKENIZER_NAME = "openaccess-ai-collective/manticore-13b-chat-pyg"

llama = guidance.llms.LlamaCpp(
    model=CHAT_MODEL_PATH,
    tokenizer=CHAT_TOKENIZER_NAME,
    n_gpu_layers=10,
    n_threads=12,
    # Role delimiters; presumably these wrap role names as <|role|> in the
    # prompt -- confirm against the model's expected chat format.
    before_role="<|",
    after_role="|>",
    caching=False,
    silent=True,
)


TypeError: __init__() got an unexpected keyword argument 'tokenizer_name'

In [12]:

# Chat-mode program with two generation steps: the first assistant turn
# stores its output as 'expert_names', the second stores 'answer'.
experts_template = '''
{{#system~}}
You are a helpful and terse assistant.
{{~/system}}

{{#user~}}
I want a response to the following question:
{{query}}
Name 3 world-class experts (past or present) who would be great at answering this?
Don't answer the question yet.
{{~/user}}

{{#assistant~}}
{{gen 'expert_names' temperature=0 max_tokens=300}}
{{~/assistant}}

{{#user~}}
Great, now please answer the question as if these experts had collaborated in writing a joint anonymous answer.
{{~/user}}

{{#assistant~}}
{{gen 'answer' temperature=0 max_tokens=500}}
{{~/assistant}}
'''
experts = guidance(experts_template, llm=llama)



In [None]:
# Run the chat program on a sample question and print what it returns.
experts_result = experts(query='How can I be more productive?', llm=llama)
print(experts_result)