In [14]:
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Any, List, Tuple, Type, Optional, Union
import os
import json

In [15]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

## Helper Functions

In [16]:
class LLM:
    def __init__(self):
        self.client = OpenAI(
            api_key=os.environ["OPENAI_API_KEY"],
        )

    def request(self, prompt, temperature=0.2, context=None):
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        return response.choices[0].message.content


llm = LLM()

In [17]:
def log(msg):
    print(msg)
    print("-------------")

# How to make Prompts? 

In [18]:
from grammarflow.prompt.builder import PromptBuilder  # Prompt builder

Prompts can be supplied to the `Constrain` block (below) as a string, `Prompt` or `PromptBuilder` object. 

`PromptBuilder` is a straightforward interface to build prompts, which are built using 'sections' which are user-defined. You can specificy which sections of the prompt can be conditionally switched on/off, which can be edited using placeholders, which are fixed sections, where the grammars can be defined, etc.

Each prompt can have one `define_grammar`, `placeholders` and `add_few_shot_examples` boolean fields. These switch on certain functionalities within the sections. You can pass in a function to the `enable_on` field for the conditional enabling.  

In [19]:
# Sample Llama Prompt

llama_prompt = PromptBuilder()
llama_prompt.add_section(
    text="<s>[INST] <<SYS>>\n{system_context}\n<</SYS>>",
    placeholders=["system_context"],
    define_grammar=True,
)
llama_prompt.add_section(
    text="{user_message}[/INST]",
    placeholders=["user_message"],
)

### How to use `enable_on`: 

Say we are working on a conversation chatbot, a chain that needs history for context. We might only want certain sections to be enabled when we have a history. We can use `enable_on` to enable or disable sections based on the presence of a history.

In [20]:
# Dummy Example 

def check_previous_interaction(id_): return id_ > 1

sample_prompt = PromptBuilder() 
sample_prompt.add_section(
    text="You are a intelligent search machine. Your goal is to think about what topics to search about to provide the user with relevant information. Here is his question: {question}",
    placeholders=['question']
) 
sample_prompt.add_section(
  define_grammar=True
)
sample_prompt.add_section(
  text="Choose keywords from the context given below: \n{history}",
  placeholders=["history"],
  enable_on=check_previous_interaction
) 

# How to make Grammars? 

Using Pydantic, we can define how data should be in pure python. GrammarFlow takes care of the rest. In this guide, you will find multiple examples of pydantic models for different use-cases. (Some of them are quite random, but I'm using them to prove effectiveness!)

Here are some important rules! 

1. You can use Optional from `typing`, but the LLM won't understand when and when not to output an optional field. From experience, there are better ways to deal with Optional fields, such as enabling it within the grammar when needed. 
2. When you want to use regex, you can do it using `Field(..., pattern="", description="")`. The `pattern` field will be used during decoding, but is ignored during prompt embedding. This is because LLMs cannot 100% perform regex handling unless explained in human-terms. Moreover, when you see LLMs conforming to the regex expectations, its mostly during the token sampling that it is achieved. So, to overcome this, you can embed a semantic explanation of the regex, like `Field(..., pattern="^(Akshath|Raghav|Ravikiran)$", description="Akshath OR Raghav OR Ravikiran")`.
3. Avoid using `Dict` in the pydantic model directly. Instead, make another model with the Dict fields and add that as a parameter to the original model. This becomes a nested structure, and can be handled well. If you choose `Dict[str, str]` method, then all I can do is put this into the prompt, which is ineffective and random. 

### Important note on acceptable regex for GNBF! 

Ensure your regex is solely `string` or `number` constraining. 
> Example: `'("Akshath"|"Raghav"|"Ravikiran")'` for multiple OR scenarios; `'"https://"[0-9a-fA-F]*'` for links, etc. 

If you want a `string` to be present, enclose it in double-qoutes. To allow for any other sequences, bound with `()` or `[]` and specify directly (`[0-9a-fA-F]*`). 

# Constraining with GrammarFlow

In [21]:
from grammarflow.constrain import Constrain  # Main class

A `Constrain` block acts as a context manager. It keeps track of the serialization type and the #grammars you want to handle. 

It offers three main functions. 
1. `.format()` to format a prompt of your choice. Pass in `placeholders` as present in the format {'placeholder': text}. `grammars` needs to be a list of {'description': , 'model': }. `examples` needs to be a list of {'query': , 'model'}. 
2. `.get_grammar()` to get the corresponding GNBF grammar of the model you pass in. 
3. `.parse()` to parse the response into a `Response` dataclass.  
4. `.inflation_rate()` can be used to get the token size at which the latest prompt has been increased. For smaller prompts, the number is >4x. For larger prompts, the number is <2x. You will find that **with increase of prompt size, the cost you will pay per LLM call will remain the same.**

In [22]:
user_message = ""
system_context = ""


class Model(BaseModel):
    model_name: str


with Constrain('json') as manager:
    prompt = manager.format(llama_prompt, 
                        placeholders={'user_message': user_message, 'system_context': system_context},
                        grammars=[{'model': [Model]}]
    )

# Serialization Use-Cases! 

1. 'JSON' is the classic go-to. Can handle simple stuff, nested-models, complex-grammars, etc. However, a simple missing terminal ('"', '{") can break the sampling chain. 
2. 'XML' is the safest go-to. Can handle all cases, except multiple grammar generation (see at the end of `Examples!` section). The use of starting and ending tags is handled easily by token sampling, and errors within naming/tag is handled by my parser. 
3. 'TOML' is best when we want to get multiple grammars generated in one-go. With a smaller inflation rate of the prompt (before and after grammarflow embeddings), it can handle a longer list of fields. However, it *cannot* work for nested models. TOML nested models usually are in the form given below. From my experimentations, LLMs have a hard time conforming to this and end up generating `obj2` field within `obj1` as a `Dict` object. Is it possible? Sure. Would I trust it? No.
  ```
  [obj1]
  field = "" 
  field2 = "" 

  [obj1.obj2]
  field3 = "" 
  ```

# Examples! 

In [23]:
# # Here's a simple example of asking an LLM to make code.
# # This can be used within coding assistants which requires extra metadata.


class FunctionModel(BaseModel):
    function_name: str
    docstring: str
    depedencies: List[str]
    uuid: Union[float, int]
    is_python: bool
    code: str


input_str = "I want to create a function that returns the fibonacci sequence. The function should be called 'fib'. The function can use numpy."

with Constrain('xml') as manager:
    prompt = manager.format(input_str, grammars=[{"description": "No Code Generation", "model": FunctionModel}])

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

```
<FunctionModel>
<function_name> "fib" </function_name>
<docstring> "This function returns the fibonacci sequence." </docstring>
<depedencies> List["numpy"] </depedencies>
<uuid> 123456789 </uuid>
<is_python> True </is_python>
<code> "def fib(n):\n    import numpy as np\n    fib_seq = [0, 1]\n    for i in range(2, n):\n        fib_seq.append(fib_seq[i-1] + fib_seq[i-2])\n    return fib_seq" </code>
</FunctionModel>
```
-------------
{'FunctionModel': {'function_name': 'fib', 'docstring': 'This function returns the fibonacci sequence.', 'depedencies': typing.List[ForwardRef('numpy')], 'uuid': 123456789, 'is_python': True, 'code': 'def fib(n):\\n    import numpy as np\\n    fib_seq = [0, 1]\\n    for i in range(2, n):\\n        fib_seq.append(fib_seq[i-1] + fib_seq[i-2])\\n    return fib_seq'}}
-------------
{'before': 27, 'after': 143, 'factor': '4.3x'}
-------------


In [24]:
# # The response will be of `Response` type, which can be used to extract the data. If adding the parsed response to this object fails, it will return the dict itself.
print(response.FunctionModel.is_python)

True


In [25]:
# Add some examples too!

Sum_Function_Model = FunctionModel(
    function_name="sum",
    docstring="This function returns the sum of the input list.",
    depedencies=["numpy"],
    uuid=123456789,
    is_python=True,
    code="def sum(a, b):\n\treturn a + b"
)


with Constrain('xml') as manager:
    prompt = manager.format(input_str, 
        grammars=[
            {
                'description': 'No Code Generation',
                'model': FunctionModel
            }
        ],
        examples=[
            {
                'query': "Create a summation function in Python",
                'model': Sum_Function_Model
            }
        ]
    )


    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

```
<FunctionModel>
<function_name> fib </function_name>
<docstring> This function returns the fibonacci sequence. </docstring>
<depedencies> ['numpy'] </depedencies>
<uuid> 987654321.0 </uuid>
<is_python> True </is_python>
<code> def fib(n):
	if n <= 1:
		return n
	else:
		return fib(n-1) + fib(n-2) </code>
</FunctionModel>
```
-------------
{
    "FunctionModel": {
        "function_name": "fib",
        "docstring": "This function returns the fibonacci sequence.",
        "depedencies": [
            "numpy"
        ],
        "uuid": 987654321.0,
        "is_python": true,
        "code": "def fib(n):\tif n <= 1:\t\treturn n\telse:\t\treturn fib(n-1) + fib(n-2)"
    }
}
-------------
{'before': 27, 'after': 237, 'factor': '7.8x'}
-------------


In [26]:
# Sample ReAct Model with Llama Prompt
# You can add descriptions within the grammar model to provide it's context and options. This is how we use the LLM in https://github.com/e-lab/Forestry_Student/.

class ThoughtState(BaseModel):
    thought: str
    goal: str
    tool: str = Field(...,
                      description="Choose one of ['Web_QA', 'Web_Search', 'Web_Scraping', 'Web_Automation', 'Web_Research']")
    action: str = Field(...,
                        description="Choose one of ['Create', 'Update', 'Delete', 'Read']")
    action_input: str = Field(..., description="The input data for the action")
    thought_id: Optional[str] = Field(
        None, description="The unique identifier for the thought")


system_context = """Your goal is to think and plan out how to solve questions using agent tools provided to you. Think about all aspects of your thought process."""
user_message = """Who is Vladmir Putin?"""

with Constrain('json') as manager:
    prompt = manager.format(llama_prompt, placeholders={
                          'user_message': user_message,
                          'system_context': system_context
                          },
                          grammars=[{
                              'description': 'This format describes your current thinking state',
                              'model': [ThoughtState]}]
    )

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

```
{
"ThoughtState": {
"thought": "I need to gather information about Vladmir Putin.",
"goal": "To understand who Vladmir Putin is.",
"tool": "Web_Search",
"action": "Read",
"action_input": "Vladmir Putin biography",
"thought_id": "1a2b3c4d"
}
}
```
-------------
{
    "ThoughtState": {
        "thought": "I need to gather information about Vladmir Putin.",
        "goal": "To understand who Vladmir Putin is.",
        "tool": "Web_Search",
        "action": "Read",
        "action_input": "Vladmir Putin biography",
        "thought_id": "1a2b3c4d"
    }
}
-------------
{'before': 48, 'after': 230, 'factor': '3.8x'}
-------------


In [27]:
# # You can then access the response from the `response` object
print(response.ThoughtState.tool)

Web_Search


In [28]:
# You can add complex layers of grammars. You add even using Optional and Union types.
# For complex and nested grammars, JSON and XML are the best formats to use. 

class TeamMember(BaseModel):
    name: str
    role: str

class Task(BaseModel):
    title: str
    description: str
    assigned_to: str = Field(..., pattern="^(Akshath|Raghav|Ravikiran)$")
    due_date: List[str]

class Project(BaseModel):
    name: str
    description: str
    project_url: Optional[str] = None
    team_members: List[TeamMember]
    task: Task

for serialization in ['json', 'xml']:
    with Constrain(serialization) as manager:
        system_context = """You are a project manager and you are responsible for managing a project. You have to manage the project, it's grammars and other aspects. Ensure that you fill out all required fields."""
        user_message = """Make me a project plan for a new project on multimodal document understanding projct."""

        prompt = manager.format(llama_prompt, placeholders={'user_message': user_message,
                            'system_context': system_context},
                            grammars=[{
                                'description': 'This format elaborates on the project and its grammars.',
                                'model': [Project]},
        ]
        )

        llm_response = llm.request(prompt, temperature=0.01)
        log(llm_response)

        response = manager.parse(llm_response)
        log(response)

        log(manager.inflation_rate())

```
{
"Project": {
"name": "Multimodal Document Understanding Project",
"description": "A project focused on developing a system that can understand and analyze documents using multiple modes of input such as text, images, and audio.",
"project_url": "https://example.com/multimodal-document-understanding",
"team_members": [
{
"name": "John Doe",
"role": "Project Manager"
},
{
"name": "Jane Smith",
"role": "Lead Developer"
},
{
"name": "Alice Johnson",
"role": "Data Scientist"
}
],
"task": {
"title": "Research existing multimodal document understanding systems",
"description": "Conduct a literature review and analyze current state-of-the-art systems in the field.",
"assigned_to": "John Doe",
"due_date": ["2022-10-15"]
}
}
}
```
-------------
{
    "Project": {
        "name": "Multimodal Document Understanding Project",
        "description": "A project focused on developing a system that can understand and analyze documents using multiple modes of input such as text, images, and audio.

In [29]:
# You can add multiple grammars to the same prompt. NOT RECOMMENDED.
# If you wish to do so, generally, TOML and JSON are the best formats to use.

class EventIdea(BaseModel):
    event_name: str
    event_description: str
    event_duration: str

class BudgetPlan(BaseModel):
    budget: float
    items: List[str]
    prices: List[int]
    total_cost: int

class EventSchedule(BaseModel):
    event_name: str
    event_time: float
    event_duration: str

prompt = "I am hosting a birthday party for my girlfriend tomorrow. I want to buy a cake, balloons, some roses and ice cream. I have a budget of 500$. Can you create a sample event schedule and budget plan for me?."

with Constrain('toml', 'multi_response') as manager:
    prompt = manager.format(llama_prompt, 
        grammars=[
            {"task_description": "Brainstorming Event Ideas", "model": EventIdea},
            {
                "task_description": "Budget Planning And Activity Planning",
                "model": [BudgetPlan, EventSchedule],
            },
        ],
    )

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

``` 
[EventIdea]
event_name = "Summer BBQ Party"
event_description = "A fun outdoor gathering with delicious food and games"
event_duration = "4 hours"
```

``` 
[BudgetPlan]
budget = 500
items = ["Food", "Drinks", "Games"]
prices = [200, 100, 200]
total_cost = 500
[EventSchedule]
event_name = "Summer BBQ Party"
event_time = 12:00 PM
event_duration = "4 hours"
```
-------------
[{'EventIdea': [{'event_name': 'Summer BBQ Party', 'event_description': 'A fun outdoor gathering with delicious food and games', 'event_duration': '4 hours'}]}, {'BudgetPlan': [{'budget': 500, 'items': ['Food', 'Drinks', 'Games'], 'prices': [200, 100, 200], 'total_cost': 500}], 'EventSchedule': [{'event_name': 'Summer BBQ Party', 'event_time': 12, ':00 PMevent_duration': '4 hours'}]}]
-------------
{'before': 22, 'after': 173, 'factor': '6.9x'}
-------------


# Grammars

> "GBNF (GGML BNF) is a format for defining formal grammars to constrain model outputs in llama.cpp. For example, you can use it to force the model to generate valid JSON, or speak only in emojis."

Read more about it here: https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md

In [30]:
from grammarflow.grammars.gnbf import GNBF
from pydantic import BaseModel, Field
from typing import Optional, List

In [31]:
class TeamMember(BaseModel):
    name: str
    role: str

class TaskUpdate(BaseModel):
    update_time: float
    comment: Optional[str] = None
    status: bool

class Task(BaseModel):
    title: str
    description: str
    assigned_to: str = Field(..., pattern='("Akshath"|"Raghav"|"Ravikiran")')
    due_date: List[str]
    updates: List[TaskUpdate]

class Project(BaseModel):
    name: str
    description: str
    project_url: Optional[str] = Field(None, pattern='"https://"[0-9a-fA-F]*')
    team_members: List[TeamMember]  
    task: Task

In [32]:
grammar = GNBF(Project).generate_grammar()

In [33]:
print(grammar)

root ::= ws Project
Project ::= nl "{" "\"Project\":" ws "{" ws "\"name\":" ws string "," nl "\"description\":" ws string "," nl "\"project-url\":" ws project-url "," nl "\"team-members\":" ws TeamMember "," nl "\"task\":" ws Task "}" ws "}"
project-url ::= "https://"[0-9a-fA-F]*
assigned-to ::= ("Akshath"|"Raghav"|"Ravikiran")
ws ::= [ \t\n]
nl ::= [\n]
string ::=  "\"" (
            [^"\\] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
            )* "\""
TeamMember ::= nl "{" ws "\"name\":" ws string "," nl "\"role\":" ws string "}"
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)?
boolean ::= ("True" | "False")
TaskUpdate ::= nl "{" ws "\"update-time\":" ws number "," nl "\"comment\":" ws string "," nl "\"status\":" ws boolean "}"
array ::= "[" ws (
                due-date-value
                ("," ws due-date-value)*
            )? "]" ws
due-date-value ::= string
Task ::= nl "{" ws "\"title\":" ws string "," nl "

In [34]:
# Using llama.cpp, we can verify if our grammar string is accepted.
# If successful, no error is thrown. Unfortunately, llama-cpp-python prints out the syntax tree to stdout. 
GNBF.verify_grammar(grammar)

from_string grammar:
root ::= ws Project 
ws ::= [ <U+0009><U+000A>] 
Project ::= nl [{] ["] [P] [r] [o] [j] [e] [c] [t] ["] [:] ws [{] ws ["] [n] [a] [m] [e] ["] [:] ws string [,] nl ["] [d] [e] [s] [c] [r] [i] [p] [t] [i] [o] [n] ["] [:] ws string [,] nl ["] [p] [r] [o] [j] [e] [c] [t] [-] [u] [r] [l] ["] [:] ws project-url [,] nl ["] [t] [e] [a] [m] [-] [m] [e] [m] [b] [e] [r] [s] ["] [:] ws TeamMember [,] nl ["] [t] [a] [s] [k] ["] [:] ws Task [}] ws [}] 
nl ::= [<U+000A>] 
string ::= ["] string_13 ["] 
project-url ::= [h] [t] [t] [p] [s] [:] [/] [/] project-url_8 
TeamMember ::= nl [{] ws ["] [n] [a] [m] [e] ["] [:] ws string [,] nl ["] [r] [o] [l] [e] ["] [:] ws string [}] 
Task ::= nl [{] ws ["] [t] [i] [t] [l] [e] ["] [:] ws string [,] nl ["] [d] [e] [s] [c] [r] [i] [p] [t] [i] [o] [n] ["] [:] ws string [,] nl ["] [a] [s] [s] [i] [g] [n] [e] [d] [-] [t] [o] ["] [:] ws assigned-to [,] nl ["] [d] [u] [e] [-] [d] [a] [t] [e] ["] [:] ws array [,] nl ["] [u] [p] [d] [a] [t] [e] [s] 

<llama_cpp.llama_grammar.LlamaGrammar at 0x2ae9dd5c77a0>

In [35]:
grammar = GNBF(Project).generate_grammar('xml')
GNBF.verify_grammar(grammar)

grammar = GNBF(Project).generate_grammar('toml')
GNBF.verify_grammar(grammar) 

from_string grammar:
root ::= ws Project 
ws ::= [ <U+0009><U+000A>] 
Project ::= [<] [P] [r] [o] [j] [e] [c] [t] [>] ws [<] [n] [a] [m] [e] [>] ws string ws [<] [/] [n] [a] [m] [e] [>] ws [<] [d] [e] [s] [c] [r] [i] [p] [t] [i] [o] [n] [>] ws string ws [<] [/] [d] [e] [s] [c] [r] [i] [p] [t] [i] [o] [n] [>] ws [<] [p] [r] [o] [j] [e] [c] [t] [-] [u] [r] [l] [>] ws project-url ws [<] [/] [p] [r] [o] [j] [e] [c] [t] [-] [u] [r] [l] [>] ws [<] [t] [e] [a] [m] [-] [m] [e] [m] [b] [e] [r] [s] [>] ws TeamMember ws [<] [/] [t] [e] [a] [m] [-] [m] [e] [m] [b] [e] [r] [s] [>] ws [<] [t] [a] [s] [k] [>] ws Task ws [<] [/] [t] [a] [s] [k] [>] ws [<] [/] [P] [r] [o] [j] [e] [c] [t] [>] 
string ::= ["] string_13 ["] 
project-url ::= [h] [t] [t] [p] [s] [:] [/] [/] project-url_7 
TeamMember ::= [<] [n] [a] [m] [e] [>] ws string ws [<] [/] [n] [a] [m] [e] [>] ws [<] [r] [o] [l] [e] [>] ws string ws [<] [/] [r] [o] [l] [e] [>] 
Task ::= [<] [t] [i] [t] [l] [e] [>] ws string ws [<] [/] [t] [i] [t] [l]

<llama_cpp.llama_grammar.LlamaGrammar at 0x2ae9de7b7d70>