In [1]:
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Any, List, Tuple, Type, Optional, Union
import os
import json

In [2]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

## Helpers!

In [3]:
class LLM:
    """Minimal wrapper around the OpenAI chat-completions endpoint.

    Args:
        model: Chat model identifier sent with every request. Defaults to
            "gpt-3.5-turbo", the value that used to be hard-coded.

    Raises:
        KeyError: If the OPENAI_API_KEY environment variable is not set.
    """

    def __init__(self, model: str = "gpt-3.5-turbo"):
        self.model = model
        self.client = OpenAI(
            api_key=os.environ["OPENAI_API_KEY"],
        )

    def invoke(self, config: dict):
        """Fill a prompt from `config` and send it at temperature 0.01.

        NOTE(review): `PromptContextManager` is neither defined nor imported
        in the visible notebook, so calling this raises NameError unless the
        name is supplied elsewhere -- confirm before relying on it.
        """
        with PromptContextManager(config) as filled_prompt:
            return self.request(filled_prompt, temperature=0.01)

    def request(self, prompt, temperature=0.2, context=None):
        """Send `prompt` as a user message and return the reply text.

        Args:
            prompt: Text of the user message.
            temperature: Sampling temperature forwarded to the API.
            context: Optional system message placed before the user message.
                Skipped when None or empty, so existing callers send exactly
                the same single-message request as before.

        Returns:
            The `content` of the first choice in the response.
        """
        messages = []
        if context:
            # Previously this argument was accepted but never used.
            messages.append({"role": "system", "content": context})
        messages.append({"role": "user", "content": prompt})

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=temperature,
        )
        return response.choices[0].message.content


llm = LLM()

In [4]:
def log(msg):
    """Print `msg`, then a line of dashes as a visual separator."""
    print(msg, "-------------", sep="\n")

# Making Prompts!

In [5]:
from grammarflow.prompt.builder import PromptBuilder  # Prompt builder

You can use `PromptBuilder` to make your own prompt templates. Otherwise, you can just pass in your prompt as a string and `Constrain` takes care of the rest.

If using PromptBuilder, you will add sections within your prompt. Each section takes these attributes: 
- text: str, Fixed text; can contain `{placeholder}` slots, each of which must be listed in the `placeholders` attribute.
- placeholders: str or list of str, the exact placeholder identifiers used in `text`.
- define_grammar: True/False, Can be used only once in the template. Defines the section where grammar will be explained.
- add_few_shot_examples: True/False, Adds a filled-up Pydantic model to the prompt aligning with the grammar format.

In [6]:
# Sample Llama Prompt
# Two-section template laid out in the Llama-style [INST] / <<SYS>> chat format.

llama_prompt = PromptBuilder()
# System section: {system_context} is filled in later through format_prompt();
# define_grammar=True marks this as the section where the grammar is explained.
llama_prompt.add_section(
    text="<s>[INST] <<SYS>>\n{system_context}\n<</SYS>>",
    placeholders="system_context",
    define_grammar=True,
)
# User section: carries the user's message and closes the [INST] block.
llama_prompt.add_section(
    text="{user_message}[/INST]",
    placeholders="user_message",
)

# Using Constrain:

In [7]:
from grammarflow.constrain import Constrain  # Main class

1. Begin by creating a `Constrain` object from a previously built `PromptBuilder` template. Alternatively, just pass in a string that contains your prompt.

2. Use `set_config` method on the `Constrain` instance to specify the output format and how the responses should be structured. In this case:
   - `format`: Specifies the output format, here set to 'json'. Can also be 'xml' or 'toml'.
   - `return_sequence`: Determines how responses are returned. Can be 'single_response' or 'multiple_response'. 

3. The `format_prompt` method is used to assemble the final prompt using placeholders and task-specific configurations:
   - `placeholders`: Needs to be a dict of {'placeholder': 'string'}. Required, if your prompt template contains placeholders. 
   - `grammars`: A list of grammars. Each task includes:
       - `description`: A brief explanation of what the task entails or aims to achieve. Optional.
       - `model`: A Pydantic model class (not an instance), or a list of such classes.

4. After calling `format_prompt`, `manager` holds the final prompt. You can get it from `manager.prompt`.

5. `manager.parse(llm_response)` takes in the response string and outputs the parsed object. It will return `Response` object, which is a simple wrapper for easy accessing of nested fields. 

6. You can view the inflation rate of tokens using `manager.inflation_rate()`.

In [8]:
# Both inputs are left empty: this cell only shows how a prompt is assembled.
user_message = ""
system_context = ""


class Model(BaseModel):
    # Smallest possible grammar model: a single string field.
    # NOTE(review): pydantic v2 treats the `model_` prefix as a protected
    # namespace and warns on field names like this one -- confirm the
    # installed pydantic version.
    model_name: str


with Constrain(llama_prompt) as manager:
    manager.set_config(format="json", return_sequence="single_response")

    manager.format_prompt(
        placeholders={
            "user_message": user_message,
            "system_context": system_context,
        },
        grammars=[{"model": [Model]}],
    )

    # The fully assembled prompt text, ready to send to an LLM.
    prompt = manager.prompt

# Examples! 

In [9]:
class Annotations(BaseModel):
    # Output schema for the abstract-annotation example. Field descriptions
    # are rendered into the grammar section of the prompt, so they restate the
    # category definitions from the annotator instructions instead of the
    # former "ADD SOMETHING" placeholders.
    materials: List[str] = Field(
        ...,
        description="Materials used in the work described by the abstract.",
    )
    conditions: List[str] = Field(
        ...,
        description="Conditions: temperature units, quantity units, mathematical units, percentages, coefficients, any numbers.",
    )
    parameters: List[str] = Field(
        ...,
        description="Parameters such as factor names, rate, yield, etc.",
    )
    processes: List[str] = Field(
        ...,
        description="Processes involved, such as types of chromatography, purification, methods, preparation, study, etc.",
    )

In [10]:
# Prompt template for the abstract-annotation example: role instructions,
# the grammar section, one worked example, then the abstract to annotate.
prompt = PromptBuilder()
prompt.add_section(
  text="""
Your role is that of a DATA ANNOTATOR for research paper abstracts. You are expected to identify the materials used, different processes involved (such as types of chromatography, purification, methods, preparation, study, etc), and conditions (temperature units, quantity units, mathematical units, percentages, coefficients, any numbers) and any parameters- like factor names, rate, yield, etc mentioned in the abstract. 

I want you to look at the abstract given below and return all the key phrases you find. 
  """
)
# Section where the output-format (grammar) explanation is placed.
prompt.add_section(
  define_grammar=True
)
# Worked example. The lists were previously malformed (an unbalanced quote
# merged two items, plus a trailing comma and stray spaces); they are now
# well-formed so the model is shown the exact shape it should reproduce.
prompt.add_section(
  text="""
Here is an example: 

Abstract: Of the three particle sizes studied (10µm, 20µm, 50µm) only 10µm silica resin was able to produce purified API at the yield (>96%) and productivity (> 1kg/kg-resin/day) necessitated by the project. The second case study uses DoE studies to identify critical process parameters of column load, mobile phase solvent ratio and basic modifier level for a low-resolution, preparative, chiral separation.
Annotations: 
```xml
<materials>['silica resin']</materials>
<conditions>['10µm, 20µm, 50µm', 'only 10µm silica resin', 'yield (>96%)', 'productivity (> 1kg/kg-resin/day)']</conditions>
<parameters>['column load', 'mobile phase solvent ratio']</parameters>
<processes>['chiral separation']</processes>
```

Every str object within the list you return for each of the tags must contain at least 2 words and not exceed 8 words. Remember that your role is to automate the data annotation process for  a chemistry based project. 

Begin!
"""
)
# Final section: the abstract to annotate is substituted for {abstract}.
prompt.add_section(
  text="Abstract: {abstract}\nAnnotations:", 
  placeholders=["abstract"]
)

In [11]:
abstract = """ The simultaneous determination of multi-mycotoxins in food commodities are highly desirable due to their potential toxic effects and mass consumption of foods. Herein, liquid chromatography-quadrupole exactive orbitrap mass spectrometry was proposed to analyze multi-mycotoxins in commercial vegetable oils. Specifically, the method featured a successive liquid–liquid extraction process, in which the complementary solvents consisted of acetonitrile and water were optimized. Resultantly, matrix effects were reduced greatly. External calibration approach revealed good quantification property for each analyte. Under optimal conditions, the recovery ranging from 80.8% to 109.7%, relative standard deviation less than 11.7%, and good limit of quantification (0.35 to 45.4ng/g) were achieved. The high accuracy of proposed method was also validated. The detection of 20 commercial vegetable oils revealed that aflatoxins B1 and B2, zearalenone were observed in 10 real samples. The as-developed method is simple and low-cost, which merits the wide applications for scanning mycotoxins in oil matrices."""

In [12]:
with Constrain(prompt) as manager:
    manager.set_config(
        format='xml',
        return_sequence='single_response'
    )

    manager.format_prompt(
        placeholders={"abstract": abstract},
        grammars=[{'model': [Annotations]}],
    )

    # Keep the filled prompt under its own name. Rebinding `prompt` here would
    # replace the PromptBuilder template with a plain string, so re-running
    # this cell would no longer start from the template.
    annotation_prompt = manager.prompt
    llm_response = llm.request(annotation_prompt)
    # Parsed result; fields are reachable as attributes (response.Annotations...).
    response = manager.parse(llm_response)

In [13]:
print(response)

{
    "Annotations": {
        "materials": "['commercial vegetable oils', 'acetonitrile', 'water']",
        "conditions": "['liquid chromatography-quadrupole exactive orbitrap mass spectrometry', 'successive liquid\u2013liquid extraction process', 'acetonitrile and water were optimized', 'under optimal conditions', 'good limit of quantification (0.35 to 45.4ng/g)']",
        "parameters": "['recovery ranging', 'relative standard deviation', 'limit of quantification']",
        "processes": "['liquid chromatography-quadrupole exactive orbitrap mass spectrometry', 'liquid\u2013liquid extraction process', 'external calibration approach']"
    }
}


In [16]:
response.Annotations.conditions

"['liquid chromatography-quadrupole exactive orbitrap mass spectrometry', 'successive liquid–liquid extraction process', 'acetonitrile and water were optimized', 'under optimal conditions', 'good limit of quantification (0.35 to 45.4ng/g)']"

In [14]:
# NOTE(review): unconditional raise -- "Run All" stops at this cell, so the
# cells below run only when executed by hand.
raise ValueError("Stop here")

ValueError: Stop here

In [None]:
def check_previous_interaction(id_):
    """Return True when `id_` is greater than 1, otherwise False."""
    return id_ > 1

In [None]:
class Step(BaseModel):
    # One reasoning step: what to find out, which action to take, and its input.
    thought: str = Field(
        ...,
        description="This should concisely explain what you want to know for your goal.",
    )
    action: str = Field(
        ...,
        # Adjacent literals are concatenated into the same single-line text
        # the backslash continuation used to produce.
        description=(
            "Your options: "
            "'load_md_file' (Provide the name of the file you want to load. Eg: 'README.md') | "
            "'get_link_from_filename' (Provide the name (can be incomplete) of the file, and get it's link)."
        ),
    )
    action_input: str = Field(
        ...,
        description="The input for the action you want to take. Eg: 'README.md' | 'readme'",
    )
    
# Template for the multi-step agent conversation.
# Sections given `enable_on` appear to be included only when the callable
# accepts the values passed to format_prompt(enable_on=...) -- the recorded
# prompt for id_ = 0 contains none of them. TODO confirm against grammarflow.
prompt = PromptBuilder()
prompt.add_section(
  text="Your role is that of a {role}. In this ongoing conversation, your goal is to {goal}.\nYour final result should contain {deliverables}.",
  placeholders=["role", "goal", "deliverables"]
)
# Section where the output-format (grammar) explanation is placed.
prompt.add_section(
  define_grammar=True
)
prompt.add_section(
  text="\nIn our previous interaction, you wanted to {thought} using {action}. You observed: {observation}.",
  placeholders=["thought", "action", "observation"],
  enable_on=check_previous_interaction
)
prompt.add_section(
  text="Create the next Step in the conversation. Think through your reasoning and the action you want to take. Ensure that you are progressing towards your goal.",
  enable_on=check_previous_interaction
)
prompt.add_section(
  # The slot used to be spelled {hsitory}, which did not match the declared
  # "history" placeholder, so the history could never be substituted.
  text="Below is the history of the conversation so far.\n{history}\n",
  placeholders=["history"],
  enable_on=check_previous_interaction
)

In [None]:
role = "software developer trying to reproduce a codebase"
goal = "create a roadmap to set up the environment a github repository"
deliverables = "the links to the files needed in each step of your roadmap, and the code for each step"
thought = None 
action = None
observation = None
history = None

id_ = 0

In [None]:
# Assemble the first agent prompt and print it. `id_` is handed to the
# template through `enable_on`; no LLM call is made in this cell.
with Constrain(prompt) as manager: 
  manager.set_config(
    format='xml'
  ) 
  manager.format_prompt(
    placeholders={ 
      "role": role, 
      "goal": goal, 
      "deliverables": deliverables,
      "thought": thought, 
      "action": action,
      "observation": observation,
      "history": history
    }, 
    grammars=[{
      'description': 'Your thinking state', 
      'model': Step
    }], 
    enable_on={
      'id_':id_ 
    }
  ) 
  
  print(manager.prompt)
  # TODO: the lines below are an unfinished sketch of the agent loop and are
  # not runnable as written: `llm` is an LLM instance (not callable), the
  # reply is stored in `resp` but read from `response`, and `git` is not
  # defined anywhere in the visible notebook.
  # resp = llm(manager.prompt, temperature=0.01)

  # thought = response.Step.thought 
  # action = response.Step.action
  # action_input = response.Step.action_input

  # if action == "load_md_file":  
  #   git.find_files(action_input)
  # elif action == "get_link_from_filename":
  #   git.find_files(action_input)
  #   git.get_file_url(action_input)

Your role is that of a software developer trying to reproduce a codebase. In this ongoing conversation, your goal is to create a roadmap to set up the environment a github repository.
Your final result should contain the links to the files needed in each step of your roadmap, and the code for each step.

Here is the XML output format you are expected to return your response in.

Your thinking state
```
<Step>
<thought> #string# </thought> # This should concisely explain what you want to know for your goal.
<action> #string# </action> # Your options: 'load_md_file' (Provide the name of the file you want to load. Eg: 'README.md') | 'get_link_from_filename' (Provide the name (can be incomplete) of the file, and get it's link).
<action_input> #string# </action_input> # The input for the action you want to take. Eg: 'README.md' | 'readme'
</Step>

```

RETURN ONLY ONE OF <Step>. DO NOT FORGET TO COVER YOUR OUTPUTS WITH '```'.


In [None]:
# NOTE(review): unconditional raise with no message -- "Run All" stops at this
# cell, so the cells below run only when executed by hand.
raise Exception()

Exception: 

In [None]:
# Here's a simple example of asking an LLM to generate code.
# This can be used within coding assistants that require extra metadata.


class FunctionModel(BaseModel):
    # Metadata the LLM must return together with the generated code.
    function_name: str
    docstring: str
    # NOTE(review): spelled "depedencies"; the name is emitted as a tag in the
    # recorded output, so renaming it changes the requested format.
    depedencies: List[str]
    uuid: Union[float, int]
    is_python: bool
    code: str


# A plain string is passed to Constrain here instead of a PromptBuilder template.
input_str = "I want to create a function that returns the fibonacci sequence. The function should be called 'fib'. The function can use numpy."

with Constrain(input_str) as manager:
    manager.set_config(format="xml", return_sequence="single_response")

    manager.format_prompt(
        grammars=[{"description": "No Code Generation", "model": FunctionModel}]
    )

    # Final prompt: the input string plus the grammar instructions.
    prompt = manager.prompt

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    # Parse the raw reply into a Response object keyed by model name.
    response = manager.parse(llm_response)
    log(response)

    # Token counts before/after adding the grammar, and their ratio.
    log(manager.inflation_rate())

```
<FunctionModel>
<function_name>fib</function_name>
<docstring>Function to generate the Fibonacci sequence</docstring>
<depedencies>numpy</depedencies>
<uuid>123456789</uuid>
<is_python>true</is_python>
<code>
def fib(n):
    import numpy as np
    a, b = 0, 1
    result = []
    for _ in range(n):
        result.append(a)
        a, b = b, a + b
    return np.array(result)
</code>
</FunctionModel>
```
-------------
{
    "FunctionModel": {
        "function_name": "fib",
        "docstring": "Function to generate the Fibonacci sequence",
        "depedencies": "numpy",
        "uuid": 123456789,
        "is_python": true,
        "code": "def fib(n):    import numpy as np    a, b = 0, 1    result = []    for _ in range(n):        result.append(a)        a, b = b, a + b    return np.array(result)"
    }
}
-------------
{'before': 27, 'after': 146, 'factor': '4.4x'}
-------------


In [None]:
# The response will be of `Response` type, which can be used to extract the data. If adding the parsed response to this object fails, it will return the dict itself.
print(response.FunctionModel.is_python)

True


In [None]:
# Add some examples too!
# A filled-in FunctionModel is supplied as a few-shot example for the prompt.

# NOTE(review): the example's docstring talks about summing an input list,
# while its code adds two arguments -- confirm which one is intended.
Sum_Function_Model = FunctionModel(
    function_name="sum",
    docstring="This function returns the sum of the input list.",
    depedencies=["numpy"],
    uuid=123456789,
    is_python=True,
    code="def sum(a, b):\n\treturn a + b"
)


with Constrain(input_str) as manager:
    manager.set_config(
        format='xml',
        return_sequence='single_response'
    )

    # `examples` pairs a sample query with the filled-in model answering it.
    manager.format_prompt(
        grammars=[
            {
                'description': 'No Code Generation',
                'model': FunctionModel
            }
        ],
        examples=[
            {
                'query': "Create a summation function in Python",
                'model': Sum_Function_Model
            }
        ]
    )

    prompt = manager.prompt

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    response = manager.parse(llm_response)
    log(response)

    # The recorded factor is higher than without the example (8.3x vs 4.4x).
    log(manager.inflation_rate())

```
<FunctionModel>
<function_name> fib </function_name>
<docstring> This function returns the Fibonacci sequence up to the input number. </docstring>
<depedencies> ['numpy'] </depedencies>
<uuid> 987654321 </uuid>
<is_python> True </is_python>
<code> def fib(n):
    a, b = 0, 1
    result = []
    while a < n:
        result.append(a)
        a, b = b, a + b
    return result </code>
</FunctionModel>
```
-------------
{
    "FunctionModel": {
        "function_name": "fib",
        "docstring": "This function returns the Fibonacci sequence up to the input number.",
        "depedencies": "['numpy']",
        "uuid": 987654321,
        "is_python": true,
        "code": "def fib(n):    a, b = 0, 1    result = []    while a < n:        result.append(a)        a, b = b, a + b    return result"
    }
}
-------------
{'before': 27, 'after': 252, 'factor': '8.3x'}
-------------


In [None]:
# Sample ReAct Model with Llama Prompt
# You can add descriptions within the grammar model to provide its context and options. This is how we use the LLM in https://github.com/e-lab/Forestry_Student/.

class ThoughtState(BaseModel):
    # One step of agent reasoning; the Field descriptions list the allowed
    # options for `tool` and `action`.
    thought: str
    goal: str
    tool: str = Field(...,
                      description="Choose one of ['Web_QA', 'Web_Search', 'Web_Scraping', 'Web_Automation', 'Web_Research']")
    action: str = Field(...,
                        description="Choose one of ['Create', 'Update', 'Delete', 'Read']")
    action_input: str = Field(..., description="The input data for the action")
    # Optional: defaults to None when omitted.
    thought_id: Optional[str] = Field(
        None, description="The unique identifier for the thought")


# Values substituted into the two placeholders of `llama_prompt`.
system_context = """Your goal is to think and plan out how to solve questions using agent tools provided to you. Think about all aspects of your thought process."""
user_message = """Who is Vladmir Putin?"""

with Constrain(llama_prompt) as manager:
    manager.set_config(
        format='json',
        return_sequence='single_response'
    )

    manager.format_prompt(placeholders={
                          'user_message': user_message,
                          'system_context': system_context
                          },
                          grammars=[{
                              'description': 'This format describes your current thinking state',
                              'model': [ThoughtState]},
    ]
    )

    prompt = manager.prompt

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    # Parsed reply; fields are reachable as response.ThoughtState.<field>.
    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

```json
{
"ThoughtState": {
"thought": "Vladimir Putin is the President of Russia.",
"goal": "To provide information about Vladimir Putin.",
"tool": "Web_Search",
"action": "Read",
"action_input": "Vladimir Putin",
"thought_id": "12345"
    }
}
```
-------------
{
    "ThoughtState": {
        "thought": "Vladimir Putin is the President of Russia.",
        "goal": "To provide information about Vladimir Putin.",
        "tool": "Web_Search",
        "action": "Read",
        "action_input": "Vladimir Putin",
        "thought_id": "12345"
    }
}
-------------
{'before': 115, 'after': 277, 'factor': '1.4x'}
-------------


In [None]:
# You can then access the response from the `response` object
print(response.ThoughtState.tool)

Web_Search


In [None]:
# You can add complex layers of grammars. You can even use Optional and Union types.

class TeamMember(BaseModel):
    name: str
    role: str


class TaskUpdate(BaseModel):
    update_time: float
    # Optional: defaults to None when omitted.
    comment: Optional[str] = None
    status: bool


class Task(BaseModel):
    title: str
    description: str
    # Nested models: each entry is a TeamMember / TaskUpdate.
    assigned_to: List[TeamMember]
    due_date: List[str]
    updates: List[TaskUpdate]


class Project(BaseModel):
    name: str
    description: str
    project_url: Optional[str] = None
    team_members: List[TeamMember]
    # NOTE(review): the field is called `grammars` but holds a single Task;
    # the name is emitted as the <grammars> tag in the recorded output, so
    # renaming it changes the requested format.
    grammars: Task


with Constrain(llama_prompt) as manager:
    manager.set_config(
        format='xml',
        return_sequence='single_response'
    )

    # Values substituted into the two placeholders of `llama_prompt`.
    system_context = """You are a project manager and you are responsible for managing a project. You have to manage the project, it's grammars and other aspects. Ensure that you fill out all required fields."""
    user_message = """Make me a project plan for a new project on multimodal document understanding projct."""

    manager.format_prompt(placeholders={'user_message': user_message,
                          'system_context': system_context},
                          grammars=[{
                              'description': 'This format elaborates on the project and its grammars.',
                              'model': [Project]},
    ]
    )

    prompt = manager.prompt

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    # Nested models come back as nested fields (e.g. response.Project.grammars).
    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

``` 
<Project>
<name> Multimodal Document Understanding Project </name>
<description> This project aims to develop a system that can understand and analyze multimodal documents, including text, images, and videos. </description>
<project_url> www.multimodalproject.com </project_url>
<team_members>
    <TeamMember>
        <name> John Doe </name>
        <role> Project Manager </role>
    </TeamMember>
    <TeamMember>
        <name> Jane Smith </name>
        <role> Data Scientist </role>
    </TeamMember>
</team_members>
<grammars>
    <Task>
        <title> Develop text analysis module </title>
        <description> Implement natural language processing techniques to extract key information from text documents. </description>
        <assigned_to>
            <TeamMember>
                <name> Jane Smith </name>
                <role> Data Scientist </role>
            </TeamMember>
        </assigned_to>
        <due_date> 2022-10-15 </due_date>
        <updates>
            <TaskU

In [None]:
# The Response object allows for easy access
print(response.Project.grammars)

{
    "Task": [
        {
            "title": "Develop text analysis module",
            "description": "Implement natural language processing techniques to extract key information from text documents.",
            "assigned_to": {
                "TeamMember": {
                    "name": "Jane Smith",
                    "role": "Data Scientist"
                }
            },
            "due_date": "2022-10-15",
            "updates": {
                "TaskUpdate": {
                    "update_time": 1633660800,
                    "comment": "Completed initial data preprocessing",
                    "status": true
                }
            }
        },
        {
            "title": "Develop image analysis module",
            "description": "Implement computer vision algorithms to analyze images and extract relevant features.",
            "assigned_to": {
                "TeamMember": {
                    "name": "John Doe",
                    "role": "Project Mana

In [None]:
# You can add multiple grammars to the same prompt. NOT RECOMMENDED.

class EventIdea(BaseModel):
    event_name: str
    event_description: str
    event_duration: str


class BudgetPlan(BaseModel):
    budget: float
    # `items` and `prices` are separate lists; presumably paired by position
    # -- nothing in the model enforces equal lengths.
    items: List[str]
    prices: List[int]
    total_cost: int


class EventSchedule(BaseModel):
    event_name: str
    event_time: float
    event_duration: str


# `prompt` starts as the raw request and is rebound to the assembled prompt below.
prompt = "I am hosting a birthday party for my girlfriend tomorrow. I want to buy a cake, balloons, some roses and ice cream. I have a budget of 500$. Can you create a sample event schedule and budget plan for me?."

with Constrain(prompt) as manager:
    # NOTE(review): the usage notes earlier in this notebook name this option
    # 'multiple_response', while the code passes "multi_response" -- confirm
    # which spelling grammarflow accepts.
    manager.set_config(
        format="toml",
        return_sequence="multi_response",
    )

    # NOTE(review): every other cell uses the key 'description'; confirm that
    # "task_description" is still recognized and not silently ignored.
    manager.format_prompt(
        grammars=[
            {"task_description": "Brainstorming Event Ideas", "model": EventIdea},
            {
                "task_description": "Budget Planning And Activity Planning",
                "model": [BudgetPlan, EventSchedule],
            },
        ],
    )

    prompt = manager.prompt

    llm_response = llm.request(prompt, temperature=0.01)
    log(llm_response)

    # In the recorded output each model name maps to a list of parsed entries.
    response = manager.parse(llm_response)
    log(response)

    log(manager.inflation_rate())

```
[EventIdea]
event_name = "Birthday Party for Girlfriend"
event_description = "A special celebration for my girlfriend's birthday"
event_duration = "3 hours"

[BudgetPlan]
budget = 500
items = ["Cake", "Balloons", "Roses", "Ice Cream"]
prices = [50, 20, 30, 40]
total_cost = 140

[EventSchedule]
event_name = "Birthday Party for Girlfriend"
event_time = 12
event_duration = "3 hours"
```
-------------
{
    "EventIdea": [
        {
            "event_name": "Birthday Party for Girlfriend",
            "event_description": "A special celebration for my girlfriend's birthday",
            "event_duration": "3 hours"
        }
    ],
    "BudgetPlan": [
        {
            "budget": 500,
            "items": [
                "Cake",
                "Balloons",
                "Roses",
                "Ice Cream"
            ],
            "prices": [
                50,
                20,
                30,
                40
            ],
            "total_cost": 140
        }
   

# Grammars

> "GBNF (GGML BNF) is a format for defining formal grammars to constrain model outputs in llama.cpp. For example, you can use it to force the model to generate valid JSON, or speak only in emojis."

Read more about it here: https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md

In [None]:
from grammarflow.grammars.gnbf import GNBF

In [None]:
grammar = GNBF(Project).generate_grammar()

In [None]:
log(grammar)

root ::= project ws
project ::= "{" ws "\"name\":" ws string "," ws "\"description\":" ws string "," ws "\"project-url\":" ws string "," ws "\"team-members\":" ws teammember "," ws "\"grammars\":" ws grammars "}" ws
ws ::= [ \t\n]*
string ::=  "\"" (
            [^"\\] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fa-f] [0-9a-fa-f] [0-9a-fa-f] [0-9a-fa-f])
            )* "\""
teammember ::= "{" ws "\"name\":" ws string "," ws "\"role\":" ws string "}" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([ee] [-+]? [0-9]+)?
taskupdate ::= "{" ws "\"update-time\":" ws number "," ws "\"comment\":" ws string "," ws "\"status\":" ws status "}" ws
array ::= "[" ws (
                due-date-value
                ("," ws due-date-value)*
            )? "]" ws
due-date-value ::= string
task ::= "{" ws "\"title\":" ws string "," ws "\"description\":" ws string "," ws "\"assigned-to\":" ws teammember "," ws "\"due-date\":" ws array "," ws "\"updates\":" ws taskupdate "}" ws
-------------


In [None]:
# Using llama.cpp, we can verify if our grammar string is accepted.
GNBF.verify_grammar(grammar)

from_string grammar:
root ::= project ws 
project ::= [{] ws ["] [n] [a] [m] [e] ["] [:] ws string [,] ws ["] [d] [e] [s] [c] [r] [i] [p] [t] [i] [o] [n] ["] [:] ws string [,] ws ["] [p] [r] [o] [j] [e] [c] [t] [-] [u] [r] [l] ["] [:] ws string [,] ws ["] [t] [e] [a] [m] [-] [m] [e] [m] [b] [e] [r] [s] ["] [:] ws teammember [,] ws ["] [g] [r] [a] [m] [m] [a] [r] [s] ["] [:] ws grammars [}] ws 
ws ::= ws_6 
string ::= ["] string_9 ["] 
teammember ::= [{] ws ["] [n] [a] [m] [e] ["] [:] ws string [,] ws ["] [r] [o] [l] [e] ["] [:] ws string [}] ws 
print_grammar: error printing grammar: malformed rule, does not end with LLAMA_GRETYPE_END: 5



<llama_cpp.llama_grammar.LlamaGrammar at 0x7fc102ff2c70>