In [1]:
! pip show accelerate

Name: accelerate
Version: 0.26.1
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: sylvain@huggingface.co
License: Apache
Location: /Users/calebcourier/Projects/jsonformer/.venv/lib/python3.12/site-packages
Requires: huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch
Required-by: 


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint shared by every example in this notebook.
model_name = "databricks/dolly-v2-3b"

print("Loading model and tokenizer...")
# use_cache=True and device_map="auto" are model-side options; device placement
# is left to the library (presumably via accelerate, which is checked above).
model = AutoModelForCausalLM.from_pretrained(model_name, use_cache=True, device_map="auto")
# use_cache is not a tokenizer option, so it is intentionally not passed here;
# only the fast-tokenizer preference applies.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
print("Loaded model and tokenizer")

  from .autonotebook import tqdm as notebook_tqdm
W0915 16:28:07.600000 92212 torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


Loading model and tokenizer...
Loaded model and tokenizer


In [3]:
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer

# Schema for one inventory entry. Property order is deliberate: the generated
# output lists fields in the order they are declared here.
product_schema = {
    "type": "object",
    "properties": {
        "productId": {"type": "string"},
        "name": {"type": "string"},
        "description": {"type": "string"},
        "category": {"type": "string"},
        "price": {"type": "number"},
        "inStock": {"type": "boolean"},
        "rating": {"type": "number"},
        "images": {"type": "array", "items": {"type": "string"}},
    },
}

# A store has a name, a location and a list of inventory entries.
store_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "location": {"type": "string"},
        "inventory": {"type": "array", "items": product_schema},
    },
}

# Top-level e-commerce schema: a single object wrapping the store.
ecomm = {"type": "object", "properties": {"store": store_schema}}


# Natural-language instruction paired with the e-commerce schema.
ecomm_prompt = "write a description about mike's ski shop which sells premium skis and snowboards"

# NOTE(review): max_string_token_length presumably bounds the number of tokens
# generated per string value -- confirm against the Jsonformer implementation.
builder = Jsonformer(
    json_schema=ecomm,
    prompt=ecomm_prompt,
    max_string_token_length=20,
    model=model,
    tokenizer=tokenizer,
)

# Announce the (slow) generation step, then invoke the builder to run it.
print("Generating...")
ecomm_result = builder()
output = ecomm_result

# Print the generated structure with its values highlighted.
highlight_values(ecomm_result)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generating...
{
  store: {
    name: [32m"Mike's Ski Shop"[0m,
    location: [32m"Somewhere"[0m,
    inventory: [
      {
        productId: [32m"1234567890"[0m,
        name: [32m"Mike's Ski Shop"[0m,
        description: [32m"Ski Shop"[0m,
        category: [32m"Ski Shop"[0m,
        price: [32m20.09999[0m,
        inStock: [32mTrue[0m,
        rating: [32m5.09999[0m,
        images: [
          [32m"http://www.example.com/images/ski.png"[0m,
          [32m"http://www.example.com/images/ski.png"[0m
        ]
      },
      {
        productId: [32m"9876543210"[0m,
        name: [32m"Mike's Ski Shop"[0m,
        description: [32m"Ski Shop"[0m,
        category: [32m"Ski Shop"[0m,
        price: [32m20.09999[0m,
        inStock: [32mTrue[0m,
        rating: [32m5.09999[0m,
        images: [
          [32m"http://www.example.com/images/ski.png"[0m
        ]
      },
      {
        productId: [32m"9876543211"[0m,
        name: [32m"Mike's Ski Sho

In [4]:
# Fields of a single car, in the order they should be generated.
car_properties = {
    "make": {"type": "string"},
    "model": {"type": "string"},
    "year": {"type": "number"},
    "colors_available": {
        "type": "array",
        "items": {"type": "string"},
    },
}

# Flat object schema describing one car.
car = {"type": "object", "properties": car_properties}

# Natural-language instruction paired with the flat car schema.
car_prompt = "generate an example car"

# No extra generation options are passed here, so Jsonformer's defaults apply.
builder = Jsonformer(
    json_schema=car,
    prompt=car_prompt,
    model=model,
    tokenizer=tokenizer,
)

# Announce the generation step, then invoke the builder to run it.
print("Generating...")
car_result = builder()
output = car_result

# Print the generated structure with its values highlighted.
highlight_values(car_result)


Generating...
{
  make: [32m"audi"[0m,
  model: [32m"a4"[0m,
  year: [32m2016.0[0m,
  colors_available: [
    [32m"blue"[0m
  ]
}


In [5]:
# Nested feature groups of the vehicle; each is its own object schema.
audio_schema = {
    "type": "object",
    "properties": {
        "brand": {"type": "string"},
        "speakers": {"type": "number"},
        "hasBluetooth": {"type": "boolean"},
    },
}
safety_schema = {
    "type": "object",
    "properties": {
        "airbags": {"type": "number"},
        "parkingSensors": {"type": "boolean"},
        "laneAssist": {"type": "boolean"},
    },
}
performance_schema = {
    "type": "object",
    "properties": {
        "engine": {"type": "string"},
        "horsepower": {"type": "number"},
        "topSpeed": {"type": "number"},
    },
}

# The vehicle itself: basic facts plus the three feature groups above.
vehicle_schema = {
    "type": "object",
    "properties": {
        "make": {"type": "string"},
        "model": {"type": "string"},
        "year": {"type": "number"},
        "colors": {"type": "array", "items": {"type": "string"}},
        "features": {
            "type": "object",
            "properties": {
                "audio": audio_schema,
                "safety": safety_schema,
                "performance": performance_schema,
            },
        },
    },
}

# The person who owns the vehicle.
owner_schema = {
    "type": "object",
    "properties": {
        "firstName": {"type": "string"},
        "lastName": {"type": "string"},
        "age": {"type": "number"},
    },
}

# Deeply nested schema: a car with feature groups, plus its owner.
complex_car = {
    "type": "object",
    "properties": {"car": vehicle_schema, "owner": owner_schema},
}
# Natural-language instruction paired with the nested car-and-owner schema.
complex_car_prompt = "generate an example Rolls Royce Phantom"

# No extra generation options are passed here, so Jsonformer's defaults apply.
builder = Jsonformer(
    json_schema=complex_car,
    prompt=complex_car_prompt,
    model=model,
    tokenizer=tokenizer,
)

# Announce the generation step, then invoke the builder to run it.
print("Generating...")
complex_car_result = builder()
output = complex_car_result

# Print the generated structure with its values highlighted.
highlight_values(complex_car_result)


Generating...
{
  car: {
    make: [32m"Rolls Royce"[0m,
    model: [32m"Phantom"[0m,
    year: [32m2016.0[0m,
    colors: [
      [32m"Gold"[0m
    ],
    features: {
      audio: {
        brand: [32m"Mercedes"[0m,
        speakers: [32m2.0[0m,
        hasBluetooth: [32mTrue[0m
      },
      safety: {
        airbags: [32m2.0[0m,
        parkingSensors: [32mTrue[0m,
        laneAssist: [32mTrue[0m
      },
      performance: {
        engine: [32m"Mercedes-Benz OM615"[0m,
        horsepower: [32m350.0[0m,
        topSpeed: [32m220.0[0m
      }
    }
  },
  owner: {
    firstName: [32m"John"[0m,
    lastName: [32m"Doe"[0m,
    age: [32m40.0[0m
  }
}
