In [1]:
from jsonformer.pydantic_parser import create_scheme
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer
from typing import Literal
from pydantic import BaseModel

In [2]:
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
import torch


# Hugging Face Hub id of the causal language model used throughout the notebook.
checkpoint = "meta-llama/Llama-2-7b-hf"

# bitsandbytes settings: load the weights in 4-bit NF4 with double quantization
# enabled, and use bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Reuse the EOS token as the padding token
# (presumably because this tokenizer ships without one -- TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

# `trust_remote_code` is deliberately left at its default (False): this checkpoint
# is served by transformers' built-in Llama implementation, so there is no reason
# to allow modeling code hosted in the Hub repository to execute locally.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",  # let accelerate decide where the weights are placed
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Closed set of audio-system brands, used as the type of `Audio.brand`.
# In the schema printed by this cell it appears as an 'enum' with these values.
audio_brands = Literal["JBL", "Bose", "Sony", "Harman Kardon"]

# Engine and speed figures for a car; nested under `Features.performance`.
class Performance(BaseModel):
    # Free-text engine description (the sample run produced "4.0L V6").
    engine: str
    # Both floats show up as 'number' in the generated schema; units are not specified here.
    horsepower: float
    topSpeed: float

# Safety equipment of a car; nested under `Features.safety`.
class Safety(BaseModel):
    # Declared as int, but the generated schema lists it as 'number'
    # and the sample run returned 6.0.
    airbags: int 
    # Boolean flags; rendered as 'boolean' in the generated schema.
    parkingSensors: bool 
    laneAssist: bool

# Audio system of a car; nested under `Features.audio`.
class Audio(BaseModel):
    # Restricted to the values of the `audio_brands` Literal (an 'enum' in the schema).
    brand: audio_brands
    # Declared as int; the generated schema lists it as 'number'.
    speakers: int
    hasBluetooth: bool

# Groups the three feature sub-models of a car; becomes the nested
# 'features' object in the generated schema.
class Features(BaseModel):
    audio: Audio
    safety: Safety
    performance: Performance

# Description of a single car; the `car` half of `CarObject`.
class Car(BaseModel):
    make: str
    model: str
    # Declared as int, but the generated schema lists it as 'number',
    # so the sample run returned 2018.0 rather than 2018.
    year: int 
    # Rendered as an 'array' of 'string' items in the generated schema.
    colors: list[str]
    features: Features

# The person who owns the car; the `owner` half of `CarObject`.
class Owner(BaseModel):
    firstName: str
    lastName: str
    # Declared as int; the sample run returned it as a float (32.0).
    age: int

# Root model of the example: the model handed to `create_scheme` to build
# the schema used for generation.
class CarObject(BaseModel):
    car: Car
    owner: Owner

# Convert the pydantic model into the schema dict that is later passed to
# Jsonformer as `json_schema`.
car_object = create_scheme(CarObject)
# Bare last expression so the notebook displays the resulting schema.
car_object

{'type': 'object',
 'properties': {'car': {'type': 'object',
   'properties': {'make': {'type': 'string'},
    'model': {'type': 'string'},
    'year': {'type': 'number'},
    'colors': {'type': 'array', 'items': {'type': 'string'}},
    'features': {'type': 'object',
     'properties': {'audio': {'type': 'object',
       'properties': {'brand': {'type': 'enum',
         'values': ['JBL', 'Bose', 'Sony', 'Harman Kardon']},
        'speakers': {'type': 'number'},
        'hasBluetooth': {'type': 'boolean'}}},
      'safety': {'type': 'object',
       'properties': {'airbags': {'type': 'number'},
        'parkingSensors': {'type': 'boolean'},
        'laneAssist': {'type': 'boolean'}}},
      'performance': {'type': 'object',
       'properties': {'engine': {'type': 'string'},
        'horsepower': {'type': 'number'},
        'topSpeed': {'type': 'number'}}}}}}},
  'owner': {'type': 'object',
   'properties': {'firstName': {'type': 'string'},
    'lastName': {'type': 'string'},
    'age'

In [4]:
# Bind the loaded model and tokenizer to the schema produced by `create_scheme`,
# together with the free-text prompt that seeds the generation.
builder = Jsonformer(
    prompt="generate an example car",
    json_schema=car_object,
    model=model,
    tokenizer=tokenizer,
    max_string_token_length=20,  # cap passed to Jsonformer for string values
)

print("Generating...")
# Calling the builder runs the generation and returns the filled-in object.
output = builder()

# Pretty-print the result; in the captured output the generated values are
# wrapped in colour codes.
highlight_values(output)

Generating...
{
  car: {
    make: [32m"Toyota"[0m,
    model: [32m"Camry"[0m,
    year: [32m2018.0[0m,
    colors: [
      [32m"Silver"[0m,
      [32m"Black"[0m,
      [32m"Red"[0m,
      [32m"Blue"[0m,
      [32m"Green"[0m,
      [32m"Yellow"[0m,
      [32m"Purple"[0m,
      [32m"Orange"[0m,
      [32m"Gray"[0m,
      [32m"White"[0m
    ],
    features: {
      audio: {
        brand: [32m"JBL"[0m,
        speakers: [32m10.0[0m,
        hasBluetooth: [32mTrue[0m
      },
      safety: {
        airbags: [32m6.0[0m,
        parkingSensors: [32mFalse[0m,
        laneAssist: [32mFalse[0m
      },
      performance: {
        engine: [32m"4.0L V6"[0m,
        horsepower: [32m301.0[0m,
        topSpeed: [32m130.0[0m
      }
    }
  },
  owner: {
    firstName: [32m"John"[0m,
    lastName: [32m"Doe"[0m,
    age: [32m32.0[0m
  }
}
