In [87]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [88]:
from collections.abc import Callable
import streamlit as st
import json
import httpx
from typing import Optional
import sys
import os

from any2json.schema_utils import to_supported_json_schema
from any2json.training.utils import format_example
import fastjsonschema

from any2json.utils import json_dumps_minified

In [104]:
from any2json.training.train import prepare_model_and_tokenizer, PipelineConfig
from transformers import TrainingArguments
pcfg = PipelineConfig()
pcfg.model_name = "../artifacts/any2json_gemma270m:v1"

args = TrainingArguments()


In [105]:
model, tokenizer = prepare_model_and_tokenizer(
    pcfg, args,
)

In [106]:
model.eval()

Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 640, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear(in_features=640, out_features=1024, bias=False)
          (k_proj): Linear(in_features=640, out_features=256, bias=False)
          (v_proj): Linear(in_features=640, out_features=256, bias=False)
          (o_proj): Linear(in_features=1024, out_features=640, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear(in_features=640, out_features=2048, bias=False)
          (up_proj): Linear(in_features=640, out_features=2048, bias=False)
          (down_proj): Linear(in_features=2048, out_features=640, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma3RMSNorm((640,), eps

In [107]:
def validate_json_schema(
    schema_text: str,
) -> tuple[bool, Optional[dict], str, Optional[Callable]]:
    if not schema_text.strip():
        return True, None, None, "", None

    try:
        schema = json.loads(schema_text)
        processed_schema = to_supported_json_schema(schema)
        validator = fastjsonschema.compile(processed_schema)
        return True, processed_schema, schema, "", validator
    except json.JSONDecodeError as e:
        return False, None, None, f"Invalid JSON: {e}", None
    except ValueError as e:
        return False, None, None, f"Schema error: {e}", None
    except Exception as e:
        return False, None, None, f"Unexpected error: {e}", None

In [108]:
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use mps:0


In [113]:
input_data = """
{
    "techniques": [
        {
            "description": "Using personal anecdotes to engage the audience",
            "example_speaker": "Chimamanda Ngozi Adichie",
            "example_talk": "We Should All Be Feminists",
            "name": "Storytelling"
        },
        {
            "description": "Comparing complex ideas to relatable concepts",
            "example_speaker": "Brené Brown",
            "example_talk": "The Power of Vulnerability",
            "name": "Metaphor"
        },
        {
            "description": "Using images, videos, or props to illustrate a point",
            "example_speaker": "Ken Robinson",
            "example_talk": "Do Schools Kill Creativity?",
            "name": "Visual Aids"
        }
    ]
}
"""

schema = {"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"release_year":{"type":"number"},"budget":{"type":"number"},"worldwide_gross":{"type":"number"}}}}
schema = json.dumps(schema)
_, processed_schema, schema, _, validator = validate_json_schema(schema)
schema_str = (
    json_dumps_minified(processed_schema)
    if processed_schema
    else "[MISSING]"
)
input_str = format_example(input_data, schema_str)
print(input_str)

Convert input data to json according to JSONSchema
[SCHEMA]{"items":{"properties":{"budget":{"type":["number","null"]},"release_year":{"type":["number","null"]},"title":{"type":["string","null"]},"worldwide_gross":{"type":["number","null"]}},"type":["object","null"]},"type":["array","null"]}[INPUT]
{
    "techniques": [
        {
            "description": "Using personal anecdotes to engage the audience",
            "example_speaker": "Chimamanda Ngozi Adichie",
            "example_talk": "We Should All Be Feminists",
            "name": "Storytelling"
        },
        {
            "description": "Comparing complex ideas to relatable concepts",
            "example_speaker": "Brené Brown",
            "example_talk": "The Power of Vulnerability",
            "name": "Metaphor"
        },
        {
            "description": "Using images, videos, or props to illustrate a point",
            "example_speaker": "Ken Robinson",
            "example_talk": "Do Schools Kill Creativity

In [116]:
result = pipe(input_str)[0]["generated_text"]
result_output = result.split('[OUTPUT]')[1]
print('Generated output:')
print(result_output)
print()
result_output = json.loads(result_output)

print('Parsed output:')
print(json.dumps(result_output, indent=2))

if validator:
    try:
        validator(result_output)
        print('✅ Schema validation passed')
    except fastjsonschema.exceptions.JsonSchemaException as e:
        raise


Generated output:
{"    \"techniques":[{},{}]}                \"description\": \"Using personal anecdotes to engage the audience \"example_speaker\": \"Chimamanda Ngozi Adichie\", \"example_talk\": \"We Should All Be Feminists\", \"name\": \"Storytelling\"},\n          \"description\": \"Comparing complex ideas to relatable concepts \"example_speaker\": \"Bren\u00e9 Brown\", \"example_talk\": \"The Power of Vulnerability\", \"name\": \"Metaphor\"},{}],"description":"Comparing complex ideas to relatable concepts \"example_speaker\": \"Bren\u00e9 Brown\", \"example_talk\": \"The Power of Vulnerability\", \"name\": \"Metaphor\"}"}]}\"ark\"}[6\"fieldproperties":{".\"description\": \"Using images, videos, or props to illustrate a point \"example_speaker\": \"Ken Robinson\", \"example_talk\": \"Do Schools Kill Creativity?","name":"Visual Aids"}],"app\u00e9\"},\"description\": \"Using images, videos, or props to illustrate a point \"example_speaker\": \"Ken Robinson\", \"example_talk\": \"Do 

JSONDecodeError: Extra data: line 1 column 45 (char 44)