Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docs/specs/SPEC-SCHEMA-IMPL.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class SchemaDefinition:
version: int # Schema version
fields: list[SchemaField] # Parsed fields
validation_mode: str # "warn" | "strict" | "off"
frontmatter_fields: list[SchemaField] # From settings.frontmatter (default: [])


def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
Expand Down Expand Up @@ -145,14 +146,16 @@ class ValidationResult:
async def validate_note(
note: Note,
schema: SchemaDefinition,
frontmatter: dict | None = None,
) -> ValidationResult:
"""Validate a note against a schema definition.

Mapping rules:
- field: string → observation [field] exists
- field?(array): type → multiple [field] observations
- field?: EntityType → relation 'field [[...]]' exists
- field?(enum): [v] → observation [field] value ∈ enum values
- field: string → observation [field] exists
- field?(array): type → multiple [field] observations
- field?: EntityType → relation 'field [[...]]' exists
- field?(enum): [v] → observation [field] value ∈ enum values
- settings.frontmatter field → frontmatter key presence/value
"""
```

Expand Down
30 changes: 30 additions & 0 deletions docs/specs/SPEC-SCHEMA.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ authors to learn.
| `field?(array): EntityType` | Multiple `field` relations | `- authored [[Book]]` (×N) |
| `tags` | Frontmatter `tags` array | `tags: [startups, essays]` |
| `field?(enum): [values]` | Observation `[field] value` where value ∈ set | `- [status] active` |
| `settings.frontmatter` field | Frontmatter key presence/value | `tags: [python, ai]` |

### Key Insight

Expand All @@ -99,6 +100,9 @@ schema:
expertise?(array): string, areas of knowledge
settings:
validation: warn # warn | strict | off
frontmatter:
tags?(array): string, note categories
status?(enum): [draft, review, published]
---

# Person
Expand Down Expand Up @@ -230,6 +234,32 @@ $ bm schema validate people/ada-lovelace.md
"Unmatched" items are informational — observations and relations the schema doesn't cover.
They're valid. Schemas are a subset, not a straitjacket.

### Frontmatter Validation

Schema notes can declare validation rules for frontmatter keys under `settings.frontmatter`
using the same Picoschema syntax as the `schema` block:

```yaml
settings:
validation: warn
frontmatter:
tags?(array): string
status?(enum): [draft, review, published]
```

- Frontmatter rules use the same Picoschema key syntax (`?` for optional, `(enum)`, `(array)`)
- Only available on schema notes (inline schemas skip frontmatter validation)
- Checks key presence (required vs optional) and enum value membership
- Frontmatter keys not declared under `settings.frontmatter` are silently ignored
- Missing required frontmatter keys and invalid enum values produce a warning (or an error in strict mode)

Example output for a missing required frontmatter key:

```
⚠ Person schema validation:
- Missing required frontmatter key: status
```

### Batch Validation

```
Expand Down
2 changes: 2 additions & 0 deletions src/basic_memory/api/v2/routers/schema_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ async def search_fn(query: str) -> list[dict]:
schema_def,
_entity_observations(entity),
_entity_relations(entity),
frontmatter=frontmatter,
)
results.append(_to_note_validation_response(result))

Expand Down Expand Up @@ -149,6 +150,7 @@ async def search_fn(query: str) -> list[dict]:
schema_def,
_entity_observations(entity),
_entity_relations(entity),
frontmatter=frontmatter,
)
results.append(_to_note_validation_response(result))

Expand Down
11 changes: 11 additions & 0 deletions src/basic_memory/schema/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class SchemaDefinition:
version: int # Schema version
fields: list[SchemaField] # Parsed fields
validation_mode: str # "warn" | "strict" | "off"
frontmatter_fields: list[SchemaField] = field(default_factory=list) # From settings.frontmatter


# --- Built-in scalar types ---
Expand Down Expand Up @@ -228,9 +229,19 @@ def parse_schema_note(frontmatter: dict) -> SchemaDefinition:

fields = parse_picoschema(schema_dict)

# --- Frontmatter validation rules ---
# Trigger: settings.frontmatter is a dict of Picoschema field declarations
# Why: allows schema notes to validate frontmatter keys (tags, status, etc.)
# Outcome: frontmatter_fields populated using same parser as schema fields
frontmatter_dict = settings.get("frontmatter") if isinstance(settings, dict) else None
frontmatter_fields = (
parse_picoschema(frontmatter_dict) if isinstance(frontmatter_dict, dict) else []
)

return SchemaDefinition(
entity=entity,
version=version,
fields=fields,
validation_mode=validation_mode,
frontmatter_fields=frontmatter_fields,
)
84 changes: 84 additions & 0 deletions src/basic_memory/schema/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def validate_note(
schema: SchemaDefinition,
observations: list[ObservationData],
relations: list[RelationData],
frontmatter: dict | None = None,
) -> ValidationResult:
"""Validate a note against a schema definition.

Expand All @@ -64,6 +65,7 @@ def validate_note(
schema: The resolved SchemaDefinition to validate against.
observations: List of ObservationData from the note's observations.
relations: List of RelationData from the note's relations.
frontmatter: The note's frontmatter dict for settings.frontmatter validation.

Returns:
A ValidationResult with per-field results, unmatched items, and warnings/errors.
Expand Down Expand Up @@ -113,6 +115,33 @@ def validate_note(
else:
result.warnings.append(msg)

# --- Validate frontmatter fields ---
# Trigger: schema has frontmatter_fields and caller provided frontmatter dict
# Why: settings.frontmatter rules validate metadata keys like tags, status
# Outcome: frontmatter fields produce the same FieldResult/warning/error as content fields
if frontmatter is not None and schema.frontmatter_fields:
for fm_field in schema.frontmatter_fields:
field_result = _validate_frontmatter_field(fm_field, frontmatter)
result.field_results.append(field_result)

if field_result.status == "missing" and fm_field.required:
msg = f"Missing required frontmatter key: {fm_field.name}"
if schema.validation_mode == "strict":
result.errors.append(msg)
result.passed = False
else:
result.warnings.append(msg)

elif field_result.status == "enum_mismatch":
msg = field_result.message or (
f"Frontmatter key '{fm_field.name}' has invalid enum value"
)
if schema.validation_mode == "strict":
result.errors.append(msg)
result.passed = False
else:
result.warnings.append(msg)

# --- Collect unmatched observations ---
for category, values in obs_by_category.items():
if category not in matched_categories:
Expand Down Expand Up @@ -227,6 +256,61 @@ def _validate_enum_field(
)


# --- Frontmatter Field Validation ---


def _validate_frontmatter_field(
    schema_field: SchemaField,
    frontmatter: dict,
) -> FieldResult:
    """Check one frontmatter key against its schema field declaration.

    Args:
        schema_field: The declared field; its ``name`` is the frontmatter key
            looked up, and ``is_enum`` / ``enum_values`` drive enum checking.
        frontmatter: The note's frontmatter mapping.

    Returns:
        A FieldResult whose status is one of:
        - ``"missing"``: the key is absent or its value is ``None``.
        - ``"enum_mismatch"``: the field is an enum and the stringified value
          is not one of the allowed values.
        - ``"present"``: otherwise; ``values`` holds the stringified value, or
          one string per item when a non-enum value is a list.
    """
    key = schema_field.name
    raw = frontmatter.get(key)

    # An absent key and an explicit null are indistinguishable here; both
    # are reported as missing.
    if raw is None:
        return FieldResult(
            field=schema_field,
            status="missing",
            message=f"Missing frontmatter key: {key}",
        )

    # Non-enum fields only need a presence check: lists are flattened into
    # one string per item, anything else becomes a single-element list.
    if not schema_field.is_enum:
        if isinstance(raw, list):
            rendered = [str(item) for item in raw]
        else:
            rendered = [str(raw)]
        return FieldResult(
            field=schema_field,
            status="present",
            values=rendered,
        )

    # Enum fields: the value is stringified as a whole before the
    # membership test against the declared enum values.
    text = str(raw)
    if text in schema_field.enum_values:
        return FieldResult(
            field=schema_field,
            status="present",
            values=[text],
        )

    permitted = ", ".join(schema_field.enum_values)
    return FieldResult(
        field=schema_field,
        status="enum_mismatch",
        values=[text],
        message=f"Frontmatter key '{key}' has invalid value: "
        f"{text} (allowed: {permitted})",
    )


# --- Helper Functions ---


Expand Down
Loading
Loading