In [1]:
from spindle import SpindleExtractor, create_ontology, GraphStore

In [2]:
# Use environment variable or default
store = GraphStore()

In [3]:
store.create_graph("test1")

In [4]:
# Create extractor and extract triples
entity_types = [
    {"name": "Person", "description": "A human being"},
    {"name": "Organization", "description": "A company"}
]
relation_types = [
    {
        "name": "works_at",
        "description": "Employment relationship",
        "domain": "Person",
        "range": "Organization"
    }
]

ontology = create_ontology(entity_types, relation_types)
extractor = SpindleExtractor(ontology)

text = "Alice Johnson works at TechCorp in San Francisco."
result = extractor.extract(text, source_name="Company Directory")

2025-10-30T15:47:55.271 [BAML [92mINFO[0m] [35mFunction ExtractTriples[0m:
    [33mClient: CustomSonnet4 (claude-sonnet-4-20250514) - 6922ms. StopReason: end_turn. Tokens(in/out): 737/292[0m
    [34m---PROMPT---[0m
    [2m[43muser: [0m[2mYou are a knowledge graph extraction expert. Your task is to extract structured triples (subject-predicate-object) from the provided text, along with supporting evidence.ONTOLOGY:
    You must extract triples that conform to the following ontology:
    
    Valid Entity Types:
    - Person: A human being
    - Organization: A company
    
    Valid Relation Types:
    - works_at: Employment relationship
      (Domain: Person, Range: Organization)
    
    SOURCE METADATA:
    Source Name: Company Directory
    
    EXISTING TRIPLES:
    This is the first extraction, so there are no existing triples to consider.
    
    TEXT TO ANALYZE:
    Alice Johnson works at TechCorp in San Francisco.
    
    INSTRUCTIONS:
    1. Extract all meaningfu

In [6]:
print(result)

triples=[Triple(subject='Alice Johnson', predicate='works_at', object='TechCorp', source=SourceMetadata(source_name='Company Directory', source_url=None), supporting_spans=[CharacterSpan(text='Alice Johnson works at TechCorp', start=0, end=31)], extraction_datetime='2025-10-30T20:47:55Z')] reasoning="I extracted one triple from the text that conforms to the ontology. The subject 'Alice Johnson' is a Person entity type, the predicate 'works_at' is a valid relation type for employment relationships, and the object 'TechCorp' is an Organization entity type. This matches the domain and range requirements (Person works_at Organization). I used the full name 'Alice Johnson' for consistency and clarity rather than just 'Alice'. The supporting span 'Alice Johnson works at TechCorp' provides direct evidence for the employment relationship. The additional information about San Francisco describes a location but doesn't fit the available ontology, so it was not extracted as a separate triple."


In [8]:
result.triples

[Triple(subject='Alice Johnson', predicate='works_at', object='TechCorp', source=SourceMetadata(source_name='Company Directory', source_url=None), supporting_spans=[CharacterSpan(text='Alice Johnson works at TechCorp', start=0, end=31)], extraction_datetime='2025-10-30T20:47:55Z')]

In [13]:
# Add triples
store.add_triples(result.triples)

1

In [14]:
store.query_by_pattern(predicate="works_at")

[{'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}},
 {'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}},
 {'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}}]

In [15]:
stats = store.get_statistics()

In [18]:
stats

{'node_count': 2,
 'edge_count': 3,
 'sources': ['Company Directory'],
 'predicates': ['works_at'],
 'date_range': {'earliest': '2025-10-30T20:47:55Z',
  'latest': '2025-10-30T20:47:55Z'}}

In [19]:
store.get_node("Alice Johnson")

{'name': 'Alice Johnson',
 'type': 'Entity',
 'metadata': {'sources': ['Company Directory'],
  'first_seen': '2025-10-30T20:47:55Z'}}

In [22]:
store.get_edge("Alice Johnson", "works_at", "TechCorp")

[{'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}},
 {'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}},
 {'subject': 'Alice Johnson',
  'predicate': 'works_at',
  'object': 'TechCorp',
  'source': 'Company Directory',
  'extraction_datetime': '2025-10-30T20:47:55Z',
  'supporting_evidence': [{'text': 'Alice Johnson works at TechCorp',
    'start': 0,
    'end': 31}],
  'metadata': {'source_url': ''}}]

In [16]:
print(f"Stored {stats['edge_count']} relationships")

Stored 3 relationships


In [17]:
# Verify nodes were created
alice_node = store.get_node("Alice Johnson")
techcorp_node = store.get_node("TechCorp")
print(f"Alice node exists: {alice_node is not None}")
print(f"TechCorp node exists: {techcorp_node is not None}")


Alice node exists: True
TechCorp node exists: True


In [None]:
## Issue Resolution

**Problem:** The `store.add_triples()` method was not storing triples correctly.

**Root Cause:** The `pandas` library was not installed. KÃ¹zu's `get_as_df()` method requires pandas (and numpy) to convert query results to DataFrames, but these dependencies were missing from the project.

**Solution:** Added `pandas>=2.0.0` to the project dependencies in `pyproject.toml`.

Now the triples are being stored and queried correctly!
