# Stage 3 Test: Graph Builder

This notebook tests the hierarchy graph builder.

**Purpose**: Verify that the graph builder correctly computes each company's ultimate parent from its immediate parent–child relationships.


In [None]:
# Setup
# Make the repo checkout importable, then create the builder instance that the
# test cells in this notebook call.
import sys

REPO_ROOT = '/Workspace/Repos/phmsa-company-hierarchy/'
# Guard the append so re-running this cell does not pile up duplicate
# sys.path entries in a long-lived kernel.
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

import pandas as pd
from phmsa_hierarchy import HierarchyGraphBuilder

graph_builder = HierarchyGraphBuilder()

print("✓ Graph Builder initialized")


In [None]:
# Test 1: Simple Hierarchy
# Builds a three-level chain and checks the ultimate parent and depth that the
# builder reports for companies in it.
print("=== Test 1: Simple Hierarchy (3 levels) ===\n")

# Create test data: A → B → C (C is ultimate parent).
# The top of the chain is the row whose parent is the "ULTIMATE" marker.
test_data = pd.DataFrame([
    {"child": "Company A", "parent": "Company B"},
    {"child": "Company B", "parent": "Company C"},
    {"child": "Company C", "parent": "ULTIMATE"}
])

print("Input relationships:")
print(test_data.to_string(index=False))
print()

# Build graph
result = graph_builder.build(test_data)

print("Output hierarchy:")
print(result[['company', 'immediate_parent', 'ultimate_parent', 'hierarchy_path', 'hierarchy_depth']].to_string(index=False))

# Validations
# Each entry is (company, output column, expected value). Checking presence
# first turns a missing row into a readable assertion failure instead of an
# IndexError, and every failure names the company and column involved.
expectations = [
    ("Company A", "ultimate_parent", "Company C"),
    ("Company A", "hierarchy_depth", 2),
    ("Company C", "ultimate_parent", "Company C"),
]
for company, column, expected in expectations:
    matches = result.loc[result['company'] == company, column]
    assert not matches.empty, f"{company} is missing from the build output"
    actual = matches.iloc[0]
    assert actual == expected, (
        f"{company}: expected {column}={expected!r}, got {actual!r}"
    )

print("\n✓ Test passed - hierarchy correctly resolved")


In [None]:
# Test 2: Multiple Branches
# Builds a one-level fan-out (three subsidiaries under a single parent) and
# checks that every company resolves to that parent as its ultimate parent.
print("=== Test 2: Multiple Subsidiaries ===\n")

# Create test data: Multiple companies with same parent
test_data = pd.DataFrame([
    {"child": "Subsidiary 1", "parent": "Parent Corp"},
    {"child": "Subsidiary 2", "parent": "Parent Corp"},
    {"child": "Subsidiary 3", "parent": "Parent Corp"},
    {"child": "Parent Corp", "parent": "ULTIMATE"}
])

print("Input relationships:")
print(test_data.to_string(index=False))
print()

# Build graph
result = graph_builder.build(test_data)

print("Output hierarchy:")
print(result[['company', 'ultimate_parent', 'hierarchy_depth']].to_string(index=False))

# Get statistics
# NOTE(review): graph_builder is shared with earlier cells; whether
# get_statistics() reflects only the most recent build() call is not visible
# here, so the counts are displayed but not asserted — confirm before pinning.
stats = graph_builder.get_statistics()
print("\nStatistics:")
print(f"  Total companies: {stats['total_companies']}")
print(f"  Root companies: {stats['root_companies']}")
print(f"  Leaf companies: {stats['leaf_companies']}")

# Validations
# Without these checks the cell would report success unconditionally.
# Every company listed in the input, including the root itself, is expected
# to resolve to "Parent Corp" (a root is its own ultimate parent, matching
# the convention asserted in Test 1).
expected_ultimate = "Parent Corp"
for company in test_data["child"]:
    matches = result.loc[result['company'] == company, 'ultimate_parent']
    assert not matches.empty, f"{company} is missing from the build output"
    actual = matches.iloc[0]
    assert actual == expected_ultimate, (
        f"{company}: expected ultimate_parent={expected_ultimate!r}, got {actual!r}"
    )

print("\n✓ Test passed")
