# Stage 2 Test: LLM Validation

This notebook tests the LLM validator with candidate context.

**Purpose**: Verify that the LLM correctly validates and selects a parent from the candidate list.


In [None]:
# Setup
import sys

# Make the project checkout importable. The membership check keeps this cell
# idempotent: re-running it does not append a duplicate entry to sys.path.
REPO_ROOT = '/Workspace/Repos/phmsa-company-hierarchy/'
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

from langchain_community.chat_models import ChatDatabricks
from langchain_community.tools import DuckDuckGoSearchResults
from phmsa_hierarchy import LLMValidator

# Initialize LLM and search
# extra_params sets temperature to 0 and max_tokens to 1000 on the chat model
# (presumably for repeatable, bounded answers — confirm against the endpoint).
llm = ChatDatabricks(
    endpoint="databricks-claude-sonnet-4-5",
    extra_params={"temperature": 0, "max_tokens": 1000}
)
search_tool = DuckDuckGoSearchResults()

# Initialize validator
# The validator receives both the chat model and the web search tool.
llm_validator = LLMValidator(llm, search_tool)

print("✓ LLM Validator initialized")


In [None]:
# Test 1: Validation with Candidates
print("=== Test 1: LLM Validation with Candidates ===\n")

# Company under test, plus two pre-computed parent candidates laid out as
# rows; each row is zipped with the field names into a candidate dict.
company_name = "ENBRIDGE ENERGY, LIMITED PARTNERSHIP"
candidate_fields = ("name", "confidence", "reason", "match_type")
candidate_rows = [
    ("ENBRIDGE", 0.90, "Parent name contained in child name", "name_containment"),
    ("ENBRIDGE ENERGY PARTNERS LP", 0.85, "Similar base name", "base_name_match"),
]
candidates = [dict(zip(candidate_fields, row)) for row in candidate_rows]

# Echo the inputs before calling the validator.
print(
    f"Company: {company_name}",
    f"Candidates provided: {len(candidates)}\n",
    sep="\n",
)

# Ask the validator to pick a parent, passing the operator id and address
# alongside the candidate list.
result = llm_validator.validate(
    company_name=company_name,
    candidates=candidates,
    operator_id=11169,
    address="915 N ELDRIDGE PARKWAY, SUITE 1100, HOUSTON, TX",
)

# Report each field of the validator's answer on its own line.
print(f"Selected Parent: {result['parent']}")
print(f"Confidence: {result['confidence']}/10")
print(f"Reasoning: {result['reasoning']}")
print(f"Candidates Considered: {result['candidates_considered']}")

print("\n✓ Validation completed")


In [None]:
# Test 2: No Candidates (Ultimate Parent)
print("=== Test 2: Company with No Candidates ===\n")

# An empty candidate list stands in for "no candidates found".
company_name, candidates = "COLONIAL PIPELINE CO", []

# Echo the inputs before calling the validator.
print(
    f"Company: {company_name}",
    f"Candidates provided: {len(candidates)}\n",
    sep="\n",
)

# No address is supplied for this company; only the operator id.
result = llm_validator.validate(
    company_name=company_name,
    candidates=candidates,
    operator_id=2552,
)

print(f"Selected Parent: {result['parent']}")
print(f"Confidence: {result['confidence']}/10")
print(f"Reasoning: {result['reasoning']}")

# With nothing to choose from, the expected answer is the 'ULTIMATE' sentinel.
assert result['parent'] == 'ULTIMATE', "Should identify as ultimate parent"
print("\n✓ Test passed - correctly identified as ultimate parent")
