In [1]:
import json
import os
from pathlib import Path
import grpc
import pandas as pd
import networkx as nx
from pyvis.network import Network
from senzing import SzEngine, SzError
from senzing_grpc import SzAbstractFactoryGrpc
from IPython.display import display, HTML

# Get connection info from environment
SENZING_HOST = os.getenv('SENZING_GRPC_HOST', 'senzing')
SENZING_PORT = os.getenv('SENZING_GRPC_PORT', '8261')

print(f"Connecting to Senzing at {SENZING_HOST}:{SENZING_PORT}")

# Create gRPC channel and engine
grpc_url = f"{SENZING_HOST}:{SENZING_PORT}"
grpc_channel = grpc.insecure_channel(grpc_url)
sz_abstract_factory = SzAbstractFactoryGrpc(grpc_channel)
sz_engine = sz_abstract_factory.create_engine()

print("Connected to Senzing successfully")

Connecting to Senzing at senzing:8261
Connected to Senzing successfully


In [2]:
# Export entities to work with - include record JSON data
print("Exporting entity data from Senzing with full details...")
print("This may take a minute depending on dataset size...")

from senzing import SzEngineFlags

entities = []
export_handle = sz_engine.export_json_entity_report(
    flags=SzEngineFlags.SZ_EXPORT_INCLUDE_ALL_ENTITIES | 
          SzEngineFlags.SZ_ENTITY_INCLUDE_RECORD_JSON_DATA
)
count = 0

while True:
    try:
        entity_json = sz_engine.fetch_next(export_handle)
        if not entity_json:
            break
        
        entity = json.loads(entity_json)
        entities.append(entity)
        count += 1
        
        if count % 100 == 0:
            print(f"  Exported {count} entities...", end='\r')
    except StopIteration:
        break

sz_engine.close_export_report(export_handle)
print(f"\nExported {len(entities)} entities total")

Exporting entity data from Senzing with full details...
This may take a minute depending on dataset size...
  Exported 100 entities...
Exported 196 entities total


In [3]:
# Get dataset statistics from Senzing
print("Dataset Overview:")
print("="*60)

stats = sz_engine.get_stats()
stats_dict = json.loads(stats)

if 'workload' in stats_dict:
    total_records = stats_dict['workload'].get('loadedRecords', 0)
    print(f"Total records in database: {total_records:,}")

# This should come after your export cell
num_entities = len(entities)
print(f"Total unique entities:     {num_entities:,}")
print(f"Records merged:            {total_records - num_entities:,}")
print(f"Reduction:                 {((total_records - num_entities) / total_records * 100):.1f}%")
print("="*60)

Dataset Overview:
Total records in database: -1
Total unique entities:     196
Records merged:            -197
Reduction:                 19700.0%


In [4]:
# Let's look at what we have in the entities
print("Sample Entity Structure:")
print("="*60)

if entities:
    sample = entities[0]
    
    print(f"Entity ID: {sample.get('RESOLVED_ENTITY', {}).get('ENTITY_ID')}")
    print(f"Entity Name: {sample.get('RESOLVED_ENTITY', {}).get('ENTITY_NAME')}")
    
    # Check for records
    records = sample.get('RESOLVED_ENTITY', {}).get('RECORDS', [])
    print(f"Number of records: {len(records)}")
    
    # Check for relationships
    relationships = sample.get('RESOLVED_ENTITY', {}).get('RELATIONSHIPS', [])
    print(f"Number of relationships: {len(relationships)}")
    
    print("\nFirst few records:")
    for rec in records[:3]:
        print(f"  {rec.get('DATA_SOURCE')}: {rec.get('RECORD_ID')}")
    
    if relationships:
        print("\nSample relationship:")
        print(json.dumps(relationships[0], indent=2))
else:
    print("No entities found")

Sample Entity Structure:
Entity ID: 1
Entity Name: None
Number of records: 3
Number of relationships: 0

First few records:
  OPEN-SANCTIONS: NK-25vyVFzt8vdJGgAXMRTwTJ
  OPEN-OWNERSHIP: 17207853441353212969
  OPEN-OWNERSHIP: 6747548100436839873


In [5]:
# Let's check what person names actually look like
print("Checking person name structure...")
print("="*60)

person_found = False
for entity in entities[:20]:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    records = entity_data.get('RECORDS', [])
    
    for rec in records:
        json_data = rec.get('JSON_DATA', {})
        if json_data.get('RECORD_TYPE') == 'PERSON':
            print(f"\nEntity ID: {entity_data.get('ENTITY_ID')}")
            print(f"ENTITY_NAME: {entity_data.get('ENTITY_NAME')}")
            print(f"\nNAMES structure:")
            names = json_data.get('NAMES', [])
            for name in names:
                print(f"  Keys: {list(name.keys())}")
                print(f"  Content: {name}")
            person_found = True
            break
    
    if person_found:
        break

if not person_found:
    print("No person records found in first 20 entities")

Checking person name structure...

Entity ID: 1
ENTITY_NAME: None

NAMES structure:
  Keys: ['NAME_TYPE', 'NAME_FULL']
  Content: {'NAME_TYPE': 'PRIMARY', 'NAME_FULL': 'Abassin BADSHAH'}


In [6]:
# Find entities that don't have names
print("Looking for entities without names...")
print("="*60)

no_name_count = 0
for entity in entities[:50]:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    entity_id = entity_data.get('ENTITY_ID')
    records = entity_data.get('RECORDS', [])
    
    has_name = False
    if records:
        first_record = records[0]
        json_data = first_record.get('JSON_DATA', {})
        name_list = json_data.get('NAMES', [])
        
        for name_obj in name_list:
            if name_obj.get('NAME_FULL') or name_obj.get('PRIMARY_NAME_ORG') or name_obj.get('NAME_ORG'):
                has_name = True
                break
    
    if not has_name:
        no_name_count += 1
        if no_name_count <= 3:  # Show first 3 examples
            print(f"\nEntity {entity_id} has no name:")
            print(f"  Number of records: {len(records)}")
            if records:
                rec = records[0]
                print(f"  Data source: {rec.get('DATA_SOURCE')}")
                print(f"  Record ID: {rec.get('RECORD_ID')}")
                json_data = rec.get('JSON_DATA', {})
                print(f"  RECORD_TYPE: {json_data.get('RECORD_TYPE')}")
                print(f"  JSON keys: {list(json_data.keys())}")
                
                # Show NAMES structure
                names = json_data.get('NAMES', [])
                print(f"  NAMES field: {names}")

print(f"\nTotal entities without names in first 50: {no_name_count}")

Looking for entities without names...

Entity 27 has no name:
  Number of records: 2
  Data source: OPEN-OWNERSHIP
  Record ID: 10264459789712927869
  RECORD_TYPE: PERSON
  JSON keys: ['DATA_SOURCE', 'RECORD_ID', 'statementDate', 'RECORD_TYPE', 'NAMES', 'PRIMARY_NAME_FULL', 'personType', 'ATTRIBUTES', 'ADDRESSES', 'IDENTIFIERS', 'LINKS', 'RELATIONSHIPS', 'replaces_statements']
  NAMES field: []

Entity 28 has no name:
  Number of records: 3
  Data source: OPEN-OWNERSHIP
  Record ID: 10369029484097831758
  RECORD_TYPE: PERSON
  JSON keys: ['DATA_SOURCE', 'RECORD_ID', 'statementDate', 'RECORD_TYPE', 'NAMES', 'PRIMARY_NAME_FULL', 'personType', 'ATTRIBUTES', 'DATE_OF_BIRTH', 'ADDRESSES', 'LINKS', 'RELATIONSHIPS']
  NAMES field: []

Entity 29 has no name:
  Number of records: 2
  Data source: OPEN-OWNERSHIP
  Record ID: 10390699576067371333
  RECORD_TYPE: PERSON
  JSON keys: ['DATA_SOURCE', 'RECORD_ID', 'statementDate', 'RECORD_TYPE', 'NAMES', 'PRIMARY_NAME_FULL', 'personType', 'ATTRIBUTES'

In [7]:
# Build NetworkX graph from entity relationships
print("Building graph from entity relationships...")

G = nx.Graph()

# Add nodes for each entity
for entity in entities:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    entity_id = entity_data.get('ENTITY_ID')
    entity_name = entity_data.get('ENTITY_NAME', f"Entity {entity_id}")
    
    # Get entity type and more details from records
    records = entity_data.get('RECORDS', [])
    record_type = 'UNKNOWN'
    data_sources = []
    addresses = []
    identifiers = []
    names = []
    
    if records:
        # Get type and names from first record
        first_record = records[0]
        json_data = first_record.get('JSON_DATA', {})
        record_type = json_data.get('RECORD_TYPE', 'UNKNOWN')
        
        # Check for PRIMARY_NAME_FULL at top level first
        if json_data.get('PRIMARY_NAME_FULL'):
            names.append(json_data.get('PRIMARY_NAME_FULL'))
        
        # Extract names from NAMES array
        name_list = json_data.get('NAMES', [])
        for name_obj in name_list:
            # Try NAME_FULL first (works for both persons and orgs)
            full_name = name_obj.get('NAME_FULL')
            if full_name:
                names.append(full_name)
            # Try organization name
            elif name_obj.get('PRIMARY_NAME_ORG'):
                names.append(name_obj.get('PRIMARY_NAME_ORG'))
            elif name_obj.get('NAME_ORG'):
                names.append(name_obj.get('NAME_ORG'))
        
        # Collect info from all records
        for rec in records:
            data_sources.append(rec.get('DATA_SOURCE'))
            rec_json = rec.get('JSON_DATA', {})
            
            # Get addresses
            addrs = rec_json.get('ADDRESSES', [])
            for addr in addrs[:1]:  # Just first address
                addr_full = addr.get('ADDR_FULL', '')
                if addr_full:
                    addresses.append(addr_full)
            
            # Get identifiers
            ids = rec_json.get('IDENTIFIERS', [])
            for id_obj in ids[:2]:  # First 2 identifiers
                id_type = id_obj.get('NATIONAL_ID_TYPE') or id_obj.get('OTHER_ID_TYPE')
                id_num = id_obj.get('NATIONAL_ID_NUMBER') or id_obj.get('OTHER_ID_NUMBER')
                if id_type and id_num:
                    identifiers.append(f"{id_type}: {id_num}")
    
    data_sources = list(set(data_sources))
    
    # Use the extracted name or fall back to entity name
    primary_name = names[0] if names else entity_name or f"Entity {entity_id}"
    
    # Create readable tooltip with newlines
    tooltip_parts = [
        primary_name,
        f"Type: {record_type}",
        f"Entity ID: {entity_id}",
        f"Records merged: {len(records)}",
        f"Data sources: {', '.join(data_sources)}"
    ]
    
    # Add alternate names if any
    if len(names) > 1:
        tooltip_parts.append(f"Also known as: {', '.join(names[1:3])}")
    
    if addresses:
        tooltip_parts.append(f"Address: {addresses[0][:60]}")
    
    if identifiers:
        tooltip_parts.append("Identifiers:")
        for ident in identifiers[:3]:
            tooltip_parts.append(f"  {ident}")
    
    tooltip = "\n".join(tooltip_parts)
    
    # Use primary name for display label
    display_label = primary_name
    if len(display_label) > 35:
        display_label = display_label[:32] + "..."
    
    # Add node with attributes
    G.add_node(
        entity_id,
        label=display_label,
        title=tooltip,
        type=record_type,
        data_sources=data_sources,
        num_records=len(records),
        full_name=primary_name
    )

print(f"Added {G.number_of_nodes()} nodes")

# Add edges from relationships in the original records
edges_added = 0

for entity in entities:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    anchor_entity_id = entity_data.get('ENTITY_ID')
    
    # Look through records for relationship data
    for record in entity_data.get('RECORDS', []):
        relationships = record.get('JSON_DATA', {}).get('RELATIONSHIPS', [])
        
        for rel in relationships:
            pointer_key = rel.get('REL_POINTER_KEY')
            pointer_role = rel.get('REL_POINTER_ROLE', 'related')
            
            # Find which entity this pointer_key belongs to
            for target_entity in entities:
                target_data = target_entity.get('RESOLVED_ENTITY', {})
                target_entity_id = target_data.get('ENTITY_ID')
                
                for target_record in target_data.get('RECORDS', []):
                    if target_record.get('RECORD_ID') == pointer_key:
                        if anchor_entity_id != target_entity_id:
                            G.add_edge(
                                anchor_entity_id,
                                target_entity_id,
                                relationship=pointer_role
                            )
                            edges_added += 1
                        break

print(f"Added {edges_added} edges")
print(f"\nGraph statistics:")
print(f"  Nodes: {G.number_of_nodes()}")
print(f"  Edges: {G.number_of_edges()}")
print(f"  Connected components: {nx.number_connected_components(G)}")

Building graph from entity relationships...
Added 196 nodes
Added 396 edges

Graph statistics:
  Nodes: 196
  Edges: 233
  Connected components: 59


In [8]:
# Create interactive visualization with PyVis - LARGE VERSION
print("Creating interactive visualization...")

net = Network(
    height="1200px",  # Taller
    width="100%",
    bgcolor="#ffffff",
    font_color="#000000",
    notebook=True
)

# Set physics for better layout
net.barnes_hut(
    gravity=-5000,
    central_gravity=0.3,
    spring_length=100,
    spring_strength=0.001,
    damping=0.09
)

# Color nodes by type
color_map = {
    'PERSON': '#ff7f0e',
    'ORGANIZATION': '#1f77b4',
    'UNKNOWN': '#7f7f7f'
}

# Add nodes with styling
for node_id, node_data in G.nodes(data=True):
    node_type = node_data.get('type', 'UNKNOWN')
    num_records = node_data.get('num_records', 1)
    size = 10 + (num_records * 3)
    
    net.add_node(
        node_id,
        label=node_data.get('label', str(node_id)),
        title=node_data.get('title', ''),
        color=color_map.get(node_type, '#7f7f7f'),
        size=size
    )

# Add edges with labels
for source, target, edge_data in G.edges(data=True):
    relationship = edge_data.get('relationship', 'related')
    label = relationship.split()[0] if relationship else 'related'
    
    net.add_edge(
        source,
        target,
        label=label,
        title=relationship,
        color='#888888',
        font={'size': 10, 'color': '#333333'}
    )

# Save the graph
output_file = 'entity_graph.html'
net.save_graph(output_file)

print(f"\nVisualization saved successfully")
print("\nLegend:")
print("  Orange nodes = Persons")
print("  Blue nodes = Organizations")
print("  Node size = Number of records merged")
print("  Edge labels = Relationship type")
print(f"\nGraph contains {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

# Display with larger IFrame
from IPython.display import IFrame
display(IFrame(src=output_file, width='100%', height=1200))

Creating interactive visualization...

Visualization saved successfully

Legend:
  Orange nodes = Persons
  Blue nodes = Organizations
  Node size = Number of records merged
  Edge labels = Relationship type

Graph contains 196 nodes and 233 edges


In [10]:
# Build TRUE combined graph: entities + records + relationships
print("Building true combined graph...")

G_true_combined = nx.Graph()

# Dictionary to track entity info for later
entity_info = {}

# First: Add all entities and their records (like resolution graph)
entities_added = 0
records_added = 0

for entity in entities:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    entity_id = entity_data.get('ENTITY_ID')
    records = entity_data.get('RECORDS', [])
    
    if not records:
        continue
    
    # Get entity info
    first_record = records[0]
    json_data = first_record.get('JSON_DATA', {})
    record_type = json_data.get('RECORD_TYPE', 'UNKNOWN')
    
    # Get name
    name = json_data.get('PRIMARY_NAME_FULL')
    if not name:
        name_list = json_data.get('NAMES', [])
        for name_obj in name_list:
            name = name_obj.get('NAME_FULL') or name_obj.get('PRIMARY_NAME_ORG') or name_obj.get('NAME_ORG')
            if name:
                break
    
    if not name:
        name = f"Entity {entity_id}"
    
    # Get data sources
    data_sources = list(set([r.get('DATA_SOURCE') for r in records]))
    is_cross_source = len(data_sources) > 1
    
    # Store entity info
    entity_info[entity_id] = {
        'name': name,
        'type': record_type,
        'num_records': len(records),
        'is_cross_source': is_cross_source,
        'data_sources': data_sources
    }
    
    # Create entity tooltip
    tooltip_parts = [
        name,
        f"Type: {record_type}",
        f"Entity ID: {entity_id}",
        f"Records merged: {len(records)}",
        f"Data sources: {', '.join(data_sources)}"
    ]
    if is_cross_source:
        tooltip_parts.append("⚠️ CROSS-SOURCE RESOLUTION")
    
    tooltip = "\n".join(tooltip_parts)
    
    # Label
    display_label = name[:30] + "..." if len(name) > 30 else name
    
    # Add entity node
    entity_node_id = f"entity_{entity_id}"
    G_true_combined.add_node(
        entity_node_id,
        label=display_label,
        title=tooltip,
        node_type='entity',
        type=record_type,
        num_records=len(records),
        is_cross_source=is_cross_source,
        entity_id=entity_id
    )
    entities_added += 1
    
    # Add record nodes and connect to entity
    for rec in records:
        rec_id = rec.get('RECORD_ID')
        rec_source = rec.get('DATA_SOURCE')
        rec_json = rec.get('JSON_DATA', {})
        
        # Get record-specific name
        rec_name = rec_json.get('PRIMARY_NAME_FULL')
        if not rec_name:
            rec_names = rec_json.get('NAMES', [])
            for name_obj in rec_names:
                rec_name = name_obj.get('NAME_FULL') or name_obj.get('PRIMARY_NAME_ORG') or name_obj.get('NAME_ORG')
                if rec_name:
                    break
        
        if not rec_name:
            rec_name = name
        
        rec_type = rec_json.get('RECORD_TYPE', 'UNKNOWN')
        
        # Create record tooltip
        rec_tooltip_parts = [
            f"Record: {rec_name}",
            f"Source: {rec_source}",
            f"Type: {rec_type}",
            f"Record ID: {rec_id}",
            f"Resolves to: {name}"
        ]
        rec_tooltip = "\n".join(rec_tooltip_parts)
        
        # Label
        rec_label = rec_name[:20] + "..." if len(rec_name) > 20 else rec_name
        
        # Add record node
        record_node_id = f"record_{rec_source}_{rec_id}"
        G_true_combined.add_node(
            record_node_id,
            label=rec_label,
            title=rec_tooltip,
            node_type='record',
            data_source=rec_source,
            type=rec_type
        )
        records_added += 1
        
        # Connect record to entity (resolution edge)
        G_true_combined.add_edge(
            record_node_id,
            entity_node_id,
            edge_type='resolution'
        )

print(f"Added {entities_added} entity nodes")
print(f"Added {records_added} record nodes")
print(f"Added {G_true_combined.number_of_edges()} resolution edges")

# Second: Add relationship edges between entities
relationship_edges = 0

for entity in entities:
    entity_data = entity.get('RESOLVED_ENTITY', {})
    anchor_entity_id = entity_data.get('ENTITY_ID')
    anchor_node_id = f"entity_{anchor_entity_id}"
    
    # Look through records for relationship data
    for record in entity_data.get('RECORDS', []):
        relationships = record.get('JSON_DATA', {}).get('RELATIONSHIPS', [])
        
        for rel in relationships:
            pointer_key = rel.get('REL_POINTER_KEY')
            pointer_role = rel.get('REL_POINTER_ROLE', 'related')
            
            # Find which entity this pointer_key belongs to
            for target_entity in entities:
                target_data = target_entity.get('RESOLVED_ENTITY', {})
                target_entity_id = target_data.get('ENTITY_ID')
                target_node_id = f"entity_{target_entity_id}"
                
                for target_record in target_data.get('RECORDS', []):
                    if target_record.get('RECORD_ID') == pointer_key:
                        if anchor_entity_id != target_entity_id:
                            # Add relationship edge between entities
                            G_true_combined.add_edge(
                                anchor_node_id,
                                target_node_id,
                                edge_type='relationship',
                                relationship=pointer_role
                            )
                            relationship_edges += 1
                        break

print(f"Added {relationship_edges} relationship edges")
print(f"\nTrue Combined Graph Statistics:")
print(f"  Total nodes: {G_true_combined.number_of_nodes()}")
print(f"  Total edges: {G_true_combined.number_of_edges()}")
print(f"  Entity nodes: {entities_added}")
print(f"  Record nodes: {records_added}")
print(f"  Resolution edges: {G_true_combined.number_of_edges() - relationship_edges}")
print(f"  Relationship edges: {relationship_edges}")

Building true combined graph...
Added 196 entity nodes
Added 282 record nodes
Added 282 resolution edges
Added 396 relationship edges

True Combined Graph Statistics:
  Total nodes: 478
  Total edges: 515
  Entity nodes: 196
  Record nodes: 282
  Resolution edges: 119
  Relationship edges: 396


In [11]:
# Visualize with fast-converging physics
print("Creating visualization with fast physics...")

net_true = Network(
    height="1200px",
    width="100%",
    bgcolor="#ffffff",
    font_color="#000000",
    notebook=True
)

# Fast physics that settles quickly
net_true.barnes_hut(
    gravity=-8000,
    central_gravity=0.3,
    spring_length=150,
    spring_strength=0.005,  # Stronger springs = faster settling
    damping=0.5  # High damping = stops moving faster
)

# Colors
entity_color_map = {
    'PERSON': '#ff7f0e',
    'ORGANIZATION': '#1f77b4',
    'UNKNOWN': '#7f7f7f'
}

record_color_map = {
    'OPEN-OWNERSHIP': '#3498db',
    'OPEN-SANCTIONS': '#e74c3c'
}

# Add nodes
for node_id, node_data in G_true_combined.nodes(data=True):
    if node_data.get('node_type') == 'entity':
        # Entity nodes
        node_type = node_data.get('type', 'UNKNOWN')
        num_records = node_data.get('num_records', 1)
        is_cross_source = node_data.get('is_cross_source', False)
        
        size = 30 + (num_records * 5)
        
        if is_cross_source:
            shape = 'star'
        elif num_records > 1:
            shape = 'diamond'
        else:
            shape = 'triangle'
        
        color = entity_color_map.get(node_type, '#7f7f7f')
        if is_cross_source:
            border_color = '#e74c3c'
            border_width = 5
        elif num_records > 1:
            border_color = '#2ecc71'
            border_width = 3
        else:
            border_color = color
            border_width = 1
        
        net_true.add_node(
            node_id,
            label=node_data.get('label', ''),
            title=node_data.get('title', ''),
            color={'background': color, 'border': border_color},
            size=size,
            shape=shape,
            borderWidth=border_width
        )
    else:
        # Record nodes
        data_source = node_data.get('data_source', 'UNKNOWN')
        color = record_color_map.get(data_source, '#95a5a6')
        
        net_true.add_node(
            node_id,
            label=node_data.get('label', ''),
            title=node_data.get('title', ''),
            color=color,
            size=15,
            shape='dot'
        )

# Add edges
for source, target, edge_data in G_true_combined.edges(data=True):
    edge_type = edge_data.get('edge_type', 'resolution')
    
    if edge_type == 'relationship':
        relationship = edge_data.get('relationship', 'related')
        label = relationship.split()[0] if relationship else 'related'
        
        net_true.add_edge(
            source,
            target,
            label=label,
            title=relationship,
            color='#e74c3c',
            width=3,
            font={'size': 10, 'color': '#e74c3c'},
            smooth=False
        )
    else:
        net_true.add_edge(
            source,
            target,
            title='resolved to',
            color='#cccccc',
            width=1,
            dashes=True,
            smooth=False
        )

# Save and modify to auto-disable physics
output_file = 'true_combined_graph.html'
net_true.save_graph(output_file)

# Add code to disable physics after 10 seconds
with open(output_file, 'r') as f:
    html_content = f.read()

physics_auto_off = """
<script type="text/javascript">
  // Disable physics after 10 seconds
  setTimeout(function() {
    network.setOptions({ physics: false });
    console.log("Physics auto-disabled after 10 seconds");
  }, 10000);
</script>
"""

html_content = html_content.replace('</body>', physics_auto_off + '</body>')

with open(output_file, 'w') as f:
    f.write(html_content)

print(f"\nVisualization saved successfully")
print("\n" + "="*70)
print("HOW TO READ THIS GRAPH")
print("="*70)
print("\nLARGE SHAPES = RESOLVED ENTITIES (after entity resolution)")
print("  Triangles:")
print("    - Single record, no merging happened")
print("    - Orange = Person, Blue = Organization")
print("  ")
print("  Diamonds (GREEN BORDER):")
print("    - Multiple records from SAME data source merged together")
print("    - Example: 3 OPEN-OWNERSHIP records identified as same company")
print("    - Orange = Person, Blue = Organization")
print("  ")
print("  Stars (RED BORDER):")
print("    - Records from DIFFERENT data sources merged together")
print("    - Example: Person in OPEN-SANCTIONS matched to director in OPEN-OWNERSHIP")
print("    - Orange = Person, Blue = Organization")
print("    - These are the most interesting - cross-dataset connections!")
print("\nSMALL DOTS = ORIGINAL RECORDS (before entity resolution)")
print("  Blue dots = Records from OPEN-OWNERSHIP dataset")
print("  Red dots = Records from OPEN-SANCTIONS dataset")
print("\nLINES = CONNECTIONS")
print("  Gray dashed lines:")
print("    - Connect original records to their resolved entity")
print("    - Show which records Senzing merged together")
print("  ")
print("  Red solid lines (with labels):")
print("    - Show business relationships between entities")
print("    - Labels show relationship type: shareholding, Directorship, voting_rights, etc.")
print("\nINTERACTION:")
print("  - Physics will settle in about 10 seconds")
print("  - Then auto-disables so nodes stay where you drag them")
print("  - Scroll to zoom, drag background to pan")
print("\nEXAMPLE: A star with red border surrounded by blue and red dots")
print("  = Person/org found in BOTH datasets, with multiple records merged")
print("  = The dots show the original records that were combined")
print("  = Red lines show their business connections to other entities")
print("="*70)

# Display
from IPython.display import IFrame
display(IFrame(src=output_file, width='100%', height=1200))

Creating visualization with fast physics...

Visualization saved successfully

HOW TO READ THIS GRAPH

LARGE SHAPES = RESOLVED ENTITIES (after entity resolution)
  Triangles:
    - Single record, no merging happened
    - Orange = Person, Blue = Organization
  
  Diamonds (GREEN BORDER):
    - Multiple records from SAME data source merged together
    - Example: 3 OPEN-OWNERSHIP records identified as same company
    - Orange = Person, Blue = Organization
  
  Stars (RED BORDER):
    - Records from DIFFERENT data sources merged together
    - Example: Person in OPEN-SANCTIONS matched to director in OPEN-OWNERSHIP
    - Orange = Person, Blue = Organization
    - These are the most interesting - cross-dataset connections!

SMALL DOTS = ORIGINAL RECORDS (before entity resolution)
  Blue dots = Records from OPEN-OWNERSHIP dataset
  Red dots = Records from OPEN-SANCTIONS dataset

LINES = CONNECTIONS
  Gray dashed lines:
    - Connect original records to their resolved entity
    - Show whic