# Real-Time Fraud Detection Demo: Border Security

Simplified workflow:
1. Generate dataset
2. Build data structure (SEQUENTIALLY_RELATED, COMPONENT_PARENT, DFS_NEXT, LAST_DFS_NODE_IN_COMP)
3. Real-time event ingestion with instant component visualization
4. Extract training features (on demand)

**Data Model:** `(:Event {event_id, timestamp})-[:WITH]->(:Thing {thing_id})` with optional secondary labels

In [None]:
# Cell 1: Setup & Imports

!pip install neo4j pandas numpy faker streamlit -q

from neo4j import GraphDatabase
import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta
import random
import time
from typing import List, Dict, Tuple

print("‚úÖ Imports successful")

In [None]:
# Cell 2: Component Size Limited (<300 events per component)

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random
from collections import defaultdict

def generate_dataset_size_limited(n_events=1_000_000, fraud_pct=0.08, days=30, seed=42, max_component_size=300):
    """
    Generate a synthetic border-crossing dataset whose fraud rings stay small.

    Every event references a list of "things" (travel document, biometric
    data, phone, ...). Events that share a thing id end up in the same
    connected component, so a fraud ring is modelled by re-using one thing id
    across all events of that ring.

    Args:
        n_events: Total number of events to generate.
        fraud_pct: Fraction of the events that follow a fraud pattern.
        days: Length of the time window (ending one hour before "now") over
            which the event timestamps are spread.
        seed: Seed applied to the global ``numpy.random`` and ``random``
            generators. Timestamps still depend on the current clock.
        max_component_size: Upper limit for the number of events in one fraud
            component. Ring target sizes are capped by it, each document ring
            is bridged to at most one smuggling network, and the largest
            resulting fraud component is measured and reported. Ring
            membership is random, so a very small limit can still be exceeded;
            a warning is printed in that case.

    Returns:
        ``(events_df, things_df)``. ``events_df`` has the columns
        ``event_id``, ``timestamp`` (ISO string), ``border_point``,
        ``thing_ids`` (list of thing ids) and ``pattern``. ``things_df`` has
        ``thing_id`` and ``labels`` (``['Thing', <type>]``).
    """
    # Imported locally: this cell imports numpy/pandas/datetime/random itself
    # but not ``time``, so the function would otherwise depend on another cell.
    import time

    np.random.seed(seed)
    random.seed(seed)

    print(f"üöÄ Generating {n_events:,} events (max component size: {max_component_size})")
    start = time.time()

    border_points = ['CDG_Paris', 'AMS_Amsterdam', 'FRA_Frankfurt', 'LHR_London',
                     'MAD_Madrid', 'FCO_Rome', 'VIE_Vienna', 'CPH_Copenhagen']

    # Timestamps: random offsets inside the window, sorted so that event i is
    # never earlier than event i-1.
    end_time = datetime.now() - timedelta(hours=1)
    start_time = end_time - timedelta(days=days)
    time_range_seconds = int((end_time - start_time).total_seconds())
    random_seconds = np.random.randint(0, time_range_seconds, n_events)
    timestamps = [start_time + timedelta(seconds=int(s)) for s in sorted(random_seconds)]

    # Pattern mix. The per-type fraud counts are computed once and reused for
    # the ring assignment below, so both always agree.
    n_fraud = int(n_events * fraud_pct)
    n_normal = n_events - n_fraud

    n_fraud_doc = int(n_fraud * 0.45)
    n_fraud_bio = int(n_fraud * 0.25)
    n_fraud_smug = int(n_fraud * 0.25)
    n_fraud_bridge = int(n_fraud * 0.05)

    patterns = np.array(
        ['normal_solo'] * int(n_normal * 0.70) +
        ['normal_family'] * int(n_normal * 0.20) +
        ['normal_return'] * int(n_normal * 0.10) +
        ['fraud_document'] * n_fraud_doc +
        ['fraud_biometric'] * n_fraud_bio +
        ['fraud_smuggling'] * n_fraud_smug +
        ['fraud_bridging'] * n_fraud_bridge
    )

    # Integer truncation above can leave a few events unassigned; they become
    # ordinary solo travellers.
    if len(patterns) < n_events:
        patterns = np.append(patterns, ['normal_solo'] * (n_events - len(patterns)))
    patterns = patterns[:n_events]
    np.random.shuffle(patterns)

    # ============================================================
    # FRAUD RING ALLOCATION - keep every ring (and bridged pair) small
    # ============================================================
    # One target size per fraud type, capped by max_component_size and never
    # below 1 so the ring-count divisions stay valid.
    ring_sizes = {
        'doc': max(1, min(random.randint(5, 15), max_component_size)),    # 5-15 events per document fraud ring
        'bio': max(1, min(random.randint(7, 20), max_component_size)),    # 7-20 events per biometric ring
        'smug': max(1, min(random.randint(10, 25), max_component_size)),  # 10-25 events per smuggling network
    }

    n_doc_rings = max(int(n_fraud_doc / ring_sizes['doc']), 1)
    n_bio_rings = max(int(n_fraud_bio / ring_sizes['bio']), 1)
    n_smug_networks = max(int(n_fraud_smug / ring_sizes['smug']), 1)

    print(f"‚úì Fraud allocation:")
    print(f"   Document rings: {n_doc_rings:,} (avg {ring_sizes['doc']} events each)")
    print(f"   Biometric rings: {n_bio_rings:,} (avg {ring_sizes['bio']} events each)")
    print(f"   Smuggling networks: {n_smug_networks:,} (avg {ring_sizes['smug']} events each)")
    print(f"   Bridging events: {n_fraud_bridge:,} (carefully controlled)")

    # Pre-assign events to specific rings (no random pooling!)
    fraud_doc_assignments = np.random.randint(0, n_doc_rings, n_fraud_doc)
    fraud_bio_assignments = np.random.randint(0, n_bio_rings, n_fraud_bio)
    fraud_smug_assignments = np.random.randint(0, n_smug_networks, n_fraud_smug)

    # Bridging: bridge k always links document ring k with smuggling network k.
    # Pairing the rings one-to-one matters: taking the bridge index modulo each
    # ring count separately lets one ring be bridged to several different
    # partners, which chains many rings into a single oversized component.
    n_bridge_pairs = min(n_doc_rings, n_smug_networks)

    # Measure the fraud components that the assignment above produces.
    doc_counts = np.bincount(fraud_doc_assignments, minlength=n_doc_rings)
    bio_counts = np.bincount(fraud_bio_assignments, minlength=n_bio_rings)
    smug_counts = np.bincount(fraud_smug_assignments, minlength=n_smug_networks)
    bridge_counts = np.bincount(np.arange(n_fraud_bridge) % n_bridge_pairs, minlength=n_bridge_pairs)
    merged_sizes = doc_counts[:n_bridge_pairs] + smug_counts[:n_bridge_pairs] + bridge_counts
    largest_component = int(max(
        doc_counts.max(),
        bio_counts.max(),
        smug_counts.max(),
        # Only pairs that actually received a bridging event are merged.
        merged_sizes[bridge_counts > 0].max(initial=0),
    ))

    rand_borders = np.random.randint(0, len(border_points), n_events)

    # ============================================================
    # Build events
    # ============================================================
    events = []
    things_registry = {}

    solo_counter = 0
    family_counter = 0
    return_counter = 0
    fraud_doc_counter = 0
    fraud_bio_counter = 0
    fraud_smug_counter = 0
    fraud_bridge_counter = 0

    def reg(tid, ttype):
        """Record a thing id with its labels on first sight and return the id."""
        if tid not in things_registry:
            things_registry[tid] = ['Thing', ttype]
        return tid

    for i in range(n_events):
        pattern = patterns[i]

        if pattern == 'normal_solo':
            # Unique things per event: a component of exactly one event.
            thing_ids = [
                reg(f'traveldocument_solo_{solo_counter}', 'TravelDocument'),
                reg(f'biometricdata_solo_{solo_counter}', 'BiometricData'),
                reg(f'phone_solo_{solo_counter}', 'Phone')
            ]
            solo_counter += 1

        elif pattern == 'normal_family':
            # Four consecutive family events share a booking and a contact.
            family_id = family_counter // 4
            person_id = family_counter % 4

            thing_ids = [
                reg(f'traveldocument_fam_{family_id}_{person_id}', 'TravelDocument'),
                reg(f'biometricdata_fam_{family_id}_{person_id}', 'BiometricData'),
                reg(f'phone_fam_{family_id}_{person_id}', 'Phone'),
                reg(f'flightbooking_fam_{family_id}', 'FlightBooking'),
                reg(f'emergencycontact_fam_{family_id}', 'EmergencyContact')
            ]
            family_counter += 1

        elif pattern == 'normal_return':
            # Seven consecutive return events belong to the same traveller.
            traveler_id = return_counter // 7

            thing_ids = [
                reg(f'traveldocument_return_{traveler_id}', 'TravelDocument'),
                reg(f'biometricdata_return_{traveler_id}', 'BiometricData'),
                reg(f'phone_return_{traveler_id}', 'Phone')
            ]
            return_counter += 1

        elif pattern == 'fraud_document':
            # Assign to specific ring (not random pool!): the forged document
            # is the thing shared by the whole ring.
            ring_id = fraud_doc_assignments[fraud_doc_counter]

            thing_ids = [
                reg(f'traveldocument_forged_{ring_id}', 'TravelDocument'),
                reg(f'biometricdata_fraud_doc_{fraud_doc_counter}', 'BiometricData'),
                reg(f'phone_fraud_doc_{fraud_doc_counter}', 'Phone')
            ]
            fraud_doc_counter += 1

        elif pattern == 'fraud_biometric':
            # The cloned biometric record is the thing shared by the ring.
            ring_id = fraud_bio_assignments[fraud_bio_counter]

            thing_ids = [
                reg(f'traveldocument_fraud_bio_{fraud_bio_counter}', 'TravelDocument'),
                reg(f'biometricdata_cloned_{ring_id}', 'BiometricData'),
                reg(f'phone_fraud_bio_{fraud_bio_counter}', 'Phone')
            ]
            fraud_bio_counter += 1

        elif pattern == 'fraud_smuggling':
            # A network shares both its mule contact and its visa batch.
            network_id = fraud_smug_assignments[fraud_smug_counter]

            thing_ids = [
                reg(f'traveldocument_fraud_smug_{fraud_smug_counter}', 'TravelDocument'),
                reg(f'biometricdata_fraud_smug_{fraud_smug_counter}', 'BiometricData'),
                reg(f'phone_fraud_smug_{fraud_smug_counter}', 'Phone'),
                reg(f'emergencycontact_mule_{network_id}', 'EmergencyContact'),
                reg(f'visanumber_batch_{network_id}', 'VisaNumber')
            ]
            fraud_smug_counter += 1

        elif pattern == 'fraud_bridging':
            # Deterministic bridge: connects document ring k and smuggling
            # network k, where k cycles through the available pairs.
            pair_id = fraud_bridge_counter % n_bridge_pairs

            thing_ids = [
                reg(f'traveldocument_forged_{pair_id}', 'TravelDocument'),
                reg(f'biometricdata_fraud_bridge_{fraud_bridge_counter}', 'BiometricData'),
                reg(f'phone_fraud_bridge_{fraud_bridge_counter}', 'Phone'),
                reg(f'emergencycontact_mule_{pair_id}', 'EmergencyContact')
            ]
            fraud_bridge_counter += 1

        events.append({
            'event_id': f'evt_{i}',
            'timestamp': timestamps[i].isoformat(),
            'border_point': border_points[rand_borders[i]],
            'thing_ids': thing_ids,
            'pattern': pattern
        })

        if (i + 1) % 200_000 == 0:
            print(f"   {i+1:,} events ({time.time()-start:.1f}s)")

    events_df = pd.DataFrame(events)
    things_df = pd.DataFrame([
        {'thing_id': tid, 'labels': lbls}
        for tid, lbls in things_registry.items()
    ])

    elapsed = time.time() - start

    print("=" * 60)
    print(f"‚úÖ COMPLETE in {elapsed:.1f}s")
    print(f"   {len(events_df):,} events")
    print(f"   {len(things_df):,} things")
    print(f"\n   Expected components:")
    print(f"   - Solo: ~{solo_counter:,} (size 1)")
    print(f"   - Families: ~{family_counter//4:,} (size 4)")
    print(f"   - Return: ~{return_counter//7:,} (size 7)")
    print(f"   - Doc fraud: ~{n_doc_rings:,} rings (size 5-15)")
    print(f"   - Bio fraud: ~{n_bio_rings:,} rings (size 7-20)")
    print(f"   - Smuggling: ~{n_smug_networks:,} networks (size 10-25)")
    # Report what was actually generated instead of a fixed claim that only
    # held for the default arguments.
    print(f"   - Largest fraud component: {largest_component:,} events (limit: {max_component_size:,})")
    if largest_component > max_component_size:
        print(f"   WARNING: largest fraud component exceeds max_component_size={max_component_size:,}")

    return events_df, things_df

# Build the demo dataset: one million events spread over thirty days, 8% of
# them fraudulent, with fraud components limited to 300 events.
events_df, things_df = generate_dataset_size_limited(
    1_000_000,
    0.08,
    30,
    max_component_size=300,
)

# Show how many events each pattern received.
display(events_df.loc[:, 'pattern'].value_counts())

In [None]:
# Cell 3: Neo4j Connection & Schema

import os

# Connection settings. Each value can be overridden through an environment
# variable of the same name, which keeps credentials out of the notebook;
# the fallbacks are the values this notebook used before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")  # set NEO4J_PASSWORD, or fill in your password here

# Shared driver used by every later cell.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def create_schema(driver):
    """
    Create the uniqueness constraints and the timestamp index used by the demo.

    Every statement uses ``IF NOT EXISTS``, so the function can be run again
    on a database that already has the schema.

    Args:
        driver: Object whose ``session()`` returns a context manager with a
            ``run(query)`` method (the Neo4j driver created in this notebook).
    """
    schema_statements = (
        "CREATE CONSTRAINT event_id_unique IF NOT EXISTS FOR (e:Event) REQUIRE (e.event_id) IS UNIQUE",
        "CREATE CONSTRAINT thing_id_unique IF NOT EXISTS FOR (t:Thing) REQUIRE (t.thing_id) IS UNIQUE",
        "CREATE INDEX event_timestamp IF NOT EXISTS FOR (e:Event) ON (e.timestamp)",
    )
    with driver.session() as session:
        print("Creating schema...")
        for statement in schema_statements:
            # consume() waits for the statement to finish, so a failure is
            # raised here rather than on a later, unrelated call.
            session.run(statement).consume()
        print("‚úÖ Schema ready")

# Check the connection before doing any work (verify_connectivity() is
# expected to raise when the server cannot be reached - see driver docs).
driver.verify_connectivity()
print("‚úÖ Connected to Neo4j")
# Create the two uniqueness constraints and the timestamp index.
create_schema(driver)

In [None]:
# Cell 4: OPTIMIZED Batch Ingestion for 1M EVENTS

!pip install tqdm -q

from tqdm.notebook import tqdm

def batch_ingest_1M(driver, events_df, things_df, batch_size=5000):
    """
    Load things and events into Neo4j in fixed-size batches with progress bars.

    Things are created first (labels taken from each row's ``labels`` list via
    ``apoc.create.node``), then events are created and linked to their things
    with ``WITH`` relationships.

    Args:
        driver: Neo4j driver used to open the session.
        events_df: DataFrame with ``event_id``, ``timestamp``,
            ``border_point``, ``pattern`` and ``thing_ids`` columns.
        things_df: DataFrame with ``thing_id`` and ``labels`` columns.
        batch_size: Number of rows sent per query; must be positive.

    Raises:
        ValueError: If ``batch_size`` is not positive. A negative step would
            otherwise produce an empty range and silently ingest nothing.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    with driver.session() as session:
        # Ingest things
        print(f"Ingesting {len(things_df):,} things (batch size: {batch_size})...")

        for i in tqdm(range(0, len(things_df), batch_size), desc="Things"):
            batch = things_df.iloc[i:i+batch_size].to_dict('records')
            # consume() waits for the batch to finish so that an error is
            # raised for the batch that caused it.
            session.run("""
                UNWIND $things AS thing
                CALL apoc.create.node(thing.labels, {thing_id: thing.thing_id})
                YIELD node
                RETURN count(node)
            """, things=batch).consume()

        # Ingest events (things must already exist: they are MATCHed, not created)
        print(f"\nIngesting {len(events_df):,} events (batch size: {batch_size})...")

        for i in tqdm(range(0, len(events_df), batch_size), desc="Events"):
            batch = events_df.iloc[i:i+batch_size].to_dict('records')
            session.run("""
                UNWIND $events AS evt
                CREATE (e:Event {
                    event_id: evt.event_id,
                    timestamp: datetime(evt.timestamp),
                    border_point: evt.border_point,
                    pattern: evt.pattern
                })
                WITH e, evt
                UNWIND evt.thing_ids AS thing_id
                MATCH (t:Thing {thing_id: thing_id})
                MERGE (e)-[:WITH]->(t)
            """, events=batch).consume()

# Announce the run; the batch size printed here matches the call below.
print("üöÄ STARTING INGESTION")
print("   Batch size: 5,000 (optimized for large datasets)")
print("   Estimated time: 15-30 minutes")
print("=" * 60)

# Time the complete ingestion (things first, then events).
start = time.time()
batch_ingest_1M(driver, events_df, things_df, batch_size=5000)
elapsed = time.time() - start

print("=" * 60)
print(f"‚úÖ INGESTION COMPLETE")
print(f"   Time: {elapsed/60:.1f} minutes")
print(f"   Rate: {len(events_df)/elapsed:.0f} events/sec")

# Verify: count the Event nodes that are now in the database.
with driver.session() as session:
    result = session.run("MATCH (e:Event) RETURN count(e) AS count").single()
    print(f"   Verified: {result['count']:,} events in database")

In [None]:
# Cell 5: Build SEQUENTIALLY_RELATED (WCC-Batched)

print("Building SEQUENTIALLY_RELATED relationships...")

with driver.session() as session:
    # Project Event-Thing graph into the GDS catalog.
    print("  Step 1: Project event_thing_graph")
    start = time.time()
    # consume() blocks until the query has finished; without it the elapsed
    # time below would be taken while the result is still being fetched.
    session.run("""
        CYPHER runtime=parallel
        MATCH (source:Event)
        OPTIONAL MATCH (source)-[:WITH]->(target)
        RETURN gds.graph.project('event_thing_graph', source, target, {})
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # WCC-batched SEQUENTIALLY_RELATED creation: things are grouped by their
    # weakly connected component and, per thing, its events are chained in
    # timestamp order.
    print("  Step 2: Create SEQUENTIALLY_RELATED (concurrent)")
    start = time.time()
    session.run("""
        CYPHER 25
        CALL gds.wcc.stream('event_thing_graph')
        YIELD nodeId, componentId
        WITH gds.util.asNode(nodeId) AS thing, componentId AS community
        FILTER thing:Thing
        WITH community, collect(thing) AS things
        CALL (things) {
          UNWIND things AS thing
          CALL (thing) {
            MATCH (e:Event)-[:WITH]->(thing)
            WITH DISTINCT e ORDER BY e.timestamp
            WITH collect(e) AS events
            UNWIND range(0, size(events)-2) AS ix
            WITH events[ix] AS source, events[ix+1] AS target
            MERGE (source)-[:SEQUENTIALLY_RELATED]->(target)
          }
        } IN 8 CONCURRENT TRANSACTIONS OF 100 ROWS
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # Verify
    result = session.run("MATCH ()-[r:SEQUENTIALLY_RELATED]->() RETURN count(r) AS count").single()
    print(f"\n‚úÖ {result['count']} SEQUENTIALLY_RELATED relationships created")

In [None]:
# Cell 6: Build COMPONENT_PARENT Structure

print("Building COMPONENT_PARENT union-find structure...")

with driver.session() as session:
    # Project SEQUENTIALLY_RELATED graph into the GDS catalog.
    print("  Step 1: Project seq_rel_event_graph")
    start = time.time()
    # consume() blocks until the query has finished; without it the elapsed
    # time below would be taken while the result is still being fetched.
    session.run("""
        CYPHER runtime=parallel
        MATCH (source:Event)
        OPTIONAL MATCH (source)-[:SEQUENTIALLY_RELATED]->(target)
        RETURN gds.graph.project('seq_rel_event_graph', source, target, {})
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # Build COMPONENT_PARENT: per component, events are visited in timestamp
    # order, labelled ComponentNode, and the current roots of their already
    # processed predecessors are attached below the new event.
    print("  Step 2: Build COMPONENT_PARENT forest")
    start = time.time()
    session.run("""
        CALL gds.wcc.stream('seq_rel_event_graph')
        YIELD nodeId, componentId
        WITH gds.util.asNode(nodeId) AS event, componentId
        WITH componentId, collect(event) AS events
        ORDER BY rand()
        CALL (events) {
          UNWIND events AS e
          WITH e WHERE NOT e:ComponentNode
          ORDER BY e.timestamp ASC
          CALL (e) {
            SET e:ComponentNode
            WITH e
            MATCH (x:ComponentNode)-[:SEQUENTIALLY_RELATED]->(e)
            MATCH (x)-[:COMPONENT_PARENT]->*(cc WHERE NOT EXISTS {(cc)-[:COMPONENT_PARENT]->()})
            MERGE (cc)-[:COMPONENT_PARENT]->(e)
          }
        } IN 8 CONCURRENT TRANSACTIONS OF 100 ROWS
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    result = session.run("MATCH ()-[r:COMPONENT_PARENT]->() RETURN count(r) AS count").single()
    print(f"\n‚úÖ {result['count']} COMPONENT_PARENT relationships created")

In [None]:
# Cell 7: Build DFS_NEXT Optimization

print("Building DFS_NEXT optimization (10x faster component queries)...")

with driver.session() as session:
    # Project component forest (COMPONENT_PARENT relationships, reversed so
    # that edges point from a parent event to the events below it).
    print("  Step 1: Project component_forest")
    start = time.time()
    # consume() blocks until the query has finished; without it the elapsed
    # time below would be taken while the result is still being fetched.
    session.run("""
        MATCH (source:Event)
        OPTIONAL MATCH (source)<-[:COMPONENT_PARENT]-(target)
        RETURN gds.graph.project('component_forest', source, target, {})
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # Create DFS_NEXT using GDS DFS, started from every root that has at
    # least one child.
    print("  Step 2: Create DFS_NEXT chains")
    start = time.time()
    session.run("""
        MATCH (source:ComponentNode)
        WHERE NOT EXISTS {(source)-[:COMPONENT_PARENT]->()}
        AND EXISTS {(source)<-[:COMPONENT_PARENT]-()}
        CALL(source) {
          CALL gds.dfs.stream('component_forest', {
            sourceNode: source
          })
          YIELD path
          WITH relationships(path) AS rels
          UNWIND rels AS rel
          WITH startNode(rel) AS n1, endNode(rel) AS n2
          MERGE (n1)-[:DFS_NEXT]->(n2)
        } IN 8 CONCURRENT TRANSACTIONS OF 50 ROWS
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # Create LAST_DFS_NODE_IN_COMP markers
    print("  Step 3: Create LAST_DFS_NODE_IN_COMP markers")
    start = time.time()
    session.run("""
    CYPHER 25
    CALL () {
        MATCH (c1:ComponentNode)-[:DFS_NEXT]->(c2:ComponentNode)
        MATCH (c1)(()-[:COMPONENT_PARENT]->(ps)
          WHERE NOT EXISTS {(c2)-[:COMPONENT_PARENT]->*(ps)}
        )*
        UNWIND ps AS p
        RETURN c1, p
      
      UNION

        MATCH (c1:ComponentNode WHERE NOT EXISTS {(c1)-[:DFS_NEXT]->()})
        MATCH (c1)(()-[:COMPONENT_PARENT]->(ps))*
        UNWIND ps AS p
        RETURN c1, p

      UNION
        
        MATCH (c1:ComponentNode WHERE NOT EXISTS {()-[:COMPONENT_PARENT]->(c1)})
        RETURN c1, c1 AS p

    }
    CALL (c1, p) {
      MERGE (p)-[:LAST_DFS_NODE_IN_COMP]->(c1)
    } IN TRANSACTIONS OF 100 ROWS
    """).consume()
    print(f"     ‚úì ({time.time()-start:.2f}s)")

    # Verify
    result1 = session.run("MATCH ()-[r:DFS_NEXT]->() RETURN count(r) AS count").single()
    result2 = session.run("MATCH ()-[r:LAST_DFS_NODE_IN_COMP]->() RETURN count(r) AS count").single()

    print(f"\n‚úÖ DFS optimization complete:")
    print(f"   {result1['count']} DFS_NEXT relationships")
    print(f"   {result2['count']} LAST_DFS_NODE_IN_COMP markers")

In [None]:
pip install streamlit-agraph

In [None]:
# Cell 9: Streamlit UI - With Time Machine Feature

import os

streamlit_code = '''import streamlit as st
from neo4j import GraphDatabase
from datetime import datetime, time as dt_time
import time
from streamlit_agraph import agraph, Node, Edge, Config
from collections import Counter

# Pattern color mapping: one hex colour per value of the Event.pattern
# property; the same mapping drives the sidebar legend and the graph nodes.
PATTERN_COLORS = {
    "fraud_document": "#FF2222",
    "fraud_biometric": "#FF6600",
    "fraud_smuggling": "#CC00CC",
    "fraud_bridging": "#FF0066",
    "normal_solo": "#44BB44",
    "normal_family": "#22AA88",
    "normal_return": "#2288AA",
}
# Colour for events whose pattern is missing or not listed above.
DEFAULT_EVENT_COLOR = "#9945FF"
# Colour used for the event that is currently selected in the viewer.
SELECTED_BORDER_COLOR = "#FFFF00"

def pattern_color(pattern):
    """Return the colour for an event pattern; empty or unknown patterns get the default."""
    return PATTERN_COLORS.get(pattern, DEFAULT_EVENT_COLOR) if pattern else DEFAULT_EVENT_COLOR

def is_fraud(pattern):
    """Return True when the pattern name marks a fraud event (prefix "fraud_").

    Always returns a real bool; a missing or empty pattern yields False.
    """
    return bool(pattern) and pattern.startswith("fraud_")

# Initialize
import os

if "driver" not in st.session_state:
    # One driver per browser session. Connection settings can be supplied via
    # NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD; the fallbacks are the values
    # that were previously hard-coded here.
    # NOTE(review): the fallback password is a credential committed to source;
    # prefer setting NEO4J_PASSWORD and removing the literal.
    st.session_state.driver = GraphDatabase.driver(
        os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        auth=(
            os.environ.get("NEO4J_USER", "neo4j"),
            os.environ.get("NEO4J_PASSWORD", "pierre!!!"),
        )
    )

# Page chrome: wide layout and the main title.
st.set_page_config(page_title="Border Security", layout="wide")
st.title("üõÇ Border Security Fraud Detection")

# Connection: run a trivial query; on any failure show the error in the
# sidebar and stop rendering the rest of the page.
try:
    with st.session_state.driver.session() as session:
        session.run("RETURN 1").single()
        st.sidebar.success("‚úÖ Connected")
except Exception as e:
    st.sidebar.error(f"‚ùå {e}")
    st.stop()

# Stats: total number of events and how many of them carry the ComponentNode
# label, followed by the colour legend.
with st.sidebar:
    st.header("System Status")
    with st.session_state.driver.session() as session:
        stats = session.run("""
            MATCH (e:Event) 
            RETURN count(e) AS total,
                   sum(CASE WHEN e:ComponentNode THEN 1 ELSE 0 END) AS in_structure
        """).single()
    st.metric("Events", f"{stats['total']:,}")
    st.metric("In Structure", f"{stats['in_structure']:,}")

    st.divider()
    st.header("üé® Legend")
    # One legend entry per pattern, e.g. "fraud_document" -> "Fraud Document".
    for pattern, color in PATTERN_COLORS.items():
        label = pattern.replace("_", " ").title()
        st.markdown(f'<span style="color:{color}">‚¨§</span> {label}', unsafe_allow_html=True)
    # Things are drawn as boxes in a single fixed colour.
    st.markdown(f'<span style="color:#00D9FF">‚ñ†</span> Thing', unsafe_allow_html=True)

# Section 1: Add Event
st.header("1Ô∏è‚É£ Add Border Crossing Event")

with st.form("add_event_form"):
    col_a, col_b = st.columns(2)
    with col_a:
        border = st.text_input("Border Point", "CDG_Paris")
        travel_doc = st.text_input("Travel Document", "traveldocument_NEW_001")
    with col_b:
        phone = st.text_input("Phone", "phone_NEW_001")
        biometric = st.text_input("Biometric (optional)", "")
    
    add_button = st.form_submit_button("‚ûï Add Event", type="primary")

# Identifiers submitted with the event. Blank fields are dropped: a blank id
# would otherwise be merged as a Thing with an empty thing_id and connect every
# event that left the same field empty.
things = [
    thing
    for thing in (
        {'thing_id': phone.strip(), 'labels': ['Phone']},
        {'thing_id': travel_doc.strip(), 'labels': ['TravelDocument']},
        {'thing_id': biometric.strip(), 'labels': ['BiometricData']},
    )
    if thing['thing_id']
]

if add_button and not things:
    st.error("Enter at least one identifier (phone, travel document or biometric) before adding an event.")
elif add_button:
    # Millisecond timestamp keeps ids created from the UI distinct from the
    # generated evt_<n> ids.
    event_id = f"evt_{int(datetime.now().timestamp() * 1000)}"
    
    try:
        with st.session_state.driver.session() as session:
            start = time.time()
            
            # Single query that creates the event, links its things and
            # updates COMPONENT_PARENT / DFS_NEXT / LAST_DFS_NODE_IN_COMP.
            # First branch: none of the things is attached to an existing
            # ComponentNode, so the event becomes its own component.
            # Second branch: the roots of all touched components are chained
            # with DFS_NEXT and placed below the new event.
            result = session.run("""
                CYPHER 25
                LET event = {
                  border_point: $border_point,
                  event_id: $event_id,
                  things: $things
                }
                WITH event, collect {
                  UNWIND event.things AS th
                  MATCH (t:Thing {thing_id: th.thing_id})<-[:WITH]-(c:ComponentNode)
                  RETURN c
                } AS matched_comps
                CALL (event, matched_comps) {
                  WHEN size(matched_comps) = 0 THEN {
                    CREATE (e:Event {event_id: event.event_id})
                    SET e.border_point = event.border_point,
                        e.timestamp = datetime(),
                        e:New
                    UNWIND event.things AS th
                    MERGE (t:Thing {thing_id: th.thing_id})
                    ON CREATE SET t:$(th.labels), t:New
                    MERGE (e)-[:WITH {new:True}]->(t)
                    MERGE (e)-[:LAST_DFS_NODE_IN_COMP]->(e)
                    SET e:ComponentNode
                    RETURN e
                  }
                  ELSE {
                    CREATE (e:Event {event_id: event.event_id})
                    SET e.border_point = event.border_point,
                        e.timestamp = datetime(),
                        e:New
                    UNWIND event.things AS th
                    MERGE (t:Thing {thing_id: th.thing_id})
                    ON CREATE SET t:$(th.labels), t:New
                    MERGE (e)-[:WITH {new:True}]->(t)
                    CALL (t) {
                      MATCH (t)<-[:WITH]-(ev:ComponentNode)
                      LIMIT 1
                      MATCH (ev)-[:COMPONENT_PARENT]->*(parent
                        WHERE NOT EXISTS {(parent)-[:COMPONENT_PARENT]->()}
                      )
                      RETURN parent
                    }
                    WITH DISTINCT event, e, parent
                    ORDER BY parent.event_id
                    WITH event, e, collect(parent) AS sub_comps
                    CALL (sub_comps) { 
                      UNWIND range(0, size(sub_comps)-2) AS ix
                      WITH sub_comps[ix] AS comp1, sub_comps[ix+1] AS comp2
                      MATCH (comp1)-[:LAST_DFS_NODE_IN_COMP]->(last)
                      MERGE (last)-[:DFS_NEXT {new:True}]->(comp2)
                    }
                    CALL (e, sub_comps) { 
                      UNWIND sub_comps AS comp
                      MERGE (comp)-[:COMPONENT_PARENT {new:True}]->(e)
                    }
                    WITH event, e, sub_comps, sub_comps[0] AS first_comp, sub_comps[-1] AS last_comp
                    MATCH (last_comp)-[:LAST_DFS_NODE_IN_COMP]->(last)
                    MERGE (e)-[:DFS_NEXT {new:True}]->(first_comp)
                    MERGE (e)-[:LAST_DFS_NODE_IN_COMP {new:True}]->(last)
                    SET e:ComponentNode
                    RETURN e
                  }
                }
                RETURN e.event_id AS created_event
            """, 
            border_point=border,
            event_id=event_id,
            things=things
            ).single()
            
            latency = (time.time() - start) * 1000
        
        st.success(f"‚úÖ Event `{result['created_event']}` created in {latency:.1f}ms")
        
    except Exception as e:
        # UI boundary: show the failure instead of crashing the page.
        st.error(f"‚ùå {e}")
        st.code(str(e))

# Section 2: View Component - TIME MACHINE
st.divider()
st.header("2Ô∏è‚É£ View Event Component üï∞Ô∏è Time Machine")

# Narrow left column for the controls, wide right column for the graph.
col_x, col_y = st.columns([1, 2])

with col_x:
    # Event selector: offers the 100 most recent events by timestamp.
    with st.session_state.driver.session() as session:
        recent_events = session.run("""
            MATCH (e:Event)
            RETURN e.event_id AS event_id, e.timestamp AS timestamp, e.pattern AS pattern
            ORDER BY e.timestamp DESC
            LIMIT 100
        """).data()
    
    # Lookup tables keyed by event id, used for the option labels and for
    # the "as of event date" note.
    event_options = [r['event_id'] for r in recent_events]
    event_timestamps = {r['event_id']: r['timestamp'] for r in recent_events}
    event_patterns = {r['event_id']: r.get('pattern', '') for r in recent_events}
    
    selected_event = st.selectbox(
        "Select Event",
        event_options,
        index=0 if event_options else None,
        format_func=lambda eid: f"{eid} [{event_patterns.get(eid, '?')}]"
    )
    
    # Time machine mode
    st.subheader("‚è∞ Temporal View")
    
    view_mode = st.radio(
        "View component:",
        ["As of today (latest)", "As of event date", "As of custom date"],
        horizontal=True
    )
    
    # Custom date picker (only shown for custom mode); custom_datetime is
    # therefore only defined when that mode is selected.
    if view_mode == "As of custom date":
        as_of_date = st.date_input("Date", datetime.now())
        as_of_time = st.time_input("Time", dt_time(12, 0))
        custom_datetime = datetime.combine(as_of_date, as_of_time)
    
    view_button = st.button("üëÅÔ∏è View Component", type="secondary", use_container_width=True)

with col_y:
    if view_button and selected_event:
        try:
            with st.session_state.driver.session() as session:
                start = time.time()
                
                # Choose query based on mode. All three walk DFS_NEXT from a
                # starting event up to its LAST_DFS_NODE_IN_COMP marker; they
                # differ only in which event the walk starts from.
                if view_mode == "As of event date":
                    # Start at the selected event itself.
                    query = """
                        MATCH comp=(e:Event {event_id: $event_id})-[:DFS_NEXT]->*(last),
                        (last)<-[:LAST_DFS_NODE_IN_COMP]-(e)
                        UNWIND nodes(comp) AS ev
                        RETURN {
                            event: ev{.event_id, .border_point, .timestamp, .pattern},
                            things: [(ev)-[r:WITH]->(x)| x{.thing_id, labels:labels(x)}]
                        } AS events_with_things
                    """
                    result = session.run(query, event_id=selected_event).data()
                    temporal_note = f"üìÖ As of event date: {event_timestamps[selected_event]}"
                
                elif view_mode == "As of custom date":
                    # Follow COMPONENT_PARENT only through events that are not
                    # later than the chosen date, then start from the last one.
                    query = """
                        MATCH fast_fw_to_future=(e:Event {event_id: $event_id})
                        (()-[:COMPONENT_PARENT]->(future WHERE future.timestamp <= datetime($asOfDate)))*
                        (latest_future_comp:Event
                        WHERE NOT EXISTS {
                          (latest_future_comp)-[:COMPONENT_PARENT]->(x WHERE x.timestamp <= datetime($asOfDate))
                        }
                        ),
                        comp=(latest_future_comp)-[:DFS_NEXT]->*(last),
                        (last)<-[:LAST_DFS_NODE_IN_COMP]-(latest_future_comp)
                        UNWIND nodes(comp) AS ev
                        RETURN {
                            event: ev{.event_id, .border_point, .timestamp, .pattern},
                            things: [(ev)-[r:WITH]->(x)| x{.thing_id, labels:labels(x)}]
                        } AS events_with_things
                    """
                    result = session.run(query, event_id=selected_event, asOfDate=custom_datetime.isoformat()).data()
                    temporal_note = f"üìÖ As of: {custom_datetime}"
                
                else:  # "As of today"
                    # Follow COMPONENT_PARENT all the way to the current root.
                    query = """
                        MATCH fast_fw_to_future=(e:Event {event_id: $event_id})-[:COMPONENT_PARENT]->*
                        (latest_future_comp:Event
                        WHERE NOT EXISTS {
                          (latest_future_comp)-[:COMPONENT_PARENT]->()
                        }
                        ),
                        comp=(latest_future_comp)-[:DFS_NEXT]->*(last),
                        (last)<-[:LAST_DFS_NODE_IN_COMP]-(latest_future_comp)
                        UNWIND nodes(comp) AS ev
                        RETURN {
                            event: ev{.event_id, .border_point, .timestamp, .pattern},
                            things: [(ev)-[r:WITH]->(x)| x{.thing_id, labels:labels(x)}]
                        } AS events_with_things
                    """
                    result = session.run(query, event_id=selected_event).data()
                    temporal_note = f"üìÖ As of: Now (latest)"
                
                latency = (time.time() - start) * 1000
            
            st.success(f"‚úÖ Retrieved in {latency:.1f}ms")
            st.info(temporal_note)
            st.markdown(f"**Component: {len(result)} events**")
            
            # Parse into graph: one node per event and per thing, one edge
            # per event-thing link.
            nodes = []
            edges = []
            node_ids = set()
            pattern_counts = Counter()
            fraud_count = 0
            
            for row in result:
                evt_data = row['events_with_things']
                evt_id = evt_data['event']['event_id']
                evt_pattern = evt_data['event'].get('pattern', None)
                
                # Track patterns
                if evt_pattern:
                    pattern_counts[evt_pattern] += 1
                if is_fraud(evt_pattern):
                    fraud_count += 1
                
                # Event node (the selected event is larger and highlighted)
                if evt_id not in node_ids:
                    is_selected = evt_id == selected_event
                    color = pattern_color(evt_pattern)
                    
                    tooltip = f"{evt_id}\\n{evt_data['event'].get('border_point', '')}\\n{evt_pattern or 'unknown'}"
                    
                    nodes.append(Node(
                        id=evt_id,
                        label=evt_id.split('_')[-1][:10],
                        size=30 if is_selected else 20,
                        color=SELECTED_BORDER_COLOR if is_selected else color,
                        borderWidth=3 if is_selected else 1,
                        title=tooltip
                    ))
                    node_ids.add(evt_id)
                
                # Thing nodes
                for thing in evt_data['things']:
                    thing_id = thing['thing_id']
                    if thing_id not in node_ids:
                        labels = thing.get('labels', ['Thing'])
                        # Show the type label. Taking the last label is not
                        # reliable: things created from this app also carry
                        # the New marker label, and the base Thing label can
                        # appear at any position.
                        type_labels = [lbl for lbl in labels if lbl not in ('Thing', 'New')]
                        thing_type = type_labels[0] if type_labels else 'Thing'
                        
                        nodes.append(Node(
                            id=thing_id,
                            label=thing_type[:8],
                            size=15,
                            color="#00D9FF",
                            shape="box",
                            title=thing_id
                        ))
                        node_ids.add(thing_id)
                    
                    edges.append(Edge(source=evt_id, target=thing_id))
            
            # Render graph
            if nodes:
                # Things are the only nodes drawn as boxes.
                event_count = len([n for n in nodes if n.shape != "box"])
                thing_count = len([n for n in nodes if n.shape == "box"])
                
                st.markdown(f"**üìä {event_count} events, {thing_count} things**")
                
                # Pattern summary
                if fraud_count > 0:
                    st.error(f"üö® **{fraud_count} fraud-labeled events** in this component")
                
                # Pattern breakdown, at most four columns wide
                if pattern_counts:
                    cols = st.columns(min(len(pattern_counts), 4))
                    for i, (pat, count) in enumerate(pattern_counts.most_common()):
                        with cols[i % len(cols)]:
                            label = pat.replace("_", " ").title()
                            color = PATTERN_COLORS.get(pat, "#888")
                            st.markdown(
                                f'<span style="color:{color}">‚¨§</span> **{label}**: {count}',
                                unsafe_allow_html=True
                            )
                
                config = Config(
                    width=900,
                    height=600,
                    directed=True,
                    physics=True
                )
                
                agraph(nodes=nodes, edges=edges, config=config)
            else:
                st.info("No component data")
            
        except Exception as e:
            # UI boundary: show the failure instead of crashing the page.
            st.error(f"‚ùå {e}")
            st.code(str(e))

# Recent: table of the 20 newest events; the timestamp is converted to a
# string in the query before it is handed to the dataframe widget.
st.divider()
with st.session_state.driver.session() as session:
    recent = session.run("""
        MATCH (e:Event)
        RETURN e.event_id AS event_id, 
               toString(e.timestamp) AS timestamp,
               e.pattern AS pattern
        ORDER BY e.timestamp DESC 
        LIMIT 20
    """).data()
st.dataframe(recent, width='stretch')

st.caption("üï∞Ô∏è Time Machine: View how components evolved over time")
'''

# Working directory of the notebook; the app file below is written relative to it.
cwd = os.getcwd()
# The app source contains non-ASCII characters in its UI strings, so the
# encoding is pinned instead of relying on the platform default (which can
# fail or produce an unreadable script on systems that do not default to UTF-8).
with open('border_fraud_app.py', 'w', encoding='utf-8') as f:
    f.write(streamlit_code)

print(f"‚úÖ App written with Time Machine feature!")
print(f"\nThree temporal views:")
print(f"  1. As of today (latest) - shows current component state")
print(f"  2. As of event date - component when event occurred")
print(f"  3. As of custom date - component at any point in time")
print(f"\nRun: streamlit run border_fraud_app.py")

In [None]:
!streamlit run border_fraud_app.py --server.headless true