In [None]:
# Causal Web Theory - Master Analysis Notebook (v3 - Final)
# This notebook is designed to analyze the output of the CWT simulation,
# focusing on the essential analyses required to validate the theory's core hypotheses.
# It is tailored to the specific log file schemas provided.

# --- NEW CELL ---

# ## 1. Setup & Imports
# Import all the necessary libraries for data handling, analysis, and visualization.

import json
from pathlib import Path

import networkx as nx
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

print("Libraries imported successfully.")

# --- NEW CELL ---

# ## 2. Configuration & Data Loading
# Define the path to your log directory and load all log files into pandas DataFrames.

# Directory the log files are loaded from. The path is relative, so it resolves against
# the current working directory; this assumes the notebook is run from the folder that
# contains 'output'.
LOG_DIR = Path("output")

def parse_nested_log(log_path: Path) -> pd.DataFrame:
    """Parse a nested JSON-lines log (like law_wave_log) into a tidy DataFrame.

    Each line is expected to be a JSON object whose first entry maps a tick
    (an integer written as a string key) to an object of ``{id: value}``
    pairs. Only the first entry of each line is read, and one output row is
    produced per ``{id: value}`` pair.

    Lines that are blank, are not valid JSON, or do not have that shape
    (top level is not a non-empty object, the tick key is not an integer, or
    the payload is not an object) are skipped, so one malformed line does not
    abort parsing of the rest of the file.

    Args:
        log_path: Path of the log file to read.

    Returns:
        A DataFrame with columns ``tick``, ``id`` and ``value``. When no line
        could be parsed the DataFrame is empty and has no columns.
    """
    records = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(log_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict) or not data:
                continue

            tick_str, nested_data = next(iter(data.items()))
            if not isinstance(nested_data, dict):
                continue
            try:
                tick = int(tick_str)
            except ValueError:
                continue

            records.extend(
                {"tick": tick, "id": key, "value": value}
                for key, value in nested_data.items()
            )
    return pd.DataFrame(records)

def load_log_file(log_path: Path, is_nested: bool = False) -> pd.DataFrame:
    """Read one log file into a DataFrame without ever raising.

    Args:
        log_path: Location of the log file.
        is_nested: When true the file is handed to ``parse_nested_log``;
            otherwise it is read with ``pd.read_json(..., lines=True)``.

    Returns:
        The loaded DataFrame, or an empty DataFrame when the file does not
        exist or any exception is raised while loading. A status line is
        printed in every case.
    """
    if log_path.exists():
        try:
            if not is_nested:
                loaded = pd.read_json(log_path, lines=True)
            else:
                loaded = parse_nested_log(log_path)
            print(f"✅ Successfully loaded {log_path.name} with {len(loaded)} records.")
            return loaded
        except Exception as err:
            # Best-effort loader: report the problem and fall through to the empty frame.
            print(f"❌ Error loading {log_path.name}: {err}")
    else:
        print(f"⚠️ Warning: Log file not found at {log_path}. Returning empty DataFrame.")
    return pd.DataFrame()

# Flat logs: read line by line as JSON records by load_log_file.
node_state_df = load_log_file(LOG_DIR.joinpath("node_state_log.json"))
structural_growth_df = load_log_file(LOG_DIR.joinpath("structural_growth_log.json"))
emergence_log_df = load_log_file(LOG_DIR.joinpath("node_emergence_log.json"))
collapse_chain_df = load_log_file(LOG_DIR.joinpath("collapse_chain_log.json"))

# law_wave_log is nested, so it goes through the nested parser, which yields the
# generic columns 'tick', 'id' and 'value'.
law_wave_df = load_log_file(LOG_DIR.joinpath("law_wave_log.json"), is_nested=True)
if not law_wave_df.empty:
    # Give the generic parser columns the names the analysis cells use.
    law_wave_df = law_wave_df.rename(
        {'id': 'node_id', 'value': 'frequency'},
        axis='columns',
    )

# --- NEW CELL ---

# ## 3. Health Dashboard & Overview 📈
# High-level metrics and visualizations for the entire simulation run.

if not structural_growth_df.empty:
    # --- Node Growth Over Time ---
    # Line chart of 'node_count' against 'tick'. Both columns are checked first so a log
    # with a different schema produces a skip message instead of an exception from px.line.
    if {'tick', 'node_count'}.issubset(structural_growth_df.columns):
        fig_growth = px.line(
            structural_growth_df,
            x='tick',
            y='node_count',
            title='Node Count Growth Over Time'
        )
        fig_growth.update_layout(title_x=0.5)  # centre the title
        fig_growth.show()
    else:
        print("Skipping Node Growth plot: structural_growth_log is missing 'tick' or 'node_count'.")

# --- Propagation Origin Type Ratio ---
# Pie chart of how many emergence-log rows carry each 'origin_type' value.
if 'origin_type' in emergence_log_df.columns and not emergence_log_df.empty:
    propagation_counts = emergence_log_df['origin_type'].value_counts()
    fig_pie = px.pie(
        names=propagation_counts.index,
        values=propagation_counts.values,
        title='Propagation Origin Type Ratio',
    )
    fig_pie.show()

# --- NEW CELL ---

# ## 4. Essential Analysis 1: Law-Wave Speciation and Dominance 🔬
# **Hypothesis:** The simulation should evolve from chaos to a state dominated by a few stable Law-Waves.

if law_wave_df.empty:
    print("Skipping Law-Wave analysis: law_wave_log.json not found or empty.")
else:
    # Keep only rows whose frequency is above the 0.001 cut-off.
    active_waves = law_wave_df.loc[law_wave_df['frequency'] > 0.001]

    # For every tick, count how many distinct frequency values remain.
    frequency_diversity = (
        active_waves
        .groupby('tick')['frequency']
        .nunique()
        .reset_index()
    )

    fig_lw = px.line(
        frequency_diversity,
        x='tick',
        y='frequency',
        labels={'tick': 'Tick', 'frequency': 'Count of Unique Frequencies'},
        title='Law-Wave Speciation: Number of Unique Frequencies Over Time',
    )
    fig_lw.update_layout(title_x=0.5)
    fig_lw.show()
    print("A downward trend indicates a 'great filter' where few laws become dominant.")


# --- NEW CELL ---

# ## 5. Essential Analysis 2: The Creative Cycle (SIP vs. CSP) 🔄
# **Hypothesis:** The interplay between SIP and CSP creates a balanced, self-regulating ecosystem.

# Every column the plot reads is checked up front: the traces below index 'tick',
# 'sip_success' and 'csp_success' as well as 'avg_coherence', and a missing one would
# otherwise raise KeyError instead of skipping the analysis.
creative_cycle_cols = ['tick', 'avg_coherence', 'sip_success', 'csp_success']

if not structural_growth_df.empty and all(
    col in structural_growth_df.columns for col in creative_cycle_cols
):
    # Coherence goes on the primary y-axis; the two cumulative counts share the secondary one.
    fig_cycle = make_subplots(specs=[[{"secondary_y": True}]])
    fig_cycle.add_trace(
        go.Scatter(
            x=structural_growth_df['tick'],
            y=structural_growth_df['avg_coherence'],
            name='Average Coherence',
            line=dict(color='green'),
        ),
        secondary_y=False,
    )
    fig_cycle.add_trace(
        go.Scatter(
            x=structural_growth_df['tick'],
            y=structural_growth_df['sip_success'].cumsum(),
            name='Cumulative SIP',
            line=dict(color='royalblue', dash='dash'),
        ),
        secondary_y=True,
    )
    fig_cycle.add_trace(
        go.Scatter(
            x=structural_growth_df['tick'],
            y=structural_growth_df['csp_success'].cumsum(),
            name='Cumulative CSP',
            line=dict(color='firebrick', dash='dash'),
        ),
        secondary_y=True,
    )

    fig_cycle.update_layout(title_text='Creative Cycle: Coherence vs. Propagation Types', title_x=0.5)
    fig_cycle.update_xaxes(title_text='Tick')
    fig_cycle.update_yaxes(title_text='Average Coherence', secondary_y=False)
    fig_cycle.update_yaxes(title_text='Cumulative Node Creations', secondary_y=True)
    fig_cycle.show()
else:
    print("Skipping Creative Cycle analysis: structural_growth_log lacks required columns.")

# --- NEW CELL ---

# ## 6. Essential Analysis 3: The Classicalization Front 🌍
# **Hypothesis:** A visible boundary between quantum-like (coherent) and classical (collapsed) states propagates through the web.

if not node_state_df.empty:
    required_cols = ['tick', 'node_id', 'pos_x', 'pos_y', 'state']
    if all(col in node_state_df.columns for col in required_cols):
        # Frames are played in tick order.
        front_df = node_state_df.sort_values('tick')

        # Pin both axes to the extent of the whole run (plus a 5% margin). Without fixed
        # ranges the axes are sized from the first frame only, so nodes appearing later
        # outside that extent would be drawn out of view. Ranges are only set for numeric
        # position columns that contain at least one non-missing value.
        fixed_ranges = {}
        for range_arg, pos_col in (('range_x', 'pos_x'), ('range_y', 'pos_y')):
            if not pd.api.types.is_numeric_dtype(front_df[pos_col]):
                continue
            low, high = front_df[pos_col].min(), front_df[pos_col].max()
            if pd.notna(low) and pd.notna(high):
                pad = (high - low) * 0.05 or 1.0  # fall back to 1.0 when all values coincide
                fixed_ranges[range_arg] = [low - pad, high + pad]

        fig_front = px.scatter(
            front_df,
            x='pos_x',
            y='pos_y',
            animation_frame='tick',
            animation_group='node_id',
            color='state',
            hover_name='node_id',
            size_max=15,
            category_orders={'state': ['coherent', 'strained', 'collapsed']},
            color_discrete_map={'coherent': 'blue', 'strained': 'yellow', 'collapsed': 'red'},
            title='Animated Classicalization Front',
            **fixed_ranges
        )
        fig_front.update_layout(title_x=0.5)
        fig_front.show()
    else:
        print(f"Skipping Classicalization Front: node_state_log is missing one of {required_cols}.")
else:
    print("Skipping Classicalization Front: node_state_log not found or empty.")


# --- NEW CELL ---

# ## 7. Essential Analysis 4: Causal Lineage Tracing 🧬
# **Hypothesis:** Successful structures descend from other stable structures, demonstrating an evolutionary principle.

def _build_lineage_graph(start_node_id, emergence_df) -> "nx.DiGraph":
    """Walk parent links breadth-first from *start_node_id* and return the ancestry graph.

    Every visited node gets a ``label`` attribute: its id plus the
    ``origin_type`` of its first row in *emergence_df*, or ``(Primordial)``
    when the id has no row there. Edges point from parent to child. The
    ``parents`` value of a row is followed only when it is a list.
    """
    from collections import deque

    # Position of the first row for each id, built once instead of filtering the
    # DataFrame again for every visited node.
    first_row_of = {}
    for row_pos, node_id in enumerate(emergence_df['id']):
        first_row_of.setdefault(node_id, row_pos)

    graph = nx.DiGraph()
    pending = deque([start_node_id])  # FIFO queue with O(1) pops from the left
    seen = set()

    while pending:
        current = pending.popleft()
        if current in seen:
            continue
        seen.add(current)

        row_pos = first_row_of.get(current)
        if row_pos is None:
            graph.add_node(current, label=f"{current}\n(Primordial)")
            continue

        event = emergence_df.iloc[row_pos]
        graph.add_node(current, label=f"{current}\n({event['origin_type']})")

        parents = event.get('parents', [])
        if isinstance(parents, list):
            for parent_id in parents:
                graph.add_edge(parent_id, current)
                if parent_id not in seen:
                    pending.append(parent_id)
    return graph


def _lineage_figure(graph, start_node_id) -> "go.Figure":
    """Lay out *graph* with a seeded spring layout and return it as a plotly figure."""
    pos = nx.spring_layout(graph, seed=42)  # fixed seed keeps the layout reproducible

    # All edges go into one line trace; a None entry breaks the line between segments.
    edge_x, edge_y = [], []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y, line=dict(width=1, color='#888'),
        hoverinfo='none', mode='lines'
    )

    node_x = [pos[node][0] for node in graph.nodes()]
    node_y = [pos[node][1] for node in graph.nodes()]
    node_labels = [graph.nodes[node]['label'] for node in graph.nodes()]
    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers+text', text=node_labels,
        textposition='top center', marker=dict(size=20, color='skyblue'),
        hoverinfo='text', hovertext=node_labels
    )

    return go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=f"Causal Lineage for Node: {start_node_id}", title_x=0.5,
            showlegend=False, hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )


def trace_and_visualize_lineage(start_node_id: str):
    """Traces the ancestry of a node and visualizes it as a graph.

    Reads the module-level ``emergence_log_df``. The trace needs its ``id``
    and ``origin_type`` columns; when the log is empty or either column is
    missing, a message is printed and nothing is drawn.

    Args:
        start_node_id: Id of the node whose ancestors are traced.

    Returns:
        None. The figure is displayed with ``show()``.
    """
    required = ('id', 'origin_type')
    if emergence_log_df.empty or any(col not in emergence_log_df.columns for col in required):
        print("Cannot trace lineage, emergence_log is empty or missing 'id'/'origin_type' column.")
        return

    lineage_graph = _build_lineage_graph(start_node_id, emergence_log_df)
    fig_lineage = _lineage_figure(lineage_graph, start_node_id)
    fig_lineage.show()

# --- EXAMPLE USAGE ---
# Trace the lineage of the node in the last row of the emergence log.
if emergence_log_df.empty:
    print("\nSkipping lineage example: emergence_log.json not found or empty.")
elif 'id' not in emergence_log_df.columns:
    print("\nCould not run lineage example: 'id' column not found in emergence log.")
else:
    # The 'id' column holds the node ids.
    last_created_node = emergence_log_df['id'].iloc[-1]
    print(f"\n--- Visualizing lineage for an example node: {last_created_node} ---")
    trace_and_visualize_lineage(last_created_node)

Libraries imported successfully.
✅ Successfully loaded node_state_log.json with 203 records.
✅ Successfully loaded structural_growth_log.json with 32 records.
✅ Successfully loaded node_emergence_log.json with 32 records.
✅ Successfully loaded collapse_chain_log.json with 0 records.
✅ Successfully loaded law_wave_log.json with 955 records.


A downward trend indicates a 'great filter' where few laws become dominant.
Skipping Creative Cycle analysis: structural_growth_log lacks required columns.
Skipping Classicalization Front: node_state_log is missing one of ['tick', 'node_id', 'pos_x', 'pos_y', 'state'].

--- Visualizing lineage for an example node: A_S4_S195 ---


KeyError: 'mechanism'