# Level‑1 Cross‑Architectural Internal State Experiment (Multi‑Run)

This notebook extends the Level‑1 experiment to **multiple independent runs** (random seeds) and produces **aggregate statistics** and an updated results graph.

Architectures:
- Rule‑based controller (deterministic baseline)
- Tiny neural policy (re-sampled random weights per run)
- Stochastic policy (logistic action probability; independently seeded random generator per run)

Primary test (per run, per architecture): a chi‑square test of independence on the action‑trajectory distributions obtained under two controlled internal‑state initialisations (state = 0.0 vs. state = 1.0).

Conscious Machines, Research (January 2026)

[https://research.conscious-machines.org](https://research.conscious-machines.org)



In [None]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 200)


In [None]:
# --- Deterministic 1D environment (same as prior notebook) ---
class LineWorld:
    """Deterministic one-dimensional world of ``size`` cells indexed from 0.

    The agent starts on the middle cell (``size // 2``) and moves one cell per
    step; moves that would leave the line are clipped at the ends.
    """

    def __init__(self, size=5):
        """Store the number of cells and place the agent on the middle cell."""
        self.size = size
        self.reset()

    def reset(self):
        """Move the agent back to the middle cell and return that position."""
        middle = self.size // 2
        self.pos = middle
        return middle

    def step(self, action):
        """Apply ``action`` and return the resulting position.

        ``'LEFT'`` moves one cell towards index 0, ``'RIGHT'`` one cell towards
        index ``size - 1``.  Any other action leaves the position unchanged.
        """
        here = self.pos
        if action == 'LEFT':
            here = max(0, here - 1)
        elif action == 'RIGHT':
            here = min(self.size - 1, here + 1)
        self.pos = here
        return here

def rollout(agent, internal_state, steps=5):
    """Run ``agent`` in a fresh ``LineWorld`` and return the actions it chose.

    Parameters:
        agent: callable ``agent(obs, internal_state)`` returning an action.
        internal_state: value passed unchanged to the agent on every step.
        steps: number of actions to collect.

    Returns:
        A tuple of the ``steps`` actions in the order they were taken.
    """
    world = LineWorld()
    observation = world.reset()
    actions = []
    for _ in range(steps):
        chosen = agent(observation, internal_state)
        actions.append(chosen)
        # The next observation is the position reached after applying the action.
        observation = world.step(chosen)
    return tuple(actions)


In [None]:
# --- Agents ---
def rule_agent(obs, state):
    """Deterministic baseline: the internal state alone picks the direction.

    Returns ``'RIGHT'`` when ``state`` is strictly greater than 0.5 and
    ``'LEFT'`` otherwise.  ``obs`` is accepted for interface parity with the
    other agents but is not used.
    """
    if state > 0.5:
        return 'RIGHT'
    return 'LEFT'

def make_neural_agent(rng: np.random.Generator):
    """Build a tiny two-layer neural policy with weights drawn from ``rng``.

    The weights are sampled once, when this factory is called, and stay fixed
    for the lifetime of the returned agent.  Calling the factory again with a
    differently seeded generator gives a new random policy.

    Parameters:
        rng: NumPy random generator used to draw the weight matrices
            (``W1`` of shape (2, 3) first, then ``W2`` of shape (3, 1)).

    Returns:
        A callable ``neural_agent(obs, state)`` that returns ``'RIGHT'`` when
        the network output is positive and ``'LEFT'`` otherwise.
    """
    W1 = rng.normal(size=(2, 3))
    W2 = rng.normal(size=(3, 1))

    def neural_agent(obs, state):
        # Inputs: the observation scaled by 5.0 (presumably the default
        # LineWorld size -- confirm if other sizes are used) and the raw state.
        x = np.array([obs / 5.0, state])
        h = np.tanh(x @ W1)
        # ``h @ W2`` has shape (1,).  Extract the single element explicitly:
        # implicit float() conversion of an array with ndim > 0 is deprecated
        # in recent NumPy releases.
        y = (h @ W2).item()
        return 'RIGHT' if y > 0 else 'LEFT'
    return neural_agent

def make_stochastic_agent(rng: np.random.Generator):
    """Build a stochastic policy whose action probability depends on the state.

    Parameters:
        rng: NumPy random generator; one uniform draw is consumed per action.

    Returns:
        A callable ``stochastic_agent(obs, state)`` that returns ``'RIGHT'``
        with probability ``sigmoid(0.8 * state - 0.2 * obs)`` and ``'LEFT'``
        otherwise.
    """
    def stochastic_agent(obs, state):
        # Linear score in state and observation, squashed by a logistic.
        logit = 0.8 * state - 0.2 * obs
        p_right = 1 / (1 + np.exp(-logit))
        if rng.random() < p_right:
            return 'RIGHT'
        return 'LEFT'
    return stochastic_agent


In [None]:
def evaluate(agent, trials=200, steps=5):
    """Compare an agent's trajectory distributions for state 0.0 vs state 1.0.

    ``trials`` rollouts are run with internal state 0.0 (condition A), then
    ``trials`` rollouts with internal state 1.0 (condition B).  The counts of
    each distinct trajectory under A and B form a contingency table that is
    passed to ``scipy.stats.chi2_contingency``.

    Parameters:
        agent: callable ``agent(obs, state)`` returning an action.
        trials: number of rollouts per condition.
        steps: number of actions per rollout.

    Returns:
        ``(n_unique_trajectories, chi2, p)``.  When at most one distinct
        trajectory is observed the test is undefined and ``(n, 0.0, 1.0)`` is
        returned instead.

    Note:
        ``chi2_contingency`` is called with its defaults, so SciPy applies
        Yates' continuity correction when the table has one degree of freedom
        (i.e. exactly two distinct trajectories).
    """
    from collections import Counter

    # Counting in one pass avoids rescanning the trajectory lists once per
    # distinct trajectory.  All A rollouts are consumed before any B rollout,
    # so a stateful (stochastic) agent draws random numbers in the same order
    # as before.
    counts_a = Counter(rollout(agent, 0.0, steps=steps) for _ in range(trials))
    counts_b = Counter(rollout(agent, 1.0, steps=steps) for _ in range(trials))

    # First-seen order (A, then B) keeps the table's row order deterministic;
    # iterating a set of string tuples can give a different order in each
    # Python process, which may perturb floating-point results slightly.
    unique = list(dict.fromkeys([*counts_a, *counts_b]))

    if len(unique) <= 1:
        return len(unique), 0.0, 1.0

    table = [[counts_a[u], counts_b[u]] for u in unique]
    chi2, p, _, _ = chi2_contingency(table)
    return len(unique), float(chi2), float(p)


In [None]:
# --- Multi-run experiment ---
def run_experiment(n_runs=50, trials=200, steps=5, base_seed=123):
    """Evaluate all three architectures over ``n_runs`` independently seeded runs.

    For run ``r`` the seed is ``base_seed + r``.  The neural agent's weights
    are drawn from a generator seeded with that value; the stochastic agent
    uses a separate generator seeded with ``seed + 10_000``.  The rule-based
    agent is deterministic and uses no generator.

    Parameters:
        n_runs: number of runs; ``0`` yields an empty, correctly-shaped frame.
        trials: rollouts per internal-state condition, passed to ``evaluate``.
        steps: actions per rollout, passed to ``evaluate``.
        base_seed: seed of the first run.

    Returns:
        A ``pandas.DataFrame`` with one row per (run, architecture) and the
        columns ``run``, ``seed``, ``architecture``, ``unique_trajectories``,
        ``chi2``, ``p_value`` and ``neg_log10_p`` (``inf`` where ``p_value``
        is not positive).
    """
    columns = ['run', 'seed', 'architecture', 'unique_trajectories', 'chi2', 'p_value']
    rows = []
    for run in range(n_runs):
        seed = base_seed + run
        rng = np.random.default_rng(seed)
        rng2 = np.random.default_rng(seed + 10_000)

        # Evaluation order (rule, neural, stochastic) fixes the row order
        # within each run.
        agents = [
            ('Rule-based', rule_agent),
            ('Neural (fixed random weights)', make_neural_agent(rng)),
            ('Stochastic', make_stochastic_agent(rng2)),
        ]
        for architecture, agent in agents:
            nuniq, chi2, p = evaluate(agent, trials=trials, steps=steps)
            rows.append({
                'run': run,
                'seed': seed,
                'architecture': architecture,
                'unique_trajectories': nuniq,
                'chi2': chi2,
                'p_value': p,
            })

    # Explicit columns keep the frame well-formed (and the next line valid)
    # even when no runs were requested.
    df = pd.DataFrame(rows, columns=columns)
    df['neg_log10_p'] = df['p_value'].apply(lambda x: -np.log10(x) if x > 0 else np.inf)
    return df

# Run the full experiment: 50 seeded runs, 200 rollouts per state condition,
# 5 actions per rollout.  `df` is the per-run log used by the cells below.
df = run_experiment(n_runs=50, trials=200, steps=5, base_seed=123)
# Show the first rows as a quick sanity check.
df.head()

In [None]:
# Save raw run log (useful for paper supplements)
# File name is relative, so the log is written to the current working directory.
out_csv = 'level1_cross_arch_multirun_log.csv'
# index=False leaves the DataFrame's row index out of the file.
df.to_csv(out_csv, index=False)
# Echo the file name as the cell output.
out_csv

In [None]:
# Aggregate summary
# Named aggregations: output column -> (source column, reducer).
# Each metric gets its mean and standard deviation across runs.
_summary_spec = {
    'runs': ('run', 'count'),
    'chi2_mean': ('chi2', 'mean'),
    'chi2_std': ('chi2', 'std'),
    'neglogp_mean': ('neg_log10_p', 'mean'),
    'neglogp_std': ('neg_log10_p', 'std'),
    'uniq_mean': ('unique_trajectories', 'mean'),
    'uniq_std': ('unique_trajectories', 'std'),
}

# One row per architecture, with 'architecture' restored as a regular column.
per_architecture = df.groupby('architecture')
summary = per_architecture.agg(**_summary_spec).reset_index()

summary

In [None]:
# Plot: mean ± sd across runs for chi-square and -log10(p)
arch_order = ['Rule-based', 'Neural (fixed random weights)', 'Stochastic']
# Put the summary rows in presentation order rather than groupby's order.
sum2 = summary.set_index('architecture').loc[arch_order].reset_index()

x = np.arange(len(sum2))

fig, ax = plt.subplots(figsize=(10, 5))
# Both statistics share one axis; each tuple is (column prefix, marker, line style, legend label).
for prefix, marker, linestyle, label in [
    ('chi2', 'o', '-', 'Chi-square (mean ± sd)'),
    ('neglogp', 's', '--', '-log10(p) (mean ± sd)'),
]:
    ax.errorbar(
        x,
        sum2[f'{prefix}_mean'],
        yerr=sum2[f'{prefix}_std'],
        marker=marker,
        linestyle=linestyle,
        capsize=4,
        label=label,
    )
ax.set_xticks(x)
ax.set_xticklabels(sum2['architecture'], rotation=0)
ax.set_xlabel('Policy Architecture')
ax.set_ylabel('Statistic Value')
ax.set_title('Level-1 Cross-Architectural Causal Dependence (Multi-Run)')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Optional: per-run scatter (helps show variability)
arch_order = ['Rule-based', 'Neural (fixed random weights)', 'Stochastic']
fig, ax = plt.subplots(figsize=(10, 5))
# Fixed seed so the horizontal jitter is the same every time the cell runs.
rngj = np.random.default_rng(0)
for i, arch in enumerate(arch_order):
    sub = df.loc[df['architecture'] == arch]
    n_points = len(sub)
    # Uniform horizontal offsets in [-0.075, 0.075) so overlapping runs stay visible.
    jitter = (rngj.random(n_points) - 0.5) * 0.15
    ax.scatter(np.full(n_points, i) + jitter, sub['chi2'])

ax.set_xticks(np.arange(len(arch_order)))
ax.set_xticklabels(arch_order)
ax.set_xlabel('Policy Architecture')
ax.set_ylabel('Chi-square Statistic (per run)')
ax.set_title('Per-Run Chi-square Values by Architecture')
fig.tight_layout()
plt.show()

## Notes for reporting in the paper

- The CSV log (`level1_cross_arch_multirun_log.csv`) can be uploaded as supplementary material.
- For the main paper figure, the **mean ± sd** plot is typically sufficient.
- If you want to emphasise falsifiability, include the **per-run scatter** as a supplementary figure.
