# Slope Analysis: Selection Gradients in Multilayer Networks

**Purpose**: Analyze θ,φ coefficients from enumeration to compute selection gradients  
**Input**: CSV/Parquet from `Enumeration_N6.ipynb`  
**Language**: Python 3.10+

---

## Overview

This notebook analyzes the enumeration results to compute:

1. **Selection gradients**: ∂r*/∂b = φ₂₀/θ₂ and ∂r*/∂c = (φ₀₁−φ₂₁)/θ₂
2. **Sign classifications**: Distribution of configurations by sign(θ₁−θ₃) × sign(φ₀₁−φ₂₁)
3. **Statistical aggregations**: By root positions, theta triplets, etc.
4. **Visualizations**: 2D histogram (heatmap) of the selection gradients, bar chart of the sign classes, scatter plots

### Prerequisites

```bash
pip install polars duckdb matplotlib numpy
```

---
## 1. Configuration & Imports

In [None]:
# =============================================================================
# CONFIGURATION
# =============================================================================
# Notebook-wide settings. Later cells read these names directly, so edit the
# values here rather than inside the individual functions.

# Input data (from Enumeration_N6.ipynb).
# load_data() uses the Parquet reader when the suffix is '.parquet' and the
# CSV reader for anything else.
DATA_FILE = "N6_enumeration.csv"        # Or .parquet
# DATA_FILE = "N6_enumeration.parquet"  # Faster for large files

# Analysis parameters
# Passed to classify_by_signs(): differences within [-EPS, +EPS] are labelled "=0".
EPS = 1e-8  # Tolerance for sign classification

# Output figures
# When True, each plotting function also writes its figure to a PNG file in
# the current working directory.
SAVE_FIGURES = True
# Resolution (dots per inch) handed to plt.savefig for those PNG files.
FIG_DPI = 300

In [None]:
import polars as pl
import duckdb
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pathlib import Path

# Plotting style: notebook-wide Matplotlib defaults, applied once so that
# every figure below shares the same font sizes and on-screen resolution.
plt.rcParams['font.size'] = 11
plt.rcParams['axes.titlesize'] = 12
plt.rcParams['axes.labelsize'] = 11
plt.rcParams['figure.dpi'] = 100

---
## 2. Load Data

Uses Polars LazyFrame for memory-efficient processing of large datasets.

In [None]:
def load_data(filepath: str) -> pl.LazyFrame:
    """
    Load enumeration data as a Polars LazyFrame.

    The reader is picked from the file extension, compared
    case-insensitively: ``.parquet`` is scanned with ``pl.scan_parquet``;
    every other extension is scanned as CSV with ``pl.scan_csv``.

    Args:
        filepath: Path to the CSV or Parquet file.

    Returns:
        A lazy frame over the file; nothing is read until it is collected.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        raise FileNotFoundError(f"Data file not found: {filepath}")

    # Lower-case the suffix so that e.g. "data.PARQUET" is not mistakenly
    # handed to the CSV reader.
    if path.suffix.lower() == '.parquet':
        return pl.scan_parquet(filepath)
    return pl.scan_csv(filepath)

# Load data
lf = load_data(DATA_FILE)

# Check schema and row count
schema = lf.collect_schema()
print(f"Schema: {schema}")

total_rows = lf.select(pl.len()).collect().item()
print(f"Total rows: {total_rows:,}")

In [None]:
# Optional: Convert CSV to Parquet for faster future loads
def csv_to_parquet(csv_path: str, parquet_path: str = None):
    """
    Convert CSV to Parquet format (streaming, memory-efficient).
    """
    if parquet_path is None:
        parquet_path = csv_path.replace('.csv', '.parquet')
    
    print(f"Converting {csv_path} to {parquet_path}...")
    con = duckdb.connect()
    con.execute(f"""
        COPY (SELECT * FROM '{csv_path}') 
        TO '{parquet_path}' 
        (FORMAT PARQUET, COMPRESSION ZSTD)
    """)
    print("Done!")
    return parquet_path

# Uncomment to convert:
# csv_to_parquet(DATA_FILE)

---
## 3. Basic Statistics

In [None]:
def compute_basic_stats(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Compute basic statistics for numeric columns.

    Args:
        lf: Lazy frame containing the columns theta1, theta2, theta3,
            phi01, phi20 and phi21.

    Returns:
        A one-row DataFrame with a ``rows`` count followed by
        ``<col>_min``, ``<col>_mean``, ``<col>_std`` (sample standard
        deviation, ddof=1) and ``<col>_max`` for each column, grouped by
        statistic in that order.
    """
    num_cols = ['theta1', 'theta2', 'theta3', 'phi01', 'phi20', 'phi21']

    # Build the expression list statistic by statistic so the output columns
    # come out grouped as: rows, all mins, all means, all stds, all maxes.
    exprs = [pl.len().alias("rows")]
    exprs += [pl.col(c).min().alias(f"{c}_min") for c in num_cols]
    exprs += [pl.col(c).mean().alias(f"{c}_mean") for c in num_cols]
    exprs += [pl.col(c).std(ddof=1).alias(f"{c}_std") for c in num_cols]
    exprs += [pl.col(c).max().alias(f"{c}_max") for c in num_cols]

    # A single select/collect evaluates every aggregate in one query.
    return lf.select(exprs).collect()

stats = compute_basic_stats(lf)
print("\nNumeric Summary:")
print(stats)

In [None]:
def compute_correlations(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Pearson correlation for every unordered pair of the numeric columns.

    Returns a two-column DataFrame: ``pair`` holds the label
    ``"<a>_vs_<b>"`` and ``r`` holds the correlation coefficient.
    """
    columns = ['theta1', 'theta2', 'theta3', 'phi01', 'phi20', 'phi21']

    # One expression per pair (a, b) where a precedes b in ``columns``.
    pairwise = [
        pl.corr(pl.col(first), pl.col(second)).alias(f"{first}_vs_{second}")
        for idx, first in enumerate(columns)
        for second in columns[idx + 1:]
    ]

    # The query yields a single wide row; transpose it into one row per pair.
    wide = lf.select(pairwise).collect()
    long = wide.transpose(include_header=True, header_name="pair", column_names=["r"])
    return long

correlations = compute_correlations(lf)
print("\nPairwise Correlations (Pearson r):")
print(correlations)

---
## 4. Sign Classification

Classify configurations by sign of key quantities:
- d = θ₁ − θ₃ (selection differential)
- q = φ₀₁ − φ₂₁ (inter-layer correlation differential)

In [None]:
def classify_by_signs(lf: pl.LazyFrame, eps: float = 1e-8) -> pl.DataFrame:
    """
    Count configurations by sign(θ₁−θ₃) × sign(φ₀₁−φ₂₁).

    A difference is labelled ">0" when it exceeds ``eps``, "<0" when it is
    below ``-eps``, and "=0" otherwise.

    Args:
        lf: Lazy frame with columns theta1, theta3, phi01 and phi21.
        eps: Half-width of the band around zero that counts as "=0".

    Returns:
        A long-format table with exactly nine rows, one per sign
        combination (combinations absent from the data get ``n = 0``),
        sorted by ``case_id``. Columns: ``case_id`` (1..9), ``s_d``,
        ``s_q``, ``case_math`` (readable label), ``n`` (count) and
        ``percent`` (share of the total count; 0.0 everywhere when the
        input has no rows).
    """
    def sign_label(name: str) -> pl.Expr:
        # Three-way sign label for column ``name`` with tolerance ``eps``.
        # NOTE(review): null values presumably fall into the "=0" branch via
        # otherwise(); confirm whether the input can contain nulls.
        value = pl.col(name)
        return (
            pl.when(value > eps).then(pl.lit(">0"))
            .when(value < -eps).then(pl.lit("<0"))
            .otherwise(pl.lit("=0"))
        )

    counts_dq = (
        lf.with_columns(
            d = pl.col("theta1") - pl.col("theta3"),
            q = pl.col("phi01") - pl.col("phi21"),
        )
        .with_columns(
            s_d = sign_label("d"),
            s_q = sign_label("q"),
        )
        .group_by(["s_d", "s_q"])
        .agg(pl.len().alias("n"))
        .collect()
    )

    # Create the full grid of nine sign combinations with labels, so that
    # combinations missing from the data still appear in the result.
    labels = pl.DataFrame({
        "s_d": [">0", ">0", ">0", "=0", "=0", "=0", "<0", "<0", "<0"],
        "s_q": [">0", "=0", "<0", ">0", "=0", "<0", ">0", "=0", "<0"],
    }).with_columns(
        case_id = pl.arange(1, 10),
        case_math = pl.concat_str([
            pl.lit("θ₁−θ₃"),
            pl.col("s_d"),
            pl.lit(" & φ₀₁−φ₂₁"),
            pl.col("s_q"),
        ], separator="")
    )

    # Left join keeps all nine grid rows; unmatched combinations get n = 0.
    joined = labels.join(counts_dq, on=["s_d", "s_q"], how="left").with_columns(
        pl.col("n").fill_null(0)
    )

    total = int(joined["n"].sum())
    # With no rows at all, dividing by the total would give 0/0 = NaN for
    # every percentage; divide by 1 instead so they come out as 0.0.
    denominator = total if total > 0 else 1
    result = (
        joined
        .with_columns((pl.col("n") / pl.lit(denominator) * 100).alias("percent"))
        .select(["case_id", "s_d", "s_q", "case_math", "n", "percent"])
        .sort("case_id")
    )

    return result

sign_table = classify_by_signs(lf, EPS)
print("\nSign Classification Table:")
print(sign_table)

total = sign_table["n"].sum()
print(f"\nTotal: {total:,} | Percent sum: {sign_table['percent'].sum():.2f}%")

---
## 5. Selection Gradient Analysis

Compute the partial derivatives of the critical ratio r*:
- ∂r*/∂b = φ₂₀/θ₂
- ∂r*/∂c = (φ₀₁−φ₂₁)/θ₂

In [None]:
def compute_slope_stats(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Summarise the two selection gradients over the whole dataset.

    The gradients are ``drdb = phi20 / theta2`` and
    ``drdc = (phi01 - phi21) / theta2``. The result is a one-row DataFrame
    with the row count ``n`` and the mean and sample standard deviation
    (ddof=1) of each gradient.
    """
    # NOTE(review): rows with theta2 == 0 are not filtered out here; confirm
    # that the enumeration never produces them.
    theta2 = pl.col("theta2")
    with_slopes = lf.with_columns(
        drdb=pl.col("phi20") / theta2,
        drdc=(pl.col("phi01") - pl.col("phi21")) / theta2,
    )

    # Output column order: n, drdb_mean, drdb_std, drdc_mean, drdc_std.
    summary = [pl.len().alias("n")]
    for slope in ("drdb", "drdc"):
        summary.append(pl.col(slope).mean().alias(f"{slope}_mean"))
        summary.append(pl.col(slope).std(ddof=1).alias(f"{slope}_std"))

    return with_slopes.select(summary).collect()

slope_stats = compute_slope_stats(lf)
print("\nOverall Selection Gradient Statistics:")
print(slope_stats)

In [None]:
def slopes_by_theta_triplet(lf: pl.LazyFrame) -> pl.DataFrame:
    """
    Summarise the selection gradients within each (θ₁, θ₂, θ₃) group.

    Returns one row per distinct triplet, sorted by the triplet, holding the
    group size ``n`` and the mean and sample standard deviation (ddof=1) of
    ``phi20 / theta2`` and ``(phi01 - phi21) / theta2``.
    """
    keys = ["theta1", "theta2", "theta3"]

    # The two gradient expressions, defined once and reused for mean and std.
    drdb = pl.col("phi20") / pl.col("theta2")
    drdc = (pl.col("phi01") - pl.col("phi21")) / pl.col("theta2")

    per_triplet = lf.group_by(keys).agg(
        pl.len().alias("n"),
        drdb.mean().alias("drdb_mean"),
        drdb.std(ddof=1).alias("drdb_std"),
        drdc.mean().alias("drdc_mean"),
        drdc.std(ddof=1).alias("drdc_std"),
    )
    return per_triplet.sort(keys).collect()

theta_agg = slopes_by_theta_triplet(lf)
print(f"\nSlopes by (θ₁, θ₂, θ₃) triplet ({theta_agg.height} groups):")
print(theta_agg.head(10))

---
## 6. Visualizations

In [None]:
def plot_slope_histogram(lf: pl.LazyFrame, sample_size: int = 100000):
    """
    Create a 2D histogram of the selection gradients.

    The gradients ``drdb = phi20 / theta2`` and
    ``drdc = (phi01 - phi21) / theta2`` are computed for every row, a
    reproducible random sample (seed 42) of at most ``sample_size`` rows is
    drawn, and the sample is binned on a 100×100 grid. Non-finite gradients
    are dropped before binning.

    Args:
        lf: Lazy frame with columns theta2, phi01, phi20 and phi21.
        sample_size: Maximum number of rows to sample for the plot.

    Returns:
        ``(H, extent)``: the 2D count array from ``np.histogram2d`` and the
        ``[xmin, xmax, ymin, ymax]`` extent used for the image.

    Side effects:
        Shows the figure and, when ``SAVE_FIGURES`` is true, writes
        ``fig_slope_histogram.png``.
    """
    slopes = (
        lf.with_columns(
            drdb = pl.col("phi20") / pl.col("theta2"),
            drdc = (pl.col("phi01") - pl.col("phi21")) / pl.col("theta2"),
        )
        .select(["drdb", "drdc"])
        .collect()
    )
    # The collected frame already knows its row count, so the source does not
    # need to be scanned a second time just to cap the sample size.
    sample = slopes.sample(n=min(sample_size, slopes.height), seed=42)

    x = sample["drdb"].to_numpy()
    y = sample["drdc"].to_numpy()

    # np.histogram2d cannot derive bin edges from inf/NaN (e.g. produced by
    # theta2 == 0), so keep only points where both gradients are finite.
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    # Create histogram
    fig, ax = plt.subplots(figsize=(8, 7))

    H, xedges, yedges = np.histogram2d(x, y, bins=100)
    # Clip the colour scale at the 99.5th percentile so a few very dense bins
    # do not wash out the rest. When most bins are empty that percentile is
    # 0, which would collapse the scale; fall back to the maximum count.
    vmax = np.percentile(H, 99.5)
    if vmax <= 0:
        vmax = max(H.max(), 1.0)
    extent = [xedges.min(), xedges.max(), yedges.min(), yedges.max()]

    # H is indexed [x, y]; imshow expects [row=y, col=x], hence the transpose.
    im = ax.imshow(H.T, origin='lower', extent=extent, aspect='auto',
                   cmap='viridis', vmin=0, vmax=vmax, interpolation='bilinear')

    # Colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="4%", pad=0.08)
    fig.colorbar(im, cax=cax, label="Count")

    # Labels
    ax.set_xlabel(r"$\partial r^* / \partial b$", fontsize=12)
    ax.set_ylabel(r"$\partial r^* / \partial c$", fontsize=12)
    ax.set_title("Distribution of Selection Gradients", fontweight='bold')
    ax.axhline(0, color='white', lw=0.5, ls='--', alpha=0.7)
    ax.axvline(0, color='white', lw=0.5, ls='--', alpha=0.7)

    plt.tight_layout()

    if SAVE_FIGURES:
        plt.savefig("fig_slope_histogram.png", dpi=FIG_DPI, bbox_inches='tight', facecolor='white')
        print("Saved: fig_slope_histogram.png")

    plt.show()
    return H, extent

H, extent = plot_slope_histogram(lf)

In [None]:
def plot_sign_distribution(sign_table: pl.DataFrame):
    """
    Draw the sign-classification percentages as a colour-coded bar chart.

    Reads the ``case_math``, ``n``, ``percent``, ``s_d`` and ``s_q`` columns
    of ``sign_table``. Bars are green when d>0, blue when d<0 and q>0, and
    red otherwise. Shows the figure and, when ``SAVE_FIGURES`` is true,
    writes ``fig_sign_distribution.png``.
    """
    from matplotlib.patches import Patch

    green, blue, red = '#4DAF4A', '#377EB8', '#E41A1C'

    def bar_color(s_d, s_q):
        # Colour by cooperation outcome.
        if s_d == ">0":                      # d > 0 favors cooperation
            return green
        if s_d == "<0" and s_q == ">0":      # d < 0 but q > 0 can help
            return blue
        return red

    fig, ax = plt.subplots(figsize=(12, 5))

    # Pull the plotted columns out of the table.
    case_labels = sign_table["case_math"].to_list()
    counts = sign_table["n"].to_numpy()
    percents = sign_table["percent"].to_numpy()
    colors = [
        bar_color(s_d, s_q)
        for s_d, s_q in zip(sign_table["s_d"].to_list(), sign_table["s_q"].to_list())
    ]

    positions = np.arange(len(case_labels))
    bars = ax.bar(positions, percents, color=colors, edgecolor='black', alpha=0.8)

    # Annotate each non-empty bar with its percentage and raw count.
    for bar, pct, cnt in zip(bars, percents, counts):
        if not pct > 0:
            continue
        x_center = bar.get_x() + bar.get_width()/2
        ax.text(x_center, bar.get_height() + 0.5,
                f'{pct:.1f}%\n({cnt:,})', ha='center', va='bottom', fontsize=8)

    # Put the two conditions of each label on separate lines.
    tick_labels = [label.replace(' & ', '\n') for label in case_labels]
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, fontsize=9)
    ax.set_ylabel("Percentage of Configurations")
    ax.set_title("Distribution by Sign of (θ₁−θ₃) and (φ₀₁−φ₂₁)", fontweight='bold')

    # Legend explaining the three colours.
    legend_spec = [
        (green, 'd>0 (cooperation favored)'),
        (blue, 'd<0, q>0 (conditional)'),
        (red, 'Other'),
    ]
    ax.legend(
        handles=[Patch(facecolor=color, label=text) for color, text in legend_spec],
        loc='upper right',
    )

    plt.tight_layout()

    if SAVE_FIGURES:
        plt.savefig("fig_sign_distribution.png", dpi=FIG_DPI, bbox_inches='tight', facecolor='white')
        print("Saved: fig_sign_distribution.png")

    plt.show()

plot_sign_distribution(sign_table)

In [None]:
def plot_theta_vs_phi(lf: pl.LazyFrame, sample_size: int = 50000):
    """
    Scatter plots of theta vs phi relationships.

    Draws three panels from a reproducible random sample (seed 42) of at
    most ``sample_size`` rows: θ₂ vs φ₂₀, (θ₁−θ₃) vs (φ₀₁−φ₂₁), and
    φ₂₀ vs φ₂₁ with a y=x reference line.

    Args:
        lf: Lazy frame with columns theta1, theta2, theta3, phi01, phi20
            and phi21.
        sample_size: Maximum number of rows to plot.

    Side effects:
        Shows the figure and, when ``SAVE_FIGURES`` is true, writes
        ``fig_theta_phi_scatter.png``.
    """
    # Project the six plotted columns before collecting so that any other
    # columns in the source are never materialised, and take the row count
    # from the collected frame instead of scanning the source again.
    plotted_cols = ["theta1", "theta2", "theta3", "phi01", "phi20", "phi21"]
    data = lf.select(plotted_cols).collect()
    sample = data.sample(n=min(sample_size, data.height), seed=42)

    def draw_zero_lines(ax):
        # Dashed reference lines through the origin.
        ax.axhline(0, color='gray', lw=0.5, ls='--')
        ax.axvline(0, color='gray', lw=0.5, ls='--')

    fig, axes = plt.subplots(1, 3, figsize=(14, 4))

    # θ₂ vs φ₂₀
    ax = axes[0]
    ax.scatter(sample["theta2"].to_numpy(), sample["phi20"].to_numpy(),
               alpha=0.1, s=1, c='#377EB8')
    ax.set_xlabel(r"$\theta_2$")
    ax.set_ylabel(r"$\phi_{20}$")
    ax.set_title(r"$\theta_2$ vs $\phi_{20}$")
    draw_zero_lines(ax)

    # θ₁−θ₃ vs φ₀₁−φ₂₁
    ax = axes[1]
    d = sample["theta1"].to_numpy() - sample["theta3"].to_numpy()
    q = sample["phi01"].to_numpy() - sample["phi21"].to_numpy()
    ax.scatter(d, q, alpha=0.1, s=1, c='#E41A1C')
    ax.set_xlabel(r"$\theta_1 - \theta_3$")
    ax.set_ylabel(r"$\phi_{01} - \phi_{21}$")
    ax.set_title(r"Selection differential vs inter-layer")
    draw_zero_lines(ax)

    # φ₂₀ vs φ₂₁
    ax = axes[2]
    ax.scatter(sample["phi20"].to_numpy(), sample["phi21"].to_numpy(),
               alpha=0.1, s=1, c='#4DAF4A')
    ax.set_xlabel(r"$\phi_{20}$")
    ax.set_ylabel(r"$\phi_{21}$")
    ax.set_title(r"Inter-layer correlations")
    draw_zero_lines(ax)
    # Add y=x line spanning the union of the current x and y ranges
    lims = [min(ax.get_xlim()[0], ax.get_ylim()[0]),
            max(ax.get_xlim()[1], ax.get_ylim()[1])]
    ax.plot(lims, lims, 'k--', alpha=0.3, lw=1)

    plt.tight_layout()

    if SAVE_FIGURES:
        plt.savefig("fig_theta_phi_scatter.png", dpi=FIG_DPI, bbox_inches='tight', facecolor='white')
        print("Saved: fig_theta_phi_scatter.png")

    plt.show()

plot_theta_vs_phi(lf)

---
## 7. Summary Report

In [None]:
def generate_summary(lf: pl.LazyFrame, sign_table: pl.DataFrame):
    """
    Print a summary report of the sign classification.

    Reports the total count, the count and share of configurations with
    θ₁−θ₃ > 0, the count and share with θ₁−θ₃ < 0 and φ₀₁−φ₂₁ > 0, and
    the sum of the two. All shares are reported as 0.0% when the total
    count is zero.

    Args:
        lf: Not read by this function; kept so existing calls keep working.
        sign_table: Table with ``s_d``, ``s_q`` and ``n`` columns, as
            returned by ``classify_by_signs``.
    """
    total = sign_table["n"].sum()

    def percent_of_total(count):
        # Guard the empty-table case, where count / total would be 0 / 0.
        return count / total * 100 if total else 0.0

    # Cooperation favored: d > 0
    d_pos = sign_table.filter(pl.col("s_d") == ">0")["n"].sum()
    d_pos_pct = percent_of_total(d_pos)

    # d < 0 but q > 0 (can still favor cooperation)
    d_neg_q_pos = sign_table.filter(
        (pl.col("s_d") == "<0") & (pl.col("s_q") == ">0")
    )["n"].sum()
    d_neg_q_pos_pct = percent_of_total(d_neg_q_pos)

    # Potentially favorable total
    favorable = d_pos + d_neg_q_pos
    favorable_pct = percent_of_total(favorable)

    print("=" * 60)
    print("ENUMERATION SUMMARY REPORT")
    print("=" * 60)
    print(f"\nTotal unique configurations: {total:,}")
    print("\nCooperation Analysis:")
    print(f"  θ₁−θ₃ > 0 (directly favored):     {d_pos:>10,} ({d_pos_pct:>5.1f}%)")
    print(f"  θ₁−θ₃ < 0 & φ₀₁−φ₂₁ > 0 (cond.): {d_neg_q_pos:>10,} ({d_neg_q_pos_pct:>5.1f}%)")
    print("  ─────────────────────────────────────────────────")
    print(f"  Potentially favorable:            {favorable:>10,} ({favorable_pct:>5.1f}%)")
    print("\n" + "=" * 60)

generate_summary(lf, sign_table)

---
## 8. Export Results

In [None]:
# Export sign classification table
# (the table built by classify_by_signs in section 4), written as CSV to the
# current working directory.
sign_table.write_csv("sign_classification.csv")
print("Saved: sign_classification.csv")

# Export theta aggregation
# (the per-(theta1, theta2, theta3) slope summary from section 5), written as
# CSV to the current working directory.
theta_agg.write_csv("theta_aggregation.csv")
print("Saved: theta_aggregation.csv")