# Bias Neuron Comparison: Can SOEN Learn Linear Regression?

This notebook compares two SOEN model architectures on a linear regression task:

## Model A: No Bias Input (Original)
```
x ──[J_in]──→ SingleDendrite ──[J_out]──→ y
```
- Trainable: J_in, J_out (2 params)
- φ = J_in · x + φ_offset (fixed)

## Model B: With Bias Input (New)
```
x ──[J_α]──┐
            ├──→ SingleDendrite ──→ y  (J_out fixed to 1)
1 ──[J_β]──┘
```
- Trainable: J_α, J_β (2 params)
- φ = J_α · x + J_β · 1 + φ_offset
- J_β acts as a learnable bias.

## Task
Learn y = 2.0·x + 0.5 (linear regression with slope and intercept)

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

from soen_toolkit.core import (
    ConnectionConfig,
    LayerConfig,
    SimulationConfig,
    SOENModelCore,
)

# Seed the PyTorch and NumPy generators with the same value so that
# re-running the notebook reproduces the same numbers.
SEED = 42
for seed_fn in (torch.manual_seed, np.random.seed):
    seed_fn(SEED)

print(f"PyTorch version: {torch.__version__}")

## 1. Generate Linear Regression Data

Target: y = α·x + β, where α = 2.0 and β = 0.5

In [None]:
# --- Ground truth of the regression target y = TRUE_ALPHA * x + TRUE_BETA ---
TRUE_ALPHA = 2.0  # slope
TRUE_BETA = 0.5   # intercept

# --- Dataset size ---
N_SAMPLES = 100   # number of distinct x values
SEQ_LEN = 50      # timesteps per sample

# Evenly spaced scalar inputs (within SOEN operating range).
x_values = torch.linspace(0.05, 0.20, N_SAMPLES)

# Each sample holds its scalar x constant over time: [N] -> [N, T, 1].
X_data = x_values.view(N_SAMPLES, 1, 1).repeat(1, SEQ_LEN, 1)

# One regression target per sample: [N] -> [N, 1].
y_data = (TRUE_ALPHA * x_values + TRUE_BETA).unsqueeze(1)

print(f"Task: y = {TRUE_ALPHA}·x + {TRUE_BETA}")
print(f"Input shape: {X_data.shape}")
print(f"Target shape: {y_data.shape}")
print(f"Input range: [{x_values.min():.3f}, {x_values.max():.3f}]")
print(f"Target range: [{y_data.min():.3f}, {y_data.max():.3f}]")

# Scatter the samples and overlay the generating line.
x_np = x_values.numpy()
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x_np, y_data.squeeze().numpy(), alpha=0.6, label='Data')
ax.plot(
    x_np,
    TRUE_ALPHA * x_np + TRUE_BETA,
    'r-',
    linewidth=2,
    label=f'y = {TRUE_ALPHA}x + {TRUE_BETA}',
)
ax.set_xlabel('Input x')
ax.set_ylabel('Target y')
ax.set_title('Linear Regression Task')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 2. Define Both Model Architectures

In [None]:
def build_model_A(j_in=0.15, j_out=1.0):
    """
    Build Model A: the original architecture without a bias input.

    x ──[J_in]──→ SingleDendrite ──[J_out]──→ y

    Both connections are learnable, so the trainable parameters are
    J_in and J_out (2 params). All SingleDendrite parameters are frozen.

    Args:
        j_in: Initial value of the input → dendrite weight.
        j_out: Initial value of the dendrite → output weight.

    Returns:
        The assembled SOENModelCore.
    """
    simulation = SimulationConfig(
        dt=50.0,
        input_type="state",
        track_phi=True,
        track_power=False,
    )

    # Fixed dendrite constants; each one is also marked non-learnable below.
    dendrite_constants = {
        "phi_offset": 0.02,
        "bias_current": 1.98,
        "gamma_plus": 0.0005,
        "gamma_minus": 1e-6,
    }
    dendrite_params = {
        "dim": 1,
        "solver": "FE",
        "source_func": "Heaviside_fit_state_dep",
        **dendrite_constants,
        "learnable_params": {key: False for key in dendrite_constants},
    }

    # 1-channel input, a single dendrite, and a 1-channel layer that is used
    # as the output (it is declared with layer_type "Input" as well).
    layers = [
        LayerConfig(layer_id=0, layer_type="Input", params={"dim": 1}),
        LayerConfig(layer_id=1, layer_type="SingleDendrite", params=dendrite_params),
        LayerConfig(layer_id=2, layer_type="Input", params={"dim": 1}),
    ]

    # 0→1 carries J_in, 1→2 carries J_out; both are learnable.
    connections = [
        ConnectionConfig(
            from_layer=source,
            to_layer=target,
            connection_type="all_to_all",
            learnable=True,
            params={"init": "constant", "value": weight},
        )
        for source, target, weight in ((0, 1, j_in), (1, 2, j_out))
    ]

    return SOENModelCore(
        sim_config=simulation,
        layers_config=layers,
        connections_config=connections,
    )


def build_model_B(j_alpha=0.15, j_beta=0.1):
    """
    Model B: With bias input

    x ──[J_α]──┐
               ├──→ SingleDendrite ──[1.0]──→ y
    1 ──[J_β]──┘

    Trainable: J_α, J_β (2 params)
    J_out is fixed to 1.0

    Args:
        j_alpha: Initial weight applied to the x channel.
        j_beta: Initial weight applied to the constant bias channel.

    Returns:
        The assembled SOENModelCore.

    Raises:
        RuntimeError: If j_beta differs from j_alpha and no trainable
            two-element weight can be found to apply it to.
    """
    sim_cfg = SimulationConfig(
        dt=50.0,
        input_type="state",
        track_phi=True,
        track_power=False,
    )

    # Input: 2 channels (x and bias=1)
    layer0 = LayerConfig(
        layer_id=0,
        layer_type="Input",
        params={"dim": 2},  # Channel 0: x, Channel 1: bias (always 1)
    )

    # SingleDendrite: 1 neuron, all internal parameters frozen
    layer1 = LayerConfig(
        layer_id=1,
        layer_type="SingleDendrite",
        params={
            "dim": 1,
            "solver": "FE",
            "source_func": "Heaviside_fit_state_dep",
            "phi_offset": 0.02,
            "bias_current": 1.98,
            "gamma_plus": 0.0005,
            "gamma_minus": 1e-6,
            "learnable_params": {
                "phi_offset": False,
                "bias_current": False,
                "gamma_plus": False,
                "gamma_minus": False,
            },
        },
    )

    # Output: 1 channel
    layer2 = LayerConfig(
        layer_id=2,
        layer_type="Input",
        params={"dim": 1},
    )

    # Connection 0→1: [J_α, J_β] (2 inputs → 1 neuron = 2 weights, both learnable)
    #   - Weight[0] = J_α (multiplies x)
    #   - Weight[1] = J_β (multiplies bias=1)
    # The "constant" initialiser takes a single value, so both weights start
    # at j_alpha here; j_beta is applied after the model is built.
    conn01 = ConnectionConfig(
        from_layer=0, to_layer=1,
        connection_type="all_to_all",
        learnable=True,
        params={
            "init": "constant",
            "value": j_alpha,
        },
    )

    # Connection 1→2: Fixed to 1.0 (not learnable)
    conn12 = ConnectionConfig(
        from_layer=1, to_layer=2,
        connection_type="all_to_all",
        learnable=False,
        params={"init": "constant", "value": 1.0},
    )

    model = SOENModelCore(
        sim_config=sim_cfg,
        layers_config=[layer0, layer1, layer2],
        connections_config=[conn01, conn12],
    )

    # Previously j_beta was silently ignored. Only touch the weights when the
    # constant initialisation is not already what the caller asked for.
    if j_beta != j_alpha:
        # The 0→1 connection is the only learnable connection configured
        # above and has two weights, so look for a trainable 2-element tensor.
        weights = next(
            (p for p in model.parameters() if p.requires_grad and p.numel() == 2),
            None,
        )
        if weights is None:
            raise RuntimeError(
                "build_model_B: could not locate the trainable 2-element input "
                "weight needed to apply j_beta"
            )
        # NOTE(review): assumes the flattened weight order is
        # [x weight, bias weight] — confirm against soen_toolkit.
        with torch.no_grad():
            weights.copy_(
                torch.tensor(
                    [j_alpha, j_beta],
                    dtype=weights.dtype,
                    device=weights.device,
                ).reshape(weights.shape)
            )

    return model


# Build both models with default weights and list what is trainable in each.
def _print_trainables(model):
    """Print the layer widths, each trainable parameter, and their total size."""
    print(f"Layers: {[layer.dim for layer in model.layers]}")
    print("Trainable parameters:")
    n_trainable = 0
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        n_trainable += param.numel()
        print(f"  {name}: shape={list(param.shape)}, value={param.data.numpy().flatten()}")
    print(f"Total trainable: {n_trainable}")


banner = "=" * 60

print(banner)
print("MODEL A: No Bias Input")
print(banner)
model_A = build_model_A()
_print_trainables(model_A)

print("\n" + banner)
print("MODEL B: With Bias Input")
print(banner)
model_B = build_model_B()
_print_trainables(model_B)

## 3. Prepare Input Data for Both Models

In [None]:
# Model A sees only x: an independent copy of the data, [N, T, 1].
X_model_A = X_data.clone()

# Model B sees [x, 1]: append an all-ones channel on the last axis, [N, T, 2].
bias_channel = torch.ones(N_SAMPLES, SEQ_LEN, 1)
X_model_B = torch.cat((X_data, bias_channel), dim=-1)

print(f"Model A input shape: {X_model_A.shape}")
print(f"Model B input shape: {X_model_B.shape}")

# Show the two channels of the first sample at the first timestep.
first_step = X_model_B[0, 0]
print(f"\nModel B input sample [0, 0, :]: {first_step.numpy()}")
print(f"  Channel 0 (x): {first_step[0].item():.4f}")
print(f"  Channel 1 (bias): {first_step[1].item():.4f}")

## 4. Training Function

In [None]:
def train_model(model, X_train, y_train, n_epochs=200, lr=0.05, verbose=True):
    """
    Fit a model to a regression target with full-batch Adam and MSE loss.

    Each epoch runs one forward pass over all of X_train, takes the output at
    the last timestep as the prediction, clips the gradient norm to 1.0 and
    applies one optimizer step.

    Args:
        model: Callable module returning a pair; the first item is indexed
            as [batch, time, feature].
        X_train: Inputs passed to the model unchanged.
        y_train: Targets compared against the last-timestep output.
        n_epochs: Number of optimisation steps.
        lr: Adam learning rate.
        verbose: If true, print the loss every 50 epochs.

    Returns:
        Dict with 'loss' (loss computed before each step) and 'params'
        (flattened copies of the trainable parameters after each step).
    """
    model.train()
    adam = torch.optim.Adam(model.parameters(), lr=lr)
    mse = nn.MSELoss()

    loss_trace = []
    param_trace = []

    for step in range(1, n_epochs + 1):
        adam.zero_grad()

        # Forward pass; only the final timestep is scored.
        outputs, _ = model(X_train)
        loss = mse(outputs[:, -1, :], y_train)

        # Backward pass with gradient-norm clipping.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        adam.step()

        # Bookkeeping: snapshot the loss and the just-updated parameters.
        loss_value = loss.item()
        loss_trace.append(loss_value)
        param_trace.append([
            p.detach().clone().numpy().flatten()
            for p in model.parameters()
            if p.requires_grad
        ])

        if verbose and step % 50 == 0:
            print(f"  Epoch {step}: Loss = {loss_value:.6f}")

    return {'loss': loss_trace, 'params': param_trace}

## 5. Train Both Models

In [None]:
# Shared optimisation settings so both models get the same training budget.
N_EPOCHS = 300
LR = 0.05
train_kwargs = dict(n_epochs=N_EPOCHS, lr=LR)

# Model A: fresh instance trained on the single-channel input.
print("Training Model A (no bias input)...")
model_A = build_model_A(j_in=0.1, j_out=0.5)
history_A = train_model(model_A, X_model_A, y_data, **train_kwargs)

# Model B: fresh instance trained on the [x, 1] input.
print("\nTraining Model B (with bias input)...")
model_B = build_model_B(j_alpha=0.1, j_beta=0.1)
history_B = train_model(model_B, X_model_B, y_data, **train_kwargs)

banner = "=" * 60
print("\n" + banner)
print("TRAINING COMPLETE")
print(banner)
for tag, run_history in (("A", history_A), ("B", history_B)):
    print(f"Model {tag} final loss: {run_history['loss'][-1]:.6f}")

## 6. Compare Loss Curves

In [None]:
# Plot both loss histories twice: once on a linear and once on a log y-axis.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

curves = (
    (history_A['loss'], 'b-', 'Model A (no bias)'),
    (history_B['loss'], 'r-', 'Model B (with bias)'),
)
panels = (
    (axes[0], 'MSE Loss', 'Training Loss Comparison (Linear Scale)', False),
    (axes[1], 'MSE Loss (log scale)', 'Training Loss Comparison (Log Scale)', True),
)

for ax, y_label, title, use_log in panels:
    for losses, fmt, label in curves:
        # Index by each history's own length so the plot stays valid even if
        # N_EPOCHS was edited after the models were trained.
        ax.plot(range(len(losses)), losses, fmt, linewidth=2, label=label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    if use_log:
        ax.set_yscale('log')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3, which='both' if use_log else 'major')

plt.tight_layout()
plt.show()

# Report the relative difference in final loss, using Model A as the baseline.
final_loss_A = history_A['loss'][-1]
final_loss_B = history_B['loss'][-1]
if final_loss_A == 0:
    # A zero baseline makes the percentage undefined; avoid ZeroDivisionError.
    improvement = float('nan')
    print("\nModel A final loss is exactly 0; relative improvement is undefined")
else:
    improvement = (final_loss_A - final_loss_B) / final_loss_A * 100
    if improvement >= 0:
        print(f"\nModel B achieves {improvement:.1f}% lower loss than Model A")
    else:
        # Do not describe a worse result as a (negative) improvement.
        print(f"\nModel B has {-improvement:.1f}% higher loss than Model A")

## 7. Compare Predictions

In [None]:
# Evaluate both trained models on their training inputs and plot the results.
def _final_prediction(model, inputs):
    """Return the model's last-timestep output as a squeezed NumPy array."""
    model.eval()
    with torch.no_grad():
        output_history, _ = model(inputs)
    return output_history[:, -1, :].squeeze().numpy()


pred_A = _final_prediction(model_A, X_model_A)
pred_B = _final_prediction(model_B, X_model_B)

x_plot = x_values.numpy()
y_true = y_data.squeeze().numpy()

fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# First two panels: one model each, against the data and the target line.
per_model = (
    (axes[0], pred_A, 'Model A prediction', 'blue', 'Model A: No Bias', history_A),
    (axes[1], pred_B, 'Model B prediction', 'red', 'Model B: With Bias', history_B),
)
for ax, pred, pred_label, colour, heading, run_history in per_model:
    ax.scatter(x_plot, y_true, alpha=0.5, label='True', color='gray')
    ax.scatter(x_plot, pred, alpha=0.7, label=pred_label, color=colour)
    ax.plot(
        x_plot,
        TRUE_ALPHA * x_plot + TRUE_BETA,
        'k--',
        linewidth=2,
        label=f'y = {TRUE_ALPHA}x + {TRUE_BETA}',
    )
    ax.set_xlabel('Input x')
    ax.set_ylabel('Output y')
    ax.set_title(f'{heading}\nFinal Loss = {run_history["loss"][-1]:.6f}')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Third panel: both models overlaid on the target.
overlay = axes[2]
overlay.plot(x_plot, y_true, 'k-', linewidth=3, label='True target', alpha=0.8)
overlay.plot(x_plot, pred_A, 'b--', linewidth=2, label='Model A (no bias)')
overlay.plot(x_plot, pred_B, 'r--', linewidth=2, label='Model B (with bias)')
overlay.set_xlabel('Input x')
overlay.set_ylabel('Output y')
overlay.set_title('Direct Comparison')
overlay.legend()
overlay.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Analyze the Learned Parameters

In [None]:
# Print the learned connection weights of both models next to the target.
print("=" * 60)
print("LEARNED PARAMETERS")
print("=" * 60)

# Only trainable tensors are connection weights of interest. Filtering on
# requires_grad (as the inspection and training code do) keeps frozen
# parameters from being mistaken for J values, whatever order the model
# registers its parameters in.
print("\nModel A (no bias):")
params_A = [p for p in model_A.parameters() if p.requires_grad]
j_in_A = params_A[0].item()
j_out_A = params_A[1].item()
print(f"  J_in = {j_in_A:.4f}")
print(f"  J_out = {j_out_A:.4f}")
print(f"  Signal flow: φ = {j_in_A:.4f}·x + 0.02 → s → y = {j_out_A:.4f}·s")

print("\nModel B (with bias):")
params_B = [p for p in model_B.parameters() if p.requires_grad]
# NOTE(review): assumes the flattened weight order is [x weight, bias weight]
# — confirm against soen_toolkit.
j_weights = params_B[0].data.numpy().flatten()
j_alpha = j_weights[0]
j_beta = j_weights[1] if len(j_weights) > 1 else 0
print(f"  J_α (x weight) = {j_alpha:.4f}")
print(f"  J_β (bias weight) = {j_beta:.4f}")
print(f"  Signal flow: φ = {j_alpha:.4f}·x + {j_beta:.4f}·1 + 0.02 → s → y = s")

print("\n" + "=" * 60)
print("TARGET RELATIONSHIP")
print("=" * 60)
print(f"  y = {TRUE_ALPHA}·x + {TRUE_BETA}")
print("\nNote: The SOEN model has nonlinear dynamics, so it cannot")
print("perfectly represent a linear function. The bias input helps")
print("by providing an independent offset term.")

## 9. Visualize the SingleDendrite Transfer Function

In [None]:
# Show the nonlinear transfer function of SingleDendrite
# by varying input and measuring steady-state output

def measure_transfer_function(model, x_range, seq_len=100, is_model_B=False):
    """
    Sweep a model over scalar inputs and record its final-timestep output.

    Args:
        model: Callable returning a pair whose first item is indexed as
            [batch, time, feature]; it is switched to eval mode first.
        x_range: Iterable of scalar input values to probe.
        seq_len: Number of timesteps each constant input is held for.
        is_model_B: If true, feed [x, 1.0] (x plus a constant bias channel);
            otherwise feed [x] alone.

    Returns:
        NumPy array with one output value per entry of x_range.
    """
    model.eval()

    def _final_output(x_val):
        # Build a single-sample input whose channels are constant over time.
        channels = [x_val, 1.0] if is_model_B else [x_val]
        probe = torch.tensor([[channels]]).expand(-1, seq_len, -1)
        with torch.no_grad():
            output_history, _ = model(probe)
        # First sample, last timestep, first output channel.
        return output_history[0, -1, 0].item()

    return np.array([_final_output(x_val) for x_val in x_range])

# Probe a wider input range than was trained on to expose the full curve.
x_extended = np.linspace(0.0, 0.3, 100)

y_transfer_A, y_transfer_B = (
    measure_transfer_function(model, x_extended, is_model_B=has_bias)
    for model, has_bias in ((model_A, False), (model_B, True))
)
y_linear = TRUE_ALPHA * x_extended + TRUE_BETA

# Overlay the linear target and both measured transfer curves.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    x_extended,
    y_linear,
    'k-',
    linewidth=3,
    label=f'Target: y = {TRUE_ALPHA}x + {TRUE_BETA}',
    alpha=0.7,
)
ax.plot(x_extended, y_transfer_A, 'b-', linewidth=2, label='Model A (no bias)')
ax.plot(x_extended, y_transfer_B, 'r-', linewidth=2, label='Model B (with bias)')

# Shade the interval of x values that the training data covered.
ax.axvspan(0.05, 0.20, alpha=0.2, color='green', label='Training region')

ax.set_xlabel('Input x', fontsize=12)
ax.set_ylabel('Output y', fontsize=12)
ax.set_title('Transfer Functions: SOEN Models vs Linear Target', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

observation_lines = (
    "\nObservation:",
    "  The SingleDendrite introduces nonlinearity, so neither model can",
    "  perfectly match the linear target. However, Model B (with bias)",
    "  has more flexibility to approximate the offset term.",
)
print("\n".join(observation_lines))

## 10. Residual Analysis

In [None]:
# Residual = true target minus model prediction, per sample.
residuals_A = y_true - pred_A
residuals_B = y_true - pred_B

residual_sets = (
    ("Model A", residuals_A, 'blue'),
    ("Model B", residuals_B, 'red'),
)

fig, (scatter_ax, hist_ax) = plt.subplots(1, 2, figsize=(14, 5))

for tag, residuals, colour in residual_sets:
    # Left panel: residual as a function of the input.
    scatter_ax.scatter(
        x_plot, residuals, alpha=0.7,
        label=f'{tag} (std={residuals.std():.4f})', color=colour,
    )
    # Right panel: distribution of the residuals.
    hist_ax.hist(
        residuals, bins=20, alpha=0.6,
        label=f'{tag} (mean={residuals.mean():.4f})', color=colour,
    )

# Zero-error reference lines.
scatter_ax.axhline(y=0, color='black', linestyle='--', linewidth=1)
scatter_ax.set_xlabel('Input x')
scatter_ax.set_ylabel('Residual (True - Predicted)')
scatter_ax.set_title('Residuals vs Input')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)

hist_ax.axvline(x=0, color='black', linestyle='--', linewidth=1)
hist_ax.set_xlabel('Residual')
hist_ax.set_ylabel('Count')
hist_ax.set_title('Residual Distribution')
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nResidual Statistics:")
for tag, residuals, _ in residual_sets:
    print(
        f"  {tag}: mean={residuals.mean():.4f}, std={residuals.std():.4f}, "
        f"MSE={np.mean(residuals**2):.6f}"
    )

## 11. Summary & Conclusions

In [None]:
# Text-only recap of the experiment: task, both models, findings, hardware notes.
heavy_rule = "=" * 70
light_rule = "-" * 70


def _section(title):
    """Print a blank line followed by a title framed by two dashed rules."""
    print("\n" + light_rule)
    print(title)
    print(light_rule)


final_loss_A = history_A['loss'][-1]
final_loss_B = history_B['loss'][-1]

print(heavy_rule)
print("SUMMARY: BIAS NEURON COMPARISON")
print(heavy_rule)

print(f"\nTask: Learn y = {TRUE_ALPHA}·x + {TRUE_BETA}")

_section("MODEL A: No Bias Input")
print("  Architecture: x ──[J_in]──→ SingleDendrite ──[J_out]──→ y")
print("  Trainable params: J_in, J_out (2 params)")
print(f"  Final loss: {final_loss_A:.6f}")
print("  Limitation: Cannot independently control slope and offset")

_section("MODEL B: With Bias Input")
print("  Architecture: [x, 1] ──[J_α, J_β]──→ SingleDendrite ──→ y")
print("  Trainable params: J_α, J_β (2 params)")
print(f"  Final loss: {final_loss_B:.6f}")
print("  Advantage: J_β provides independent bias control")

_section("KEY FINDINGS")

# Only claim an improvement when Model B's final loss is strictly lower.
if final_loss_B < final_loss_A:
    improvement = (final_loss_A - final_loss_B) / final_loss_A * 100
    print(f"  ✓ Model B achieves {improvement:.1f}% lower loss")
    print("  ✓ Bias input improves regression capability")
else:
    print("  • Model A performs comparably or better")
    print("  • Bias may not be necessary for this task")

print("\n  ⚠ Neither model achieves perfect linear regression because")
print("    the SingleDendrite introduces nonlinear dynamics.")
print("    The models learn the best nonlinear approximation.")

_section("HARDWARE COMPATIBILITY")
print("  ✓ Both models use only connection weights (J values)")
print("  ✓ No internal neuron parameters are trained")
print("  ✓ Model B's bias input is just an extra input channel")
print("    (constant current source in hardware)")

print("\n" + heavy_rule)