# Network Complexity Study: Can More Neurons Learn Linear Regression?

The SingleDendrite has nonlinear dynamics, so a single neuron cannot perfectly learn y = αx + β.

**Question:** Can we approximate linear functions by adding more neurons?

This is the fundamental principle of neural networks - the **Universal Approximation Theorem** states that a network with enough hidden units and a suitable nonlinear activation can approximate any continuous function on a bounded (compact) input range to arbitrary accuracy.

## Architectures to Compare

All architectures train **only the connection weights (J)**; every SingleDendrite parameter stays fixed, which keeps the models hardware compatible.

| Model | Architecture | Hidden Neurons | Trainable Params |
|-------|-------------|----------------|------------------|
| A1 | 1 → 1 → 1 | 1 | 2 |
| A2 | 1 → 2 → 1 | 2 | 4 |
| A3 | 1 → 3 → 1 | 3 | 6 |
| A5 | 1 → 5 → 1 | 5 | 10 |
| A10 | 1 → 10 → 1 | 10 | 20 |
| A20 | 1 → 20 → 1 | 20 | 40 |
| Deep | 1 → 3 → 3 → 1 | 6 (2 layers) | 15 |
| Deep5 | 1 → 5 → 5 → 1 | 10 (2 layers) | 35 |

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from soen_toolkit.core import (
    ConnectionConfig,
    LayerConfig,
    SimulationConfig,
    SOENModelCore,
)

# Seed NumPy and PyTorch with the same value so reruns draw identical random numbers.
np.random.seed(42)
torch.manual_seed(42)

print(f"PyTorch version: {torch.__version__}")

## 1. Linear Regression Task

Target: y = 2.0·x + 0.5

In [None]:
# Coefficients of the line the networks are asked to reproduce.
TRUE_ALPHA, TRUE_BETA = 2.0, 0.5

# Dataset dimensions: number of distinct inputs and time steps per input.
N_SAMPLES, SEQ_LEN = 100, 50

# Every sample carries one scalar held constant over all SEQ_LEN steps,
# so X_data has shape (N_SAMPLES, SEQ_LEN, 1) and y_data has shape (N_SAMPLES, 1).
x_values = torch.linspace(0.05, 0.20, N_SAMPLES)
X_data = x_values[:, None, None].expand(N_SAMPLES, SEQ_LEN, 1).clone()
y_data = (TRUE_ALPHA * x_values + TRUE_BETA)[:, None]

print(f"Task: y = {TRUE_ALPHA}·x + {TRUE_BETA}")
print(f"Input range: [{x_values.min():.3f}, {x_values.max():.3f}]")
print(f"Target range: [{y_data.min():.3f}, {y_data.max():.3f}]")

# Visual sanity check: sampled targets (dots) on top of the analytic line (red).
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x_values.numpy(), y_data.squeeze().numpy(), alpha=0.6)
ax.plot(x_values.numpy(), TRUE_ALPHA * x_values.numpy() + TRUE_BETA, 'r-', lw=2)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title(f'Target: y = {TRUE_ALPHA}x + {TRUE_BETA}')
ax.grid(True, alpha=0.3)
plt.show()

## 2. Model Builder Function

Generic function to build SOEN models with varying complexity.

In [None]:
def build_soen_model(hidden_dims, dt=50.0):
    """
    Build a SOEN model with specified hidden layer dimensions.

    Args:
        hidden_dims: List of hidden layer sizes, e.g., [3] or [3, 3].
            Must support ``len()`` and iteration.
        dt: Value forwarded to ``SimulationConfig(dt=...)``.

    Returns:
        A ``SOENModelCore`` assembled from the generated layer and
        connection configs.

    Architecture:
        1 (input) → hidden_dims[0] → hidden_dims[1] → ... → 1 (output)

    All connections are trainable. All SingleDendrite params are fixed.
    """
    sim_cfg = SimulationConfig(
        dt=dt,
        input_type="state",
        track_phi=False,
        track_power=False,
    )

    def trainable_link(src, dst):
        # Every connection in the model shares this configuration; only the
        # endpoints differ.
        return ConnectionConfig(
            from_layer=src,
            to_layer=dst,
            connection_type="all_to_all",
            learnable=True,
            params={
                "init": "xavier_uniform",
            },
        )

    # Layer 0: Input (dim=1)
    layers = [
        LayerConfig(
            layer_id=0,
            layer_type="Input",
            params={"dim": 1},
        )
    ]
    connections = []

    # Hidden layers (SingleDendrite); ids start at 1 because id 0 is the input.
    for layer_id, hidden_dim in enumerate(hidden_dims, start=1):
        layers.append(LayerConfig(
            layer_id=layer_id,
            layer_type="SingleDendrite",
            params={
                "dim": hidden_dim,
                "solver": "FE",
                "source_func": "Heaviside_fit_state_dep",
                "phi_offset": 0.02,
                "bias_current": 1.98,
                "gamma_plus": 0.0005,
                "gamma_minus": 1e-6,
                # Freeze every dendrite parameter so only the connection
                # weights are trained.
                "learnable_params": {
                    "phi_offset": False,
                    "bias_current": False,
                    "gamma_plus": False,
                    "gamma_minus": False,
                },
            },
        ))

        # Connection from previous layer
        connections.append(trainable_link(layer_id - 1, layer_id))

    # Output layer (dim=1).
    # NOTE(review): the readout reuses layer_type "Input"; presumably this acts
    # as a plain pass-through layer in soen_toolkit — confirm.
    output_layer_id = len(hidden_dims) + 1
    layers.append(LayerConfig(
        layer_id=output_layer_id,
        layer_type="Input",
        params={"dim": 1},
    ))

    # Connection to output
    connections.append(trainable_link(output_layer_id - 1, output_layer_id))

    return SOENModelCore(
        sim_config=sim_cfg,
        layers_config=layers,
        connections_config=connections,
    )


def count_params(model):
    """Return the total element count of all parameters with requires_grad set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# Smoke-test the builder: construct each layout once and report its layer
# sizes and trainable-parameter count.
print("Testing model builder...")
test_configs = list(zip(
    ([1], [2], [3], [5], [10], [3, 3]),
    ("1→1→1", "1→2→1", "1→3→1", "1→5→1", "1→10→1", "1→3→3→1"),
))

for hidden_dims, name in test_configs:
    model = build_soen_model(hidden_dims)
    layer_dims = [layer.dim for layer in model.layers]
    n_params = count_params(model)
    print(f"  {name}: layers={layer_dims}, trainable_params={n_params}")

## 3. Training Function

In [None]:
def train_model(model, X_train, y_train, n_epochs=500, lr=0.01, verbose=False):
    """
    Fit a SOEN model with full-batch Adam on an MSE objective.

    Each epoch runs the model on all of X_train, takes the last time step of
    the first returned value as the prediction, and compares it with y_train.
    Gradients are clipped to a total norm of 1.0 before each optimizer step.

    Args:
        model: Module called as ``model(X_train)``; must return a 2-tuple
            whose first item can be indexed as ``[:, -1, :]``.
        X_train: Input batch passed to the model unchanged.
        y_train: Targets compared against the last-step prediction.
        n_epochs: Number of optimisation steps.
        lr: Adam learning rate.
        verbose: When true, print the loss every 100 epochs.

    Returns:
        List with one float loss per epoch (recorded before that epoch's
        weight update takes effect).
    """
    model.train()
    mse_loss = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=lr)

    history = []
    for step in range(1, n_epochs + 1):
        adam.zero_grad()

        state_history, _ = model(X_train)
        prediction = state_history[:, -1, :]

        batch_loss = mse_loss(prediction, y_train)
        batch_loss.backward()

        # Cap the gradient norm so a single step cannot blow up the weights.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        adam.step()

        loss_value = batch_loss.item()
        history.append(loss_value)

        if verbose and step % 100 == 0:
            print(f"  Epoch {step}: Loss = {loss_value:.6f}")

    return history


def evaluate_model(model, X_test, y_test):
    """
    Evaluate model and return predictions.

    Switches the model to eval mode, runs it on X_test without gradient
    tracking, and takes the last time step of the first returned value as
    the prediction.

    Args:
        model: Module called as ``model(X_test)``; must return a 2-tuple
            whose first item can be indexed as ``[:, -1, :]``.
        X_test: Input batch passed to the model unchanged.
        y_test: Target tensor; squeezed before comparison.

    Returns:
        Tuple ``(y_pred, mse)``: the squeezed predictions as a NumPy array and
        the mean squared error against the squeezed targets.
    """
    model.eval()
    with torch.no_grad():
        final_hist, _ = model(X_test)
        # detach + cpu make the NumPy conversion safe for tensors that still
        # carry autograd state or live on an accelerator.
        y_pred = final_hist[:, -1, :].squeeze().detach().cpu().numpy()

    y_true = y_test.squeeze().detach().cpu().numpy()
    mse = np.mean((y_pred - y_true) ** 2)

    return y_pred, mse

## 4. Train All Architectures

In [None]:
# Hidden-layer layouts under comparison, keyed by the label used in plots.
ARCHITECTURES = {
    "1 neuron": [1],
    "2 neurons": [2],
    "3 neurons": [3],
    "5 neurons": [5],
    "10 neurons": [10],
    "20 neurons": [20],
    "3→3 (deep)": [3, 3],
    "5→5 (deep)": [5, 5],
}

# Shared optimisation settings so the architectures are compared on equal terms.
N_EPOCHS, LR = 500, 0.02

results = {}
banner = "=" * 60

print("Training all architectures...")
print(banner)

for name, hidden_dims in ARCHITECTURES.items():
    print(f"\nTraining: {name}")

    # Fresh model per layout, then fit and score it on the same data.
    model = build_soen_model(hidden_dims)
    n_params = count_params(model)
    losses = train_model(model, X_data, y_data, n_epochs=N_EPOCHS, lr=LR, verbose=False)
    y_pred, mse = evaluate_model(model, X_data, y_data)

    # Keep everything the later plotting and summary cells read.
    results[name] = dict(
        hidden_dims=hidden_dims,
        n_params=n_params,
        losses=losses,
        final_loss=losses[-1],
        y_pred=y_pred,
        mse=mse,
        model=model,
    )

    print(f"  Params: {n_params}, Final Loss: {losses[-1]:.6f}")

print("\n" + banner)
print("Training complete!")

## 5. Compare Loss Curves

In [None]:
# One viridis colour per trained architecture; later cells reuse `colors`.
colors = plt.cm.viridis(np.linspace(0, 1, len(results)))
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

# Draw each loss history on both panels in a single pass.
for (name, res), color in zip(results.items(), colors):
    ax1.plot(res['losses'], label=f"{name} ({res['n_params']} params)", color=color, lw=2)
    ax2.plot(res['losses'], label=f"{name}", color=color, lw=2)

# Left panel: linear y-axis.
ax1.set(xlabel='Epoch', ylabel='MSE Loss', title='Training Loss (Linear Scale)')
ax1.legend(fontsize=8)
ax1.grid(True, alpha=0.3)

# Right panel: logarithmic y-axis to separate curves close to zero.
ax2.set(xlabel='Epoch', ylabel='MSE Loss (log)')
ax2.set_yscale('log')
ax2.set_title('Training Loss (Log Scale)')
ax2.legend(fontsize=8)
ax2.grid(True, alpha=0.3, which='both')

plt.tight_layout()
plt.show()

## 6. Compare Predictions

In [None]:
x_plot = x_values.numpy()
y_true = y_data.squeeze().numpy()

# Grid layout: four panels per row, with enough rows to fit every model.
n_models = len(results)
cols = 4
rows = -(-n_models // cols)  # ceiling division

fig, axes = plt.subplots(rows, cols, figsize=(16, 4*rows))
axes = axes.ravel()

# One panel per architecture: targets, analytic line, and model predictions.
for ax, (name, res) in zip(axes, results.items()):
    ax.scatter(x_plot, y_true, alpha=0.4, s=20, color='gray', label='Target')
    ax.plot(x_plot, TRUE_ALPHA * x_plot + TRUE_BETA, 'k--', lw=2, label='y=2x+0.5')
    ax.scatter(x_plot, res['y_pred'], alpha=0.6, s=20, color='blue', label='Prediction')

    ax.set(xlabel='x', ylabel='y')
    ax.set_title(f"{name}\nMSE={res['mse']:.6f}, Params={res['n_params']}")
    ax.legend(fontsize=7)
    ax.grid(True, alpha=0.3)

# Blank out any panels left over in the last row.
for spare_ax in axes[n_models:]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 7. Performance vs Complexity

In [None]:
# Collect per-architecture values in the insertion order of `results`.
names = list(results)
n_params = [entry['n_params'] for entry in results.values()]
final_losses = [entry['final_loss'] for entry in results.values()]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

# Left panel: final training loss for each architecture as a bar.
positions = range(len(names))
bars = ax1.bar(positions, final_losses, color=colors)
ax1.set_xticks(positions)
ax1.set_xticklabels(names, rotation=45, ha='right')
ax1.set_ylabel('Final MSE Loss')
ax1.set_title('Final Loss by Architecture')
ax1.grid(True, alpha=0.3, axis='y')

# Print each value just above its bar.
for bar, loss in zip(bars, final_losses):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.001
    ax1.text(label_x, label_y, f'{loss:.4f}', ha='center', va='bottom', fontsize=8)

# Right panel: loss against trainable-parameter count, labelled per model.
ax2.scatter(n_params, final_losses, s=100, c=colors, edgecolors='black', zorder=5)
for name, point in zip(names, zip(n_params, final_losses)):
    ax2.annotate(name, point,
                 textcoords="offset points", xytext=(5, 5), fontsize=8)
ax2.set_xlabel('Number of Trainable Parameters')
ax2.set_ylabel('Final MSE Loss')
ax2.set_title('Loss vs Model Complexity')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Transfer Function Comparison

In [None]:
# Probe inputs reaching beyond the [0.05, 0.20] training interval on both sides.
x_extended = torch.linspace(0.0, 0.30, 200)
X_extended = x_extended[:, None, None].expand(-1, SEQ_LEN, 1).clone()
x_ext_np = x_extended.numpy()

fig, ax = plt.subplots(figsize=(12, 7))

# Reference: the exact linear target.
ax.plot(x_ext_np, TRUE_ALPHA * x_ext_np + TRUE_BETA,
        'k-', lw=3, label=f'Target: y = {TRUE_ALPHA}x + {TRUE_BETA}', alpha=0.8)

# Learned input→output curve of every trained model.
for (name, res), color in zip(results.items(), colors):
    model = res['model']
    model.eval()
    with torch.no_grad():
        final_hist, _ = model(X_extended)
        y_ext = final_hist[:, -1, :].squeeze().numpy()
    ax.plot(x_ext_np, y_ext, '--', color=color, lw=2,
            label=f"{name} (MSE={res['mse']:.4f})")

# Shade the interval the models were actually trained on.
ax.axvspan(0.05, 0.20, alpha=0.15, color='green', label='Training region')

ax.set_xlabel('Input x', fontsize=12)
ax.set_ylabel('Output y', fontsize=12)
ax.set_title('Transfer Functions: Different SOEN Architectures', fontsize=14)
ax.legend(loc='upper left', fontsize=9)
ax.grid(True, alpha=0.3)
plt.show()

## 9. Summary Table

In [None]:
import pandas as pd

# The targets and their total sum of squares are the same for every
# architecture, so compute them once rather than on each loop iteration.
y_true = y_data.squeeze().numpy()
ss_tot = np.sum((y_true - y_true.mean()) ** 2)

# Create summary table: one row per trained architecture.
summary_data = []
for name, res in results.items():
    # Compute R² score: 1 - SS_res / SS_tot
    y_pred = res['y_pred']
    ss_res = np.sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)

    summary_data.append({
        'Architecture': name,
        'Hidden Dims': str(res['hidden_dims']),
        'Trainable Params': res['n_params'],
        'Final MSE': f"{res['mse']:.6f}",
        'R² Score': f"{r2:.4f}",
    })

# Order rows from smallest to largest model.
df = pd.DataFrame(summary_data)
df = df.sort_values('Trainable Params')

print("=" * 80)
print("NETWORK COMPLEXITY STUDY: SUMMARY")
print("=" * 80)
print(f"\nTask: Learn y = {TRUE_ALPHA}·x + {TRUE_BETA}")
print(f"Training epochs: {N_EPOCHS}")
print(f"Learning rate: {LR}")
print()
print(df.to_string(index=False))
print("=" * 80)

## 10. Best Model Analysis

In [None]:
# Find the architectures with the lowest and highest evaluation MSE.
best_name = min(results, key=lambda x: results[x]['mse'])
best_res = results[best_name]
worst_name = max(results, key=lambda x: results[x]['mse'])
worst_res = results[worst_name]

print(f"Best architecture: {best_name}")
print(f"  MSE: {best_res['mse']:.6f}")
print(f"  Params: {best_res['n_params']}")

print(f"\nWorst architecture: {worst_name}")
print(f"  MSE: {worst_res['mse']:.6f}")
print(f"  Params: {worst_res['n_params']}")

# Relative MSE reduction going from the worst to the best model.
improvement = (worst_res['mse'] - best_res['mse']) / worst_res['mse'] * 100
print(f"\nImprovement: {improvement:.1f}% reduction in MSE")

# Plot best vs worst
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

x_plot = x_values.numpy()
y_true = y_data.squeeze().numpy()

# The worst model is chosen purely by MSE, so its panel is titled "Worst"
# without assuming it is also the smallest architecture.
for ax, (name, res), title in zip(axes,
                                   [(worst_name, worst_res), (best_name, best_res)],
                                   ['Worst', 'Best']):
    ax.scatter(x_plot, y_true, alpha=0.5, s=30, color='gray', label='Target')
    ax.plot(x_plot, TRUE_ALPHA * x_plot + TRUE_BETA, 'k--', lw=2)
    ax.scatter(x_plot, res['y_pred'], alpha=0.7, s=30, color='blue', label='Prediction')

    # Show residuals as vertical segments for every 10th sample.
    for x_i, target_i, pred_i in zip(x_plot[::10], y_true[::10], res['y_pred'][::10]):
        ax.plot([x_i, x_i], [target_i, pred_i],
                'r-', alpha=0.3, lw=1)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title(f"{title}: {name}\nMSE={res['mse']:.6f}, Params={res['n_params']}")
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 11. Conclusions

In [None]:
print("=" * 70)
print("CONCLUSIONS")
print("=" * 70)

# Analyze trend across the single-hidden-layer models, narrowest to widest.
wide_models = ['1 neuron', '2 neurons', '3 neurons', '5 neurons', '10 neurons', '20 neurons']
wide_losses = [results[n]['mse'] for n in wide_models if n in results]

# Width "helps" when the widest trained model beats the narrowest one.
width_helps = len(wide_losses) > 1 and wide_losses[-1] < wide_losses[0]

print("\n1. EFFECT OF WIDTH (more neurons in hidden layer):")
for name in wide_models:
    if name in results:
        print(f"   {name}: MSE = {results[name]['mse']:.6f}")

if len(wide_losses) > 1:
    if width_helps:
        print(f"   → More neurons HELPS (MSE reduced by {(wide_losses[0]-wide_losses[-1])/wide_losses[0]*100:.1f}%)")
    else:
        print("   → More neurons does NOT significantly help")

print("\n2. EFFECT OF DEPTH (multiple hidden layers):")
if '3→3 (deep)' in results and '3 neurons' in results:
    shallow = results['3 neurons']['mse']
    deep = results['3→3 (deep)']['mse']
    print(f"   3 neurons (1 layer): MSE = {shallow:.6f}")
    print(f"   3→3 (2 layers):      MSE = {deep:.6f}")
    if deep < shallow:
        print("   → Depth HELPS")
    else:
        print("   → Depth does NOT help significantly")

print("\n3. HARDWARE IMPLICATIONS:")
print(f"   Best architecture: {best_name}")
print(f"   Required neurons: {sum(best_res['hidden_dims'])} SingleDendrite(s)")
print(f"   Required connections: {best_res['n_params']} trainable weights")

print("\n4. KEY INSIGHT:")
print("   The SingleDendrite's nonlinear dynamics mean that even with many")
print("   neurons, perfect linear regression may not be achievable.")
# Only claim that width improves the fit when the measured losses support it.
if width_helps:
    print("   However, more neurons provide better APPROXIMATION of the linear function.")
else:
    print("   In this run, adding more neurons did NOT improve the approximation.")

print("\n" + "=" * 70)