 # Universality Verification Experiment for Scaling Law (N-Scaling)


In [None]:
# Download the dataset on the first run
from torchvision import datasets, transforms
try:
    # Instantiate the training split first, then the test split. With
    # download=True the constructor is asked to fetch the data into ./data.
    for is_train_split in (True, False):
        datasets.FashionMNIST('./data', train=is_train_split, download=True, transform=transforms.ToTensor())
    print("FashionMNIST dataset is ready.")
except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    print(f"Could not download dataset. Error: {e}")

In [None]:
import sys
import os

# Make the experiment's directory importable.
workspace_path = "/N-Scaling" # Please adjust this path according to your setup
if workspace_path in sys.path:
    print(f"Path {workspace_path} is already in the Python path")
else:
    sys.path.append(workspace_path)
    print(f"Added path: {workspace_path}")

# Report whether the logic module file is present on disk.
# Find and replace "MLP_N_logic.py" with the logic module for the desired model.
file_path = os.path.join(workspace_path, "MLP_N_logic.py")
file_status = "✓ File exists" if os.path.exists(file_path) else "✗ File not found"
print(f"{file_status}: {file_path}")

# Try the import and verify that the training entry point is exposed.
try:
    import MLP_N_logic # Replace with the logic module for the desired model
    print("✓ Module imported successfully!")

    if not hasattr(MLP_N_logic, 'run_training_task_N_scaling'):
        print("✗ Function not found: run_training_task_N_scaling")
    else:
        print("✓ Found function: run_training_task_N_scaling")
        # Bind the entry point at module level for convenience.
        run_training_task_N_scaling = MLP_N_logic.run_training_task_N_scaling

except ImportError as e:
    print(f"✗ Import failed: {e}")

In [None]:
import numpy as np

# --- 1. N-Scaling Experiment Configuration ---
N_SCALING_CONFIG = dict(
    dataset_size=10000,
    # (min, max) of the target parameter counts N and how many targets to
    # spread across that range; both are fed to generate_hidden_configs.
    target_N_range=(800, 35000),
    num_models=25,
    fixed_epochs=80,
    seed=42,
)

# --- 2. Base Experiment Parameters ---
BASE_CONFIG = dict(
    batch_size=128,
    learning_rate=0.001,
    analysis_sample_size=30,  # consumed by the logic module -- semantics not visible here
    # Weights of the squared H'_TSE / H'_SIE terms in the reducible ideal norm.
    w1=0.5,
    w2=0.5,
)
# Mirror the scaling-experiment settings so every training task sees them.
BASE_CONFIG.update(
    epochs=N_SCALING_CONFIG['fixed_epochs'],
    dataset_size=N_SCALING_CONFIG['dataset_size'],
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import linregress
import torch
import os

from MLP_N_logic import run_training_task_N_scaling, TheoryAnalyzer # Replace with the logic module for the desired model

def generate_hidden_configs(target_range, num_points):
    """Build a de-duplicated list of ``(h1, h2)`` hidden-layer sizes.

    ``num_points`` target parameter counts are spaced logarithmically between
    ``target_range[0]`` and ``target_range[1]``. Each target is mapped to a
    first hidden width ``h1`` by taking the positive root of
    ``0.5 * h1**2 + 789 * h1 = target_n`` (a simplified inverse of the
    parameter-count formula -- presumably that of the MLP in the logic
    module; confirm there). ``h2`` is half of ``h1``. Both widths are rounded
    to integers and clamped to at least 1.

    Returns the configurations in order of first appearance, without repeats.
    """
    lower_n, upper_n = target_range[0], target_range[1]
    log_spaced_targets = np.logspace(np.log10(lower_n), np.log10(upper_n), num_points)

    quad_a, quad_b = 0.5, 789
    # A dict keeps insertion order, so its keys double as an ordered set.
    ordered_unique = {}
    for n_target in log_spaced_targets:
        discriminant = quad_b**2 + 4 * quad_a * n_target
        width_1 = (np.sqrt(discriminant) - quad_b) / (2 * quad_a)
        width_1 = max(1, int(round(width_1)))
        width_2 = max(1, int(round(width_1 / 2)))
        ordered_unique.setdefault((width_1, width_2), None)
    return list(ordered_unique)

def power_law_fit(x, y):
    """Fit ``y = 10**i * x**s`` by linear regression in log10-log10 space.

    Only points where both ``x`` and ``y`` are finite and strictly positive
    take part in the regression.

    Returns:
        ``(r_squared, p_value, slope, y_fit)`` where ``y_fit`` is the fitted
        curve evaluated at every element of ``x``. If fewer than two usable
        points exist, returns ``(0, 1, 0, <all-NaN array shaped like x>)``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    mask = (y > 0) & (x > 0) & np.isfinite(y) & np.isfinite(x)
    if mask.sum() < 2:
        return 0, 1, 0, np.full_like(x, np.nan, dtype=float)
    slope, intercept, r_value, p_value, _ = linregress(np.log10(x[mask]), np.log10(y[mask]))
    # The curve is evaluated on *all* x (not just the masked points); zeros or
    # negatives there would otherwise emit log10 RuntimeWarnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        y_fit = 10 ** (slope * np.log10(x) + intercept)
    return r_value**2, p_value, slope, y_fit


def logarithmic_fit(x, y):
    """Fit ``y = slope * log10(x) + intercept`` by linear regression.

    Only points where ``x`` is finite and strictly positive and ``y`` is
    finite take part in the regression.

    Returns:
        ``(r_squared, p_value, slope, intercept, y_fit)`` where ``y_fit`` is
        the fitted curve evaluated at every element of ``x``. If fewer than
        two usable points exist, returns
        ``(0, 1, 0, 0, <all-NaN array shaped like x>)``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    mask = (x > 0) & np.isfinite(y) & np.isfinite(x)
    if mask.sum() < 2:
        return 0, 1, 0, 0, np.full_like(x, np.nan, dtype=float)
    slope, intercept, r_value, p_value, _ = linregress(np.log10(x[mask]), y[mask])
    with np.errstate(divide='ignore', invalid='ignore'):
        y_fit = slope * np.log10(x) + intercept
    return r_value**2, p_value, slope, intercept, y_fit


if __name__ == '__main__':
    # Run one training task per generated architecture, then fit and plot the
    # scaling relations and save both the figure and the raw results.
    seed = N_SCALING_CONFIG['seed']
    hidden_configs = generate_hidden_configs(N_SCALING_CONFIG['target_N_range'], N_SCALING_CONFIG['num_models'])

    print(f"Generated {len(hidden_configs)} unique model configurations to test.")

    # Device id handed to every task: 0 when CUDA is available, otherwise -1
    # (presumably interpreted as "CPU" by the logic module -- confirm there).
    device_id = 0 if torch.cuda.is_available() else -1
    tasks = [(seed, h_config, BASE_CONFIG, device_id) for h_config in hidden_configs]

    results = []
    for task_args in tqdm(tasks, desc=f"Running Direct N-Scaling for Seed {seed}"):
        result = run_training_task_N_scaling(task_args)
        # Falsy results (e.g. None) are skipped.
        if result:
            results.append(result)

    if not results:
        print("No results generated.")
    else:
        # One row per distinct parameter count, ordered by N.
        df = pd.DataFrame(results).sort_values('num_params_N')
        df = df.drop_duplicates(subset=['num_params_N']).reset_index(drop=True)

        # Define the "valid regime" as the data points up to the minimum test loss
        # (.loc slicing is label-based and inclusive, so the minimum row is kept).
        min_loss_idx = df['final_test_loss'].idxmin()
        df_valid = df.loc[:min_loss_idx].copy()

        plt.style.use('seaborn-v0_8-whitegrid')
        fig, axes = plt.subplots(1, 5, figsize=(40, 8))
        fig.suptitle(f'Cognitive Investment Model - N-Scaling (D={N_SCALING_CONFIG["dataset_size"]}) (Seed={seed}, Epochs={BASE_CONFIG["epochs"]})', fontsize=24, y=0.98)

        # --- Data Preparation for Reducible Metrics ---
        # Each "reducible" quantity is the metric minus its minimum over the
        # valid regime, so the row holding the minimum becomes exactly 0 and is
        # excluded from the power-law fits by their y > 0 mask.
        L_inf = df_valid['final_test_loss'].min()
        df_valid['reducible_loss'] = df_valid['final_test_loss'] - L_inf
        hsie_0 = df_valid['final_hsie'].min() # For N-Scaling, H'sie grows from a baseline H_0
        df_valid['reducible_hsie'] = df_valid['final_hsie'] - hsie_0
        htse_0 = df_valid['final_htse'].min() # H'tse also has a baseline H_0
        df_valid['reducible_htse'] = df_valid['final_htse'] - htse_0

        x_N_valid = df_valid['num_params_N'].values

        # --- Plot 1: Performance vs. N ---
        ax0 = axes[0]
        r2_1, p_1, s_1, fit_1 = power_law_fit(x_N_valid, df_valid['reducible_loss'])
        ax0.set_title('Law 1: Performance vs. N')
        ax0.plot(df['num_params_N'], df['final_test_loss'], 'o', color='blue', label='All Data') # Show all points
        ax0.plot(df_valid['num_params_N'], df_valid['final_test_loss'], 'o', color='dodgerblue', label='Valid Regime') # Highlight valid regime
        # The fit was made on the reducible loss; add L_inf back for display.
        ax0.plot(x_N_valid, fit_1 + L_inf, '--', color='red', label='Fit on Valid Regime')
        # Backslashes of LaTeX commands are doubled so that only "\n" is an
        # escape sequence; "\i", "\p" etc. are invalid escapes in Python.
        ax0.text(0.95, 0.95, f'$R^2={r2_1:.2f}, p={p_1:.1e}$\n$L-L_\\infty \\propto N^{{{s_1:.2f}}}$', ha='right', va='top', transform=ax0.transAxes, bbox=dict(fc='wheat', alpha=0.5))
        ax0.set_ylabel('Final Test Loss', fontsize=12)
        ax0.legend()

        # --- Plot 2: Abstraction ---
        r2_2, p_2, s_2, fit_2 = power_law_fit(x_N_valid, df_valid['reducible_htse'])
        axes[1].set_title('Component 1: Abstraction')
        axes[1].plot(x_N_valid, df_valid['final_htse'], 's', color='green')
        axes[1].plot(x_N_valid, fit_2 + htse_0, '--', color='purple')
        axes[1].text(0.95, 0.05, f'$R^2={r2_2:.2f}, p={p_2:.1e}$\n$H-H_0 \\propto N^{{{s_2:.2f}}}$', ha='right', va='bottom', transform=axes[1].transAxes, bbox=dict(fc='wheat', alpha=0.5))
        axes[1].set_ylabel("Final H'_TSE", fontsize=12)

        # --- Plot 3: Compression (LOGARITHMIC FIT) ---
        r2_3, p_3, a_3, b_3, fit_3 = logarithmic_fit(x_N_valid, df_valid['reducible_hsie'])
        axes[2].set_title('Component 2: Compression')
        axes[2].plot(x_N_valid, df_valid['final_hsie'], '^', color='orange')
        axes[2].plot(x_N_valid, fit_3 + hsie_0, '--', color='darkcyan')
        text_3 = (f'$R^2={r2_3:.3f}, p={p_3:.1e}$\n' f'$H-H_0 \\propto \\log(N)$')
        axes[2].text(0.95, 0.95, text_3, ha='right', va='top', transform=axes[2].transAxes, bbox=dict(fc='wheat', alpha=0.5))
        axes[2].set_ylabel("Final H'_SIE", fontsize=12)

        # --- Plot 4: Internal Cost Trend ---
        # Weighted quadratic norm of the two reducible components:
        # sqrt(w1 * htse_red**2 + w2 * hsie_red**2).
        htse_red_sq = np.maximum(0, df_valid['reducible_htse'])**2
        hsie_red_sq = np.maximum(0, df_valid['reducible_hsie'])**2
        df_valid['L_ideal_reducible'] = np.sqrt(BASE_CONFIG['w1'] * htse_red_sq + BASE_CONFIG['w2'] * hsie_red_sq)
        axes[3].plot(x_N_valid, df_valid['L_ideal_reducible'], 'p', color='purple')
        axes[3].set_title(r'Internal Cost $\mathcal{L}_{ideal, red}$ Trend')
        axes[3].set_ylabel('Reducible Ideal Norm', fontsize=12)

        # --- Plot 5: The Core Law ---
        r2_4, p_4, s_4, fit_4 = power_law_fit(df_valid['L_ideal_reducible'], df_valid['reducible_loss'])
        axes[4].set_title('The Core Law: Performance vs. Cost')
        axes[4].plot(df_valid['L_ideal_reducible'], df_valid['reducible_loss'], 'd', color='black')
        axes[4].plot(df_valid['L_ideal_reducible'], fit_4, '--', color='magenta')
        axes[4].text(0.95, 0.95, f'Core Law:\n$R^2={r2_4:.4f}, p={p_4:.1e}$\n$L_{{red}} \\propto \\mathcal{{L}}_{{ideal, red}}^{{{s_4:.2f}}}$', ha='right', va='top', transform=axes[4].transAxes, bbox=dict(fc='wheat', alpha=0.5))
        axes[4].set_ylabel('Reducible Test Loss', fontsize=12)

        # --- Final Formatting ---
        for i, ax in enumerate(axes):
            if i < 4:
                ax.set_xlabel('Number of Parameters (N)')
            else:
                ax.set_xlabel(r'Reducible Ideal Norm $\mathcal{L}_{ideal, red}$')
            ax.set_xscale('log')
            # For N-Scaling, the log-growth of H'sie is best visualized on a linear y-axis
            ax.set_yscale('linear' if i == 2 else 'log')
            ax.grid(True, which='both', linestyle='--')

        plt.tight_layout(rect=[0, 0.03, 1, 0.95])

        # --- Save results ---
        output_img_path = f"N_scaling_D{N_SCALING_CONFIG['dataset_size']}_seed_{seed}_epochs_{BASE_CONFIG['epochs']}.png"
        output_csv_path = f"N_scaling_D{N_SCALING_CONFIG['dataset_size']}_results_seed_{seed}_epochs_{BASE_CONFIG['epochs']}.csv"

        plt.savefig(output_img_path, dpi=150)
        # The CSV holds every result row (df), not just the valid regime.
        df.to_csv(output_csv_path, index=False)

        print(f"Plot saved to: {output_img_path}")
        print(f"Full experiment data saved to: {output_csv_path}")

        plt.show()
