In [None]:
# -*- coding: utf-8 -*-
"""
Final Data Processing and Plotting Script for Cognitive Thermodynamics

This script serves as the final, definitive analysis tool for our research. It
separates the data analysis from the raw experiment generation, which is a key
scientific best practice.

It performs the following critical steps:
1.  **Loads Raw Data**: It loads the results JSON file from a path specified
    via a command-line argument, making the script portable.
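2.  **Normalization**: It min-max scales H_TSE' and H_SIE' independently so
    that both lie on a comparable range.
3.  **Calculates Weighted Load**: It computes the "Total Cognitive Load" as the
    Euclidean norm of the equally weighted (0.5 / 0.5) normalized entropies.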
4.  **Generates Publication-Ready Plots**: It creates the main and supplementary
    plots for the paper, with polished legends and labels for maximum clarity.
"""
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import linregress
from sklearn.preprocessing import MinMaxScaler
import argparse

def load_results(file_path):
    """Read and parse the experiment results stored as JSON at ``file_path``.

    Args:
        file_path: Path of the JSON file to read (opened as UTF-8 text).

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist or
        does not contain valid JSON. In both failure cases an error message
        is printed to stdout. Any other I/O error propagates to the caller.
    """
    failure = None
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        failure = f"Error: Results file not found at the specified path: {file_path}"
    except json.JSONDecodeError:
        failure = f"Error: Could not decode JSON from the file: {file_path}"

    # Only reached when one of the handled errors occurred.
    print(failure)
    return None


def process_data(results):
    """Turn per-generation records into arrays and derive normalized metrics.

    Args:
        results: Sequence of dict records, one per generation. The keys of
            the first record decide which fields are collected; a field
            missing from a later record is filled with 0 (or ``[]`` for
            'energy_distribution').

    Returns:
        A dict mapping every scalar field to a numpy array, plus
        'energy_distribution' (kept as a plain list of lists when present),
        'htse_norm' and 'hsie_norm' (min-max scaled copies of 'htse' and
        'hsie'), and 'cognitive_load_norm'. An empty dict is returned when
        ``results`` is empty or None.
    """
    if not results:
        return {}

    first_record = results[0]

    # Scalar metrics become numpy arrays, one entry per record.
    data = {}
    for key in first_record.keys():
        if key == 'energy_distribution':
            continue
        data[key] = np.array([record.get(key, 0) for record in results])

    # The energy distributions stay a list of lists so their structure is
    # not altered by numpy array conversion.
    if 'energy_distribution' in first_record:
        data['energy_distribution'] = [
            record.get('energy_distribution', []) for record in results
        ]

    # Each entropy gets its own scaler; the scaler expects a 2-D column, so
    # reshape on the way in and flatten on the way out.
    for raw_key, norm_key in (('htse', 'htse_norm'), ('hsie', 'hsie_norm')):
        column = data[raw_key].reshape(-1, 1)
        data[norm_key] = MinMaxScaler().fit_transform(column).flatten()

    # Total Cognitive Load: Euclidean norm of the weighted normalized entropies.
    w1, w2 = 0.5, 0.5
    weighted_htse = w1 * data['htse_norm']
    weighted_hsie = w2 * data['hsie_norm']
    data['cognitive_load_norm'] = np.sqrt(weighted_htse**2 + weighted_hsie**2)

    return data

def plot_definitive_results(data, output_dir="."):
    """Draw, save and show the 2x3 main figure of the experiment.

    Panels: (1) accuracy, (2) inter-conceptual entropy, (3) SO with a LOWESS
    trend, (4) normalized H_TSE' / H_SIE', (5) normalized cognitive load with
    a linear fit on the latter half of its LOWESS trend, (6) histograms of
    the first and last non-empty energy distributions.

    Args:
        data: Dict of per-generation series as produced by ``process_data``.
            When empty/None a message is printed and nothing is drawn.
        output_dir: Directory that receives "definitive_main_plot.png".

    Returns:
        None. The figure is written at 300 dpi and then ``plt.show()`` is
        called.
    """
    if not data:
        print("No data to plot for definitive results.")
        return

    gens = data['generation']

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(2, 3, figsize=(24, 13))
    fig.suptitle('Definitive Experiment: The Thermodynamic Collapse of a Closed Cognitive System', fontsize=24, y=0.98)
    (ax_accuracy, ax_ice, ax_so), (ax_tradeoff, ax_load, ax_energy) = axes

    # Panel 1: accuracy over generations.
    ax_accuracy.plot(gens, data['accuracy'], marker='o', c='r')
    ax_accuracy.set_title('1. Informational Collapse', fontsize=16)
    ax_accuracy.set_ylabel('Accuracy on Real Test Set (%)', fontsize=14)

    # Panel 2: inter-conceptual entropy.
    ax_ice.plot(gens, data['inter_conceptual_entropy'], marker='x', c='darkred')
    ax_ice.set_title('2. Semantic Heat Death (ICE)', fontsize=16)
    ax_ice.set_ylabel('System Temperature (Entropy)', fontsize=14)

    # Panel 3: raw SO values with their LOWESS-smoothed trend.
    ax_so.plot(gens, data['so'], marker='d', c='purple', alpha=0.6, label='Raw SO')
    so_trend = sm.nonparametric.lowess(data['so'], gens, frac=0.4)[:, 1]
    ax_so.plot(gens, so_trend, c='indigo', lw=2.5, label='SO Trend (LOWESS)')
    ax_so.set_title('3. Organizational Collapse (SO)', fontsize=16)
    ax_so.set_ylabel('Avg. Specialization Similarity', fontsize=14)
    ax_so.legend(loc='lower right', fontsize='small')

    # Panel 4: the two normalized entropies side by side.
    ax_tradeoff.plot(gens, data['htse_norm'], marker='s', ls='--', c='b', alpha=0.7, label="H_TSE' (Normalized)")
    ax_tradeoff.plot(gens, data['hsie_norm'], marker='^', ls=':', c='g', alpha=0.7, label="H_SIE' (Normalized)")
    ax_tradeoff.set_title("4. The Entropy Trade-off (Normalized)", fontsize=16)
    ax_tradeoff.set_ylabel("Normalized Entropy Value", fontsize=14)
    ax_tradeoff.legend(loc='upper right', fontsize='small')

    # Panel 5: linear regression on the latter half of the smoothed load.
    latter_half_idx = len(gens) // 2
    if latter_half_idx > 1:
        load = data['cognitive_load_norm']
        load_trend = sm.nonparametric.lowess(load, gens, frac=0.4)[:, 1]
        late_gens = gens[latter_half_idx:]
        slope, intercept, _r, p_value, _err = linregress(late_gens, load_trend[latter_half_idx:])

        ax_load.scatter(gens, load, c='purple', alpha=0.2, label='Raw Normalized Load')
        ax_load.plot(gens, load_trend, c='indigo', lw=2.5, label='Smoothed Trend')
        ax_load.plot(late_gens, intercept + slope*late_gens, 'r', lw=2, label='Linear Fit on Trend')
        ax_load.set_title("5. Statistical Proof: The Second Law", fontsize=16)
        ax_load.legend(loc='lower right', fontsize='small')

        result_text = f"Trend Analysis (Gens {latter_half_idx}-{len(gens)-1}):\nSlope = {slope:+.5f}\np-value = {p_value:.4f}"
        # The panel background signals the outcome of the significance test.
        if p_value < 0.05:
            result_text += "\n(Statistically Significant)"
            ax_load.set_facecolor('#e6ffe6')
        else:
            result_text += "\n(Not Significant)"
            ax_load.set_facecolor('#ffe6e6')

        ax_load.text(
            0.05, 0.95, result_text,
            transform=ax_load.transAxes,
            fontsize=11,
            verticalalignment='top',
            bbox={'boxstyle': 'round,pad=0.5', 'fc': 'wheat', 'alpha': 0.7},
        )

    # Panel 6: compare the first and the last non-empty energy distribution.
    raw_distributions = data['energy_distribution'] if 'energy_distribution' in data else []
    if len(raw_distributions) > 0:
        distributions = [
            np.array(entry)
            for entry in raw_distributions
            if isinstance(entry, list) and len(entry) > 0
        ]
        if len(distributions) >= 2:
            first_dist = distributions[0]
            last_dist = distributions[-1]
            if first_dist.size > 0 and last_dist.size > 0:
                ax_energy.hist(first_dist, bins=20, alpha=0.7, label='Healthy State (Gen 0)', color='blue', density=True)
                ax_energy.hist(last_dist, bins=20, alpha=0.7, label=f'Collapsed State (Gen {len(gens)-1})', color='red', density=True)
                ax_energy.set_yscale('log')
                ax_energy.legend(fontsize='small')
    ax_energy.set_title('6. Cognitive Energy Distribution', fontsize=16)
    ax_energy.set_xlabel("Cognitive Energy Level (E_cog)", fontsize=14)
    ax_energy.set_ylabel("Probability Density", fontsize=14)

    # Shared cosmetics; the energy panel keeps its own x-axis label.
    for ax in axes.flat:
        if ax is not ax_energy:
            ax.set_xlabel('Generation', fontsize=14)
        ax.grid(True, which="both", ls="--")
        ax.tick_params(axis='both', which='major', labelsize=12)
    ax_load.set_ylabel("Total Cognitive Load (Normalized Norm)", fontsize=14)

    plt.subplots_adjust(left=0.05, right=0.98, top=0.9, bottom=0.08, hspace=0.3, wspace=0.25)

    plot_path = os.path.join(output_dir, "definitive_main_plot.png")
    plt.savefig(plot_path, dpi=300)
    print(f"\nMain narrative plot saved to: {plot_path}")

    plt.show()

def _plot_trend_panel(ax, gens, values, latter_half_idx, title_prefix, ylabel,
                      line_color, scatter_color=None):
    """Draw one raw-vs-smoothed trend panel and put the trend p-value in its title.

    The series is smoothed with LOWESS (frac=0.4). A linear regression is
    fitted to the smoothed values from ``latter_half_idx`` onwards, and its
    p-value is shown in the panel title.

    Args:
        ax: Matplotlib axes to draw on.
        gens: Generation values used as the x-axis.
        values: Metric values, same length as ``gens``.
        latter_half_idx: Index at which the regression window starts.
        title_prefix: Metric name placed in front of " Trend (p=...)".
        ylabel: Label for the y-axis.
        line_color: Colour of the smoothed trend line.
        scatter_color: Colour of the raw points; ``None`` leaves the choice
            to matplotlib's default colour cycle.
    """
    smooth = sm.nonparametric.lowess(values, gens, frac=0.4)[:, 1]
    _, _, _, p_value, _ = linregress(gens[latter_half_idx:], smooth[latter_half_idx:])

    scatter_kwargs = {'alpha': 0.3, 'label': 'Raw Data'}
    if scatter_color is not None:
        scatter_kwargs['color'] = scatter_color
    ax.scatter(gens, values, **scatter_kwargs)
    ax.plot(gens, smooth, color=line_color, lw=2.5, label='Smoothed Trend')
    ax.set_title(f"{title_prefix} Trend (p={p_value:.3f})", fontsize=14)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.legend(loc='best', fontsize='small')


def plot_supplementary_results(data, output_dir="."):
    """Draw, save and show the 2x2 supplementary trend-analysis figure.

    Panels show H_TSE', H_SIE', SO and (when available and not all zeros)
    CCE, each as raw points plus a LOWESS trend, with the p-value of a linear
    fit on the latter half of the trend in the title.

    Args:
        data: Dict of per-generation series as produced by ``process_data``.
            Must contain 'generation', 'htse', 'hsie' and 'so'; 'cce' is
            optional.
        output_dir: Directory that receives "supplementary_plots.png".

    Returns:
        None. Nothing is drawn when ``data`` is empty, lacks 'so', or has too
        few generations for the trend analysis; a message is printed instead.
    """
    if not data or 'so' not in data:
        print("Skipping supplementary plots due to missing data ('so').")
        return

    gens = data['generation']

    # Check data sufficiency BEFORE creating the figure, so that an early
    # return does not leave an empty figure open.
    latter_half_idx = len(gens) // 2
    if latter_half_idx <= 1:
        print("Not enough data points for trend analysis.")
        return

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Supplementary Material: Detailed Statistical Analysis of Key Metrics', fontsize=20, y=0.98)

    # Supp 1: H_TSE' Trend Analysis
    _plot_trend_panel(axes[0, 0], gens, data['htse'], latter_half_idx,
                      "H_TSE'", "Grounding Cost (H_TSE')", line_color='navy')

    # Supp 2: H_SIE' Trend Analysis
    _plot_trend_panel(axes[0, 1], gens, data['hsie'], latter_half_idx,
                      "H_SIE'", "Structural Complexity (H_SIE')",
                      line_color='darkgreen', scatter_color='g')

    # Supp 3: SO Trend Analysis
    _plot_trend_panel(axes[1, 0], gens, data['so'], latter_half_idx,
                      "SO", "Organizational Collapse (SO)",
                      line_color='indigo', scatter_color='purple')

    # Supp 4: CCE Trend Analysis (optional; skipped when absent, of the wrong
    # length, or all zeros)
    cces = data.get('cce')
    if cces is not None and len(cces) == len(gens) and np.any(cces):
        _plot_trend_panel(axes[1, 1], gens, cces, latter_half_idx,
                          "CCE", "Avg. Neuron Specialization (CCE)",
                          line_color='darkcyan', scatter_color='teal')
    else:
        axes[1, 1].text(0.5, 0.5, "CCE data not available\nor all zeros.", ha='center', va='center')
        axes[1, 1].set_title("CCE Trend", fontsize=14)

    for ax in axes.flat:
        ax.set_xlabel('Generation', fontsize=12)
        ax.grid(True, which="both", ls="--")
        ax.tick_params(axis='both', which='major', labelsize=10)

    plt.subplots_adjust(left=0.07, right=0.97, top=0.9, bottom=0.08, hspace=0.3, wspace=0.3)

    plot_path_supp = os.path.join(output_dir, "supplementary_plots.png")
    plt.savefig(plot_path_supp, dpi=300)
    print(f"Supplementary plots saved to: {plot_path_supp}")
    plt.show()


def _first_nonempty_run(raw_results):
    """Pick the record list to analyse from the parsed JSON content.

    Args:
        raw_results: Parsed JSON. A dict is treated as a container of
            several runs (presumably one entry per run; verify against the
            experiment script); anything else is passed through unchanged.

    Returns:
        For a dict, the first value that is a non-empty list, or ``None``
        when no such value exists. For any other input, the input itself.
    """
    if isinstance(raw_results, dict):
        print("Multiple runs detected in JSON, analyzing the first one found.")
        return next(
            (run for run in raw_results.values() if isinstance(run, list) and run),
            None,
        )
    return raw_results


def main():
    """Parse the command line, load the results and generate both figures."""
    parser = argparse.ArgumentParser(description='Process and plot the results of the Cognitive Thermodynamics experiment.')
    parser.add_argument('--json_path', type=str, required=True,
                        help='The full path to the results JSON file (e.g., "results_sgd_focused/sgd_focused_results.json").')
    args = parser.parse_args()

    raw_results = load_results(args.json_path)
    if raw_results is None:
        # load_results has already reported why loading failed.
        return
    if not raw_results:
        print(f"No results found in file: {args.json_path}")
        return

    run_records = _first_nonempty_run(raw_results)
    if run_records is None:
        # Previously this case left `processed_data` unbound and crashed with
        # a NameError; exit with a clear message instead.
        raise SystemExit(
            f"Error: No non-empty list of results found in the file: {args.json_path}"
        )

    processed_data = process_data(run_records)

    output_folder = "final_analysis"
    os.makedirs(output_folder, exist_ok=True)

    plot_definitive_results(processed_data, output_dir=output_folder)
    plot_supplementary_results(processed_data, output_dir=output_folder)


if __name__ == '__main__':
    main()