In [None]:
using JLD2
using Plots
using LinearAlgebra
using Statistics
using LaTeXStrings

# Load the contents of the JLD2 file that sits next to this source file
data = joinpath(@__DIR__, "high_prec_data.jld2") |> load
# Report how many top-level entries were loaded
println("Data loaded. Available keys: ", length(keys(data)))

In [None]:
"""
    identify_cutoff(spectrum; noise_floor=1e-14)

Return the effective bond dimension `chi_eff`: the number of Schmidt coefficients in
`spectrum` that lie strictly above the simulation noise floor.

# Arguments
- `spectrum`: vector of Schmidt coefficients (singular values), in any order.

# Keywords
- `noise_floor`: threshold at or below which a coefficient is considered DMRG noise.
  For double precision, `1e-14` is a safe bound above machine epsilon.

# Returns
The index of the last coefficient above `noise_floor` in the descending-sorted spectrum,
or `0` when no coefficient exceeds the floor (this includes an empty spectrum).
"""
function identify_cutoff(spectrum::AbstractVector{<:Real}; noise_floor=1e-14)
    # Sort just in case, though an SVD usually returns sorted values
    s_sorted = sort(spectrum, rev=true)

    # Last index whose value is still above the floor; `nothing` means there is none
    cutoff_idx = findlast(>(noise_floor), s_sorted)

    # If every coefficient is noise, nothing is worth keeping
    return something(cutoff_idx, 0)
end


In [None]:
# Container for analysis results: one record per (N, σ) dataset found in `data`
analysis_results = Dict{String,Any}[]

# Parameters (ensure these match the generation script)
Ns = [10, 14, 18, 22, 26]
sigmas = [0.1, 0.3]

for N in Ns, σ in sigmas
    key = "N=$N/sigma=$σ"
    if !haskey(data, key)
        # Surface missing datasets instead of dropping them silently
        @warn "No spectrum stored under this key; skipping" key
        continue
    end
    spectrum = data[key]

    # Step A: Find the Cutoff (Physics vs Noise)
    chi_eff = identify_cutoff(spectrum, noise_floor=1e-12)

    # Step B: Calculate Metrics
    metrics = calculate_tail_metrics(spectrum, chi_eff)

    # Step C: Observable Bound (Example: Energy per site, norm approx 1)
    obs_err = observable_error_bound(1.0, metrics)

    push!(analysis_results, Dict{String,Any}(
        "N" => N,
        "sigma" => σ,
        "chi_eff" => chi_eff,
        "trunc_err" => metrics["truncation_err"],
        "fidelity" => metrics["fidelity"],
        "obs_error_bound" => obs_err,
    ))
end

# Display a sample result
println("Analysis Complete. Example result for largest N:")
display(filter(x -> x["N"] == Ns[end], analysis_results))

In [None]:

"""
    calculate_tail_metrics(spectrum, cutoff_idx)

Calculate the truncation error and fidelity obtained if every Schmidt coefficient after
position `cutoff_idx` of the descending-sorted `spectrum` were cut off.

The fidelity is computed as `1 - truncation_err`, so it equals the retained weight only
when `spectrum` is normalised (its squared coefficients sum to 1).

# Arguments
- `spectrum`: vector of Schmidt coefficients, in any order.
- `cutoff_idx`: number of leading coefficients kept; clamped to `0:length(spectrum)`.

# Returns
A `Dict` containing:
- `"truncation_err"`: sum of squared tail coefficients, ``1 - \\sum \\lambda_{kept}^2``.
- `"fidelity"`: the retained weight, `1 - truncation_err`.
- `"bound_factor"`: the factor ``\\sqrt{1-F}`` used for observable bounds.
"""
function calculate_tail_metrics(spectrum::AbstractVector{<:Real}, cutoff_idx::Integer)
    s_sorted = sort(spectrum, rev=true)

    # Clamp on both sides so a negative or oversized cutoff cannot index out of bounds
    valid_cut = clamp(cutoff_idx, 0, length(s_sorted))

    # The tail is everything AFTER the cutoff
    tail = @view s_sorted[valid_cut+1:end]

    # Truncation error: sum of squares of the tail (always a float, even for integer input)
    trunc_err = float(sum(abs2, tail))

    # Overlap squared of the truncated state with the original:
    # F = |<psi_all|psi_trunc>|^2 = Sum(lambda_kept^2) = 1 - trunc_err
    fidelity = 1.0 - trunc_err

    return Dict(
        "truncation_err" => trunc_err,
        "fidelity" => fidelity,
        "bound_factor" => sqrt(trunc_err), # sqrt(1-F)
    )
end


In [None]:

"""
    observable_error_bound(obs_norm, metrics)

Calculate the strict upper bound on the error of an observable expectation value:

``|\\langle\\hat{O}\\rangle_{\\text{all}} - \\langle\\hat{O}\\rangle_{\\text{trunc}}| \\leq 2\\|\\hat{O}\\|\\sqrt{1-F}``

# Arguments
- `obs_norm`: the operator norm ``\\|\\hat{O}\\|`` of the observable.
- `metrics`: dictionary holding the key `"bound_factor"`, i.e. ``\\sqrt{1-F}``.

# Returns
`2 * obs_norm * metrics["bound_factor"]`.
"""
function observable_error_bound(obs_norm::Real, metrics::AbstractDict)
    # bound_factor is sqrt(1-F), precomputed in metrics
    return 2 * obs_norm * metrics["bound_factor"]
end

In [None]:
"""
    plot_scaling_and_predict(results)

Plot the scaling of the effective bond dimension and of the tail weight with `N`, and
fit a predictive equation `χ_eff(N) ≈ exp(m*N + c)` for each σ.

# Arguments
- `results`: collection of records indexable by the keys `"N"`, `"sigma"`, `"chi_eff"`
  and `"trunc_err"`.

# Returns
A `Dict` mapping each σ to a function `n -> χ_eff(n)` built from its fit. A σ with fewer
than two points having `chi_eff > 0` gets no entry; a warning is logged instead.
"""
function plot_scaling_and_predict(results)

    p1 = plot(title="Effective Bond Dim (χ_eff) vs N",
              xlabel="N", ylabel="χ_eff (Log Scale)", yscale=:log10, legend=:topleft)

    p2 = plot(title="Tail Weight (Truncation Error) vs N",
              xlabel="N", ylabel="Error (Log Scale)", yscale=:log10)

    unique_sigmas = unique(map(x -> x["sigma"], results))
    predictors = Dict{Any,Function}()

    for σ in unique_sigmas
        # `filter` returns a fresh array, so sorting it leaves `results` untouched
        subset = sort(filter(x -> x["sigma"] == σ, results), by = x -> x["N"])

        sizes = Float64[d["N"] for d in subset]
        chis = Float64[d["chi_eff"] for d in subset]
        errs = [d["trunc_err"] for d in subset]

        # Plotting
        plot!(p1, sizes, chis, label="σ=$σ", marker=:circle, lw=2)
        plot!(p2, sizes, errs, label="σ=$σ", marker=:square, lw=2)

        # log(χ) is finite only for χ > 0, so the fit uses those points alone
        fit_mask = chis .> 0
        n_fit = count(fit_mask)
        if n_fit < 2
            @warn "Need at least two points with χ_eff > 0 to fit; no predictor stored" σ
            continue
        end

        # Fitting log(chi) = m*N + c by linear least squares
        # This assumes Volume Law growth (Exponential Bond Dim)
        A = hcat(sizes[fit_mask], ones(n_fit))
        m, c = A \ log.(chis[fit_mask])

        # Store a closure for prediction
        predictors[σ] = n_val -> exp(m * n_val + c)

        println("Predictor for σ=$σ: χ_eff(N) ≈ exp($(round(m, digits=3)) * N + $(round(c, digits=3)))")
    end

    display(plot(p1, p2, layout=(1,2), size=(900, 400)))
    return predictors
end

# Produce the scaling plots and collect the fitted predictors returned for each σ
predictors = plot_scaling_and_predict(analysis_results)

# Evaluate the σ=0.1 predictor at N=50 and report it rounded to an integer
pred_chi_50 = 50 |> predictors[0.1]
println("\nPredicted Cutoff for N=50 (σ=0.1): ", round(Int, pred_chi_50))

In [None]:
"""
    calculate_observable_bounds(spectrum, cutoff_idx, N; J=1.0, Delta=1.0)

Calculate the strict upper bound on the error for Total Magnetization and Energy if the
tail of `spectrum` (everything after `cutoff_idx`) is discarded, using

    |<O> - <O>_trunc| <= 2 * ||O|| * sqrt(1 - Fidelity)

# Arguments
- `spectrum`: Schmidt coefficients in any order; they need not be normalised.
- `cutoff_idx`: number of leading (largest) coefficients kept. Values outside
  `0:length(spectrum)` are clamped when selecting the tail.
- `N`: number of sites.

# Keywords
- `J`, `Delta`: coupling and anisotropy entering the Hamiltonian norm bound.

# Returns
A `Dict` with the keys `"N"`, `"chi_kept"` (the `cutoff_idx` as passed in),
`"discarded_weight"` (unnormalised sum of squared tail coefficients),
`"bound_factor_sqrt_eps"`, `"max_error_Magnetization"` and `"max_error_Energy"`.
"""
function calculate_observable_bounds(spectrum::AbstractVector{<:Real}, cutoff_idx::Integer,
                                     N::Integer; J=1.0, Delta=1.0)
    # Total weight of the spectrum, used to normalise the discarded weight
    full_norm = isempty(spectrum) ? 0.0 : float(sum(abs2, spectrum))

    # Sort descending
    s_sorted = sort(spectrum, rev=true)

    # Discarded weight (1 - Fidelity): sum of squares of the tail after the cutoff.
    # Clamping keeps a negative or oversized cutoff from indexing out of bounds.
    kept = clamp(cutoff_idx, 0, length(s_sorted))
    tail = @view s_sorted[kept+1:end]
    discarded_weight = isempty(tail) ? 0.0 : float(sum(abs2, tail))

    # The error bound factor is sqrt(1 - F) = sqrt(epsilon).
    # If the spectrum was normalised, discarded_weight is exactly epsilon; normalise
    # just in case. A spectrum with zero total weight has nothing to discard, so avoid 0/0.
    epsilon = full_norm > 0 ? discarded_weight / full_norm : 0.0
    bound_factor = sqrt(epsilon)

    # Operator norms ||O||

    # Magnetization: M = Sum(Sz_i). Max eigenvalue of Sz is 0.5, so ||M|| <= 0.5 * N.
    norm_M = N / 2.0

    # Hamiltonian (XXZ): H = Sum J(SxSx + SySy + Delta*SzSz), with ||SxSx|| = ||SzSz|| = 0.25.
    # The bond count below is the all-to-all one, N(N-1)/2, so the norm scales as N^2.
    # NOTE(review): the original comment attributes the all-to-all structure to the loop
    # `i=1:N, j=i+1:N` in `mach_prec.jl`; confirm against that file.
    num_bonds = N * (N - 1) / 2
    norm_H = num_bonds * abs(J) * max(1.0, abs(Delta)) # Safe upper bound

    # Error bounds for each observable
    err_M = 2 * norm_M * bound_factor
    err_H = 2 * norm_H * bound_factor

    return Dict(
        "N" => N,
        "chi_kept" => cutoff_idx,
        "discarded_weight" => discarded_weight,
        "bound_factor_sqrt_eps" => bound_factor,
        "max_error_Magnetization" => err_M,
        "max_error_Energy" => err_H,
    )
end

In [None]:


# Cumulative fidelity and normalised cumulative entropy of a Schmidt spectrum `s_in`.
function _cumulative_fidelity_entropy(s_in)
    # Normalise the squared coefficients into probabilities
    probs = s_in .^ 2
    probs = probs ./ sum(probs)

    # Cumulative Sum (Fidelity)
    cum_fidelity = cumsum(probs)

    # Cumulative Entropy: partial sums of -p*log(p); zero-probability terms add nothing
    cum_entropy = cumsum(map(p -> p > 0 ? -p * log(p) : zero(p), probs))

    # Normalize entropy to 1.0 for comparison
    total_entropy = cum_entropy[end]
    if total_entropy > 0
        cum_entropy ./= total_entropy
    else
        # Zero total entropy would give 0/0; treat it as fully accumulated from the start
        fill!(cum_entropy, 1.0)
    end
    return cum_fidelity, cum_entropy
end

"""
    analyze_physics_vs_noise(data, N_target, sigma_target)

Plot and summarise the Schmidt spectrum stored in `data` under
`"N=\$N_target/sigma=\$sigma_target"`.

The left panel shows `log10(λ²)` against index, together with the `sigma=0.0` spectrum
for the same `N_target` when `data` contains it. The right panel shows the normalised
cumulative entropy and cumulative fidelity against bond dimension, with markers at the
first index reaching 99.9% of the entropy and a fidelity of `1 - 1e-10`. The two indices
and their ratio are printed.

Prints a message and returns `nothing` when the requested key is absent or its spectrum
is empty or identically zero.
"""
function analyze_physics_vs_noise(data, N_target, sigma_target)
    key_noisy = "N=$N_target/sigma=$sigma_target"
    key_clean = "N=$N_target/sigma=0.0" # Compare against clean system

    if !haskey(data, key_noisy)
        println("Data for $key_noisy not found.")
        return nothing
    end

    s_noisy = sort(data[key_noisy], rev=true)

    # Without any weight the normalisation below would divide by zero
    if isempty(s_noisy) || iszero(sum(abs2, s_noisy))
        println("Spectrum for $key_noisy is empty or identically zero; nothing to analyze.")
        return nothing
    end

    # We look at the tail between 1e-6 and 1e-15
    # If it's noise, it should be flat or random.
    # If it's physics (Anderson/Disorder), it usually follows log-linear decay.
    p1 = plot(title="Schmidt Spectrum Tail (N=$N_target)",
              xlabel="Index", ylabel="Log(λ²)", legend=:topright)

    plot!(p1, log10.(s_noisy.^2), label="Noisy (σ=$sigma_target)", lw=2, color=:red)

    # If clean data exists, overlay it; otherwise just analyze the noisy spectrum
    if haskey(data, key_clean)
        s_clean = sort(data[key_clean], rev=true)
        plot!(p1, log10.(s_clean.^2), label="Clean (σ=0)", lw=2, color=:blue, linestyle=:dash)
    end

    # Add Machine Precision Floor marker
    hline!(p1, [log10(1e-16)], label="Machine Epsilon", color=:black, linestyle=:dot)

    # Cumulative Entropy and Cumulative Fidelity as functions of bond dimension
    fid_noisy, ent_noisy = _cumulative_fidelity_entropy(s_noisy)

    # Cutoff point for high fidelity (1 - 1e-10); fall back to the full spectrum if
    # rounding keeps the cumulative sum just below the target
    target_fid = 1.0 - 1e-10
    idx_fid = something(findfirst(>=(target_fid), fid_noisy), length(fid_noisy))

    # Cutoff point for high entropy (99.9% of total entropy), same fallback
    target_ent = 0.999
    idx_ent = something(findfirst(>=(target_ent), ent_noisy), length(ent_noisy))

    p2 = plot(title="Why Bond Dim Spikes",
              xlabel="Bond Dimension χ", ylabel="Normalized Accumulation", legend=:bottomright)

    plot!(p2, ent_noisy, label="Entropy Accumulation", lw=2, color=:green)
    plot!(p2, fid_noisy, label="Fidelity Accumulation", lw=2, color=:purple)

    # Mark the divergence
    vline!(p2, [idx_ent], label="99.9% Entropy (χ=$idx_ent)", color=:green, linestyle=:dash)
    vline!(p2, [idx_fid], label="1-1e-10 Fidelity (χ=$idx_fid)", color=:purple, linestyle=:dash)

    display(plot(p1, p2, layout=(1,2), size=(1000, 400)))

    println("--- Analysis Results for N=$N_target, σ=$sigma_target ---")
    println("Bond Dimension needed for 99.9% Entropy: $idx_ent")
    println("Bond Dimension needed for 1e-10 Error:   $idx_fid")
    println("Ratio (Spike Factor): $(round(idx_fid/idx_ent, digits=2))x")
    println("Explanation: You need $idx_fid states to simulate the physics correctly,")
    println("but only $idx_ent states to get the entropy right.")
end

# Run the spectrum diagnostic on the loaded data for N=22, σ=0.1
analyze_physics_vs_noise(data, 22, 0.1)