# Introduction

This notebook is designed to extract statistical insights from both the datasets and the codomains of the trained KAN models. 

In all experiments, we introduced additive Gaussian noise with zero mean and a variance of 1% (equivalent to ±0.02 in the case of a sinusoidal signal).

The following code analyzes how many of the trained codomains stay within specified variance bounds. Specifically, we evaluate three thresholds:
- 1.0 × variance
- 1.5 × variance
- 2.0 × variance

Scroll down to view the results of this analysis.

**Note:** If you'd like to run this notebook with your own data, first run the following scripts and move their output into the `results` folder:
- `performance-boundary-validation.py`
- `performance-boundary-validation-cuda.py`

Be aware that these scripts are computationally intensive and may take several hours to complete.


In [25]:
%matplotlib widget
import math
import random
import sys
from IPython.display import display
import re
import os
import numpy as np
import pandas as pd
import torch

# Mean of the additive Gaussian noise (used when sampling the sanity-check noise
# at the end of the notebook).
mean = 0
# Base noise variance; the analysis loop scales it by 1.0, 1.5 and 2.0 to obtain
# the bounds that the codomains are checked against.
variance = 0.02

In [26]:
def parse_table(text):
    """Turn a pipe-delimited results table into a list of row dictionaries.

    The text is stripped and split on newlines, and its first two lines are
    treated as header lines and skipped. Every remaining line that starts
    with ``| <int> | (<a>, <b>) | (<c>, <d>) |`` yields one dictionary;
    lines that do not match are silently ignored.

    Args:
        text: Full content of a results file.

    Returns:
        A list of dicts with the keys ``"Iteration"`` (int),
        ``"Data Codomain"`` and ``"KAN Codomain"`` (each a 2-tuple of floats),
        in the order the rows appear in ``text``.
    """
    row_pattern = re.compile(
        r'\|\s*(\d+)\s*\|\s*\(([^,]+), ([^\)]+)\)\s*\|\s*\(([^,]+), ([^\)]+)\)\s*\|'
    )
    body_rows = text.strip().split("\n")[2:]  # drop the two header lines

    records = []
    for hit in map(row_pattern.match, body_rows):
        if hit is None:
            continue
        iteration, data_lo, data_hi, kan_lo, kan_hi = hit.groups()
        records.append({
            "Iteration": int(iteration),
            "Data Codomain": (float(data_lo), float(data_hi)),
            "KAN Codomain": (float(kan_lo), float(kan_hi)),
        })

    return records

def performance_statistics(parsed_data, key, mean, variance, real_codomain=(-1, 1)):
    """Summarise how far a set of codomains lies from the real codomain.

    Each entry contributes a ``(low, high)`` pair. The statistics are computed
    separately for the lower endpoint (row ``X_{0}``) and the upper endpoint
    (row ``X_{1}``). The resulting table is shown with ``display`` and returned.

    Args:
        parsed_data: Iterable of dicts; ``entry[key]`` must be a
            ``(low, high)`` pair of numbers.
        key: Which codomain to analyse, e.g. ``"Data Codomain"`` or
            ``"KAN Codomain"``.
        mean: Not used by the computation; kept so existing callers keep working.
        variance: Absolute tolerance added outside the real codomain. A lower
            endpoint is within bounds when ``low >= real_codomain[0] - variance``
            and an upper endpoint when ``high <= real_codomain[1] + variance``.
        real_codomain: The reference ``(low, high)`` codomain.

    Returns:
        A ``pandas.DataFrame`` indexed by ``X_{0}`` / ``X_{1}`` with the columns
        "Real Codomain", "Mean Codomain", "Var Codomain", "Mean Deviation",
        "Max Deviation", "Min Deviation", "Bounding Variance" and
        "% Below Variance".

    Raises:
        ValueError: If ``parsed_data`` contains no entries.
    """
    entries = list(parsed_data)
    if not entries:
        # Fail with a readable message instead of NumPy's
        # "zero-size array to reduction operation" error.
        raise ValueError(
            f"performance_statistics: no entries to analyze for key {key!r}"
        )

    codomains = np.array([entry[key] for entry in entries], dtype=float)
    # Absolute distance of each endpoint from the matching real endpoint.
    deviations = np.abs(codomains - np.asarray(real_codomain, dtype=float))

    mean_codomain = np.mean(codomains, axis=0)
    var_codomain = np.var(codomains, axis=0)
    mean_deviation = np.mean(deviations, axis=0)
    max_deviation = np.max(deviations, axis=0)
    min_deviation = np.min(deviations, axis=0)

    # One-sided checks: the lower endpoint may not fall below the widened lower
    # bound, the upper endpoint may not exceed the widened upper bound.
    below_variance_x = np.sum(codomains[:, 0] >= (real_codomain[0] - variance)) / len(codomains) * 100
    below_variance_y = np.sum(codomains[:, 1] <= (real_codomain[1] + variance)) / len(codomains) * 100

    stats = {
        "Real Codomain": real_codomain,
        "Mean Codomain": mean_codomain,
        "Var Codomain": var_codomain,
        "Mean Deviation": mean_deviation,
        "Max Deviation": max_deviation,
        "Min Deviation": min_deviation,
        # Signed tolerance actually applied to each endpoint by the checks above.
        # (Identical to the former real_codomain * variance for the default
        # (-1, 1) codomain, but also correct for any other codomain.)
        "Bounding Variance": [-variance, variance],
        "% Below Variance": [below_variance_x, below_variance_y]
    }

    df = pd.DataFrame(stats, index=["X_{0}", "X_{1}"])
    display(df)
    return df

directory = "./results"

# Read and parse every result file once: the parsed rows do not depend on the
# bound, so there is no need to re-read each file for every threshold.
parsed_by_path = {}
for file in sorted(os.listdir(directory)):  # sorted: os.listdir order is arbitrary
    file_path = os.path.join(directory, file)
    if not os.path.isfile(file_path):
        continue  # e.g. a .ipynb_checkpoints sub-directory
    with open(file_path, "r") as f:
        text = f.read()
    parsed_data = parse_table(text)
    if not parsed_data:
        # Not a results table (or an empty one): nothing to compute statistics on.
        print(f"Skipping {file_path}: no table rows found")
        continue
    parsed_by_path[file_path] = parsed_data

# Report the data and KAN codomain statistics for each bound, file by file.
for bounding_variance in [1.0*variance, 1.5*variance, 2.0*variance]:
    for file_path, parsed_data in parsed_by_path.items():
        print(f"Bound: {bounding_variance} - Analyzing: {file_path}")
        print("Data Statistics:")
        data_statistics = performance_statistics(parsed_data, "Data Codomain", mean, bounding_variance, (-1, 1))

        print("KAN Statistics:")
        kan_statistics = performance_statistics(parsed_data, "KAN Codomain", mean, bounding_variance, (-1, 1))


Bound: 0.02 - Analyzing: ./results/boundary_validation_cuda_iterations_1000_samples_10000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.450587,0.002458,0.450587,0.710043,0.339633,-0.02,0.0
X_{1},1,1.447814,0.002391,0.447814,0.698521,0.34981,0.02,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998635,2.2e-05,0.003893,0.018426,4e-06,-0.02,100.0
X_{1},1,0.99865,2.1e-05,0.003826,0.019161,8e-06,0.02,100.0


Bound: 0.02 - Analyzing: ./results/boundary_validation_iterations_10000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348449,0.003434,0.348449,0.784003,0.183972,-0.02,0.0
X_{1},1,1.347725,0.003381,0.347725,0.704363,0.193665,0.02,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998956,0.000204,0.008848,0.175141,2.741814e-06,-0.02,97.21
X_{1},1,1.000489,0.000204,0.008845,0.198352,3.576279e-07,0.02,95.65


Bound: 0.02 - Analyzing: ./results/boundary_validation_iterations_1000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348982,0.00353,0.348982,0.561913,0.224597,-0.02,0.0
X_{1},1,1.346455,0.003132,0.346455,0.645956,0.215666,0.02,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.000154,0.000139,0.008957,0.074506,5e-06,-0.02,95.1
X_{1},1,0.999465,0.000147,0.009279,0.047238,1.9e-05,0.02,95.1


Bound: 0.02 - Analyzing: ./results/kan_iterations_output_var_1_test_1.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.34551,0.003077,0.34551,0.602351,0.206962,-0.02,0.0
X_{1},1,1.352241,0.003763,0.352241,0.599259,0.204819,0.02,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.994354,0.000512,0.012805,0.146107,1.9e-05,-0.02,98.2
X_{1},1,1.001755,0.000251,0.00993,0.14625,3e-05,0.02,92.4


Bound: 0.02 - Analyzing: ./results/kan_iterations_output_var_1_test_2.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.042115,8.3e-05,0.042115,0.079231,0.020793,-0.02,0.0
X_{1},1,1.042317,8.5e-05,0.042317,0.086808,0.018049,0.02,0.2


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.003377,0.000215,0.006807,0.135254,6e-06,-0.02,89.9
X_{1},1,0.992461,0.000428,0.009753,0.147233,4e-06,0.02,99.1


Bound: 0.03 - Analyzing: ./results/boundary_validation_cuda_iterations_1000_samples_10000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.450587,0.002458,0.450587,0.710043,0.339633,-0.03,0.0
X_{1},1,1.447814,0.002391,0.447814,0.698521,0.34981,0.03,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998635,2.2e-05,0.003893,0.018426,4e-06,-0.03,100.0
X_{1},1,0.99865,2.1e-05,0.003826,0.019161,8e-06,0.03,100.0


Bound: 0.03 - Analyzing: ./results/boundary_validation_iterations_10000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348449,0.003434,0.348449,0.784003,0.183972,-0.03,0.0
X_{1},1,1.347725,0.003381,0.347725,0.704363,0.193665,0.03,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998956,0.000204,0.008848,0.175141,2.741814e-06,-0.03,99.01
X_{1},1,1.000489,0.000204,0.008845,0.198352,3.576279e-07,0.03,98.28


Bound: 0.03 - Analyzing: ./results/boundary_validation_iterations_1000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348982,0.00353,0.348982,0.561913,0.224597,-0.03,0.0
X_{1},1,1.346455,0.003132,0.346455,0.645956,0.215666,0.03,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.000154,0.000139,0.008957,0.074506,5e-06,-0.03,98.4
X_{1},1,0.999465,0.000147,0.009279,0.047238,1.9e-05,0.03,99.1


Bound: 0.03 - Analyzing: ./results/kan_iterations_output_var_1_test_1.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.34551,0.003077,0.34551,0.602351,0.206962,-0.03,0.0
X_{1},1,1.352241,0.003763,0.352241,0.599259,0.204819,0.03,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.994354,0.000512,0.012805,0.146107,1.9e-05,-0.03,99.8
X_{1},1,1.001755,0.000251,0.00993,0.14625,3e-05,0.03,95.5


Bound: 0.03 - Analyzing: ./results/kan_iterations_output_var_1_test_2.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.042115,8.3e-05,0.042115,0.079231,0.020793,-0.03,7.0
X_{1},1,1.042317,8.5e-05,0.042317,0.086808,0.018049,0.03,7.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.003377,0.000215,0.006807,0.135254,6e-06,-0.03,93.1
X_{1},1,0.992461,0.000428,0.009753,0.147233,4e-06,0.03,99.6


Bound: 0.04 - Analyzing: ./results/boundary_validation_cuda_iterations_1000_samples_10000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.450587,0.002458,0.450587,0.710043,0.339633,-0.04,0.0
X_{1},1,1.447814,0.002391,0.447814,0.698521,0.34981,0.04,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998635,2.2e-05,0.003893,0.018426,4e-06,-0.04,100.0
X_{1},1,0.99865,2.1e-05,0.003826,0.019161,8e-06,0.04,100.0


Bound: 0.04 - Analyzing: ./results/boundary_validation_iterations_10000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348449,0.003434,0.348449,0.784003,0.183972,-0.04,0.0
X_{1},1,1.347725,0.003381,0.347725,0.704363,0.193665,0.04,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.998956,0.000204,0.008848,0.175141,2.741814e-06,-0.04,99.35
X_{1},1,1.000489,0.000204,0.008845,0.198352,3.576279e-07,0.04,99.22


Bound: 0.04 - Analyzing: ./results/boundary_validation_iterations_1000_samples_1000.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.348982,0.00353,0.348982,0.561913,0.224597,-0.04,0.0
X_{1},1,1.346455,0.003132,0.346455,0.645956,0.215666,0.04,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.000154,0.000139,0.008957,0.074506,5e-06,-0.04,100.0
X_{1},1,0.999465,0.000147,0.009279,0.047238,1.9e-05,0.04,99.9


Bound: 0.04 - Analyzing: ./results/kan_iterations_output_var_1_test_1.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.34551,0.003077,0.34551,0.602351,0.206962,-0.04,0.0
X_{1},1,1.352241,0.003763,0.352241,0.599259,0.204819,0.04,0.0


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-0.994354,0.000512,0.012805,0.146107,1.9e-05,-0.04,99.9
X_{1},1,1.001755,0.000251,0.00993,0.14625,3e-05,0.04,99.2


Bound: 0.04 - Analyzing: ./results/kan_iterations_output_var_1_test_2.txt
Data Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.042115,8.3e-05,0.042115,0.079231,0.020793,-0.04,44.4
X_{1},1,1.042317,8.5e-05,0.042317,0.086808,0.018049,0.04,42.2


KAN Statistics:


Unnamed: 0,Real Codomain,Mean Codomain,Var Codomain,Mean Deviation,Max Deviation,Min Deviation,Bounding Variance,% Below Variance
X_{0},-1,-1.003377,0.000215,0.006807,0.135254,6e-06,-0.04,96.6
X_{1},1,0.992461,0.000428,0.009753,0.147233,4e-06,0.04,99.6


The following cell is only a sanity check to verify that the sampled noise's mean and variance match the values selected above.

In [27]:
# Draw a fresh seed on every run, but report it so that a surprising result
# can be reproduced by seeding torch with the printed value.
random.seed(None)
random_seed = random.randint(-sys.maxsize, sys.maxsize)
torch.manual_seed(random_seed)
print("Seed:", random_seed)

# normal_ expects a standard deviation, hence the square root of the variance.
noise = torch.empty(1000).normal_(mean=mean,std=np.sqrt(variance))

# Empirical mean and variance of the sampled noise.
mean_codomain = torch.mean(noise)
var_codomain = torch.var(noise)

print("Mean Codomain:", mean_codomain)
print("Variance Codomain:", var_codomain)

Mean Codomain: tensor(0.0073)
Variance Codomain: tensor(0.0204)
