# Experiment 1: Varying Randman

## Generate randman

In [None]:
from src.RandmanFunctions import RandmanConfig, generate_and_save_randman

# Randman dataset variants: the first entry is the baseline and each later
# entry changes exactly one field relative to it.
data_configs = [
    dict(nb_classes=2, nb_units=3, alpha=3, dim_manifold=1),   # baseline
    dict(nb_classes=5, nb_units=3, alpha=3, dim_manifold=1),   # nb_classes: 2 -> 5
    dict(nb_classes=2, nb_units=10, alpha=3, dim_manifold=1),  # nb_units: 3 -> 10
    dict(nb_classes=2, nb_units=3, alpha=1, dim_manifold=1),   # alpha: 3 -> 1
    dict(nb_classes=2, nb_units=3, alpha=3, dim_manifold=2),   # dim_manifold: 1 -> 2
]

In [None]:
# Build a RandmanConfig from each keyword dictionary in data_configs and pass
# it to generate_and_save_randman, one dataset configuration at a time.
for data_config in data_configs:
    generate_and_save_randman(RandmanConfig(**data_config))

## Generate problem

### BBOB problem
What BBOB problems should be generated? 
* 24 functions
* dims: 2, 160, 640 
* 3 instances 

In [None]:
from src.LandscapeAnalysis import BBOBProblemConfig
def generate_bbob(functions=range(1, 25), dimensions=(2, 160, 640),
                  instances=(1, 2, 3), db_path='data/landscape-analysis.db'):
    """Write one BBOBProblemConfig per (function, dimension, instance) combination.

    Called without arguments this registers 24 functions x 3 dimensions x
    3 instances, exactly the grid listed in the notes above.

    Args:
        functions: Iterable of BBOB function ids (default 1..24).
        dimensions: Iterable of problem dimensions (default 2, 160, 640).
        instances: Iterable of BBOB instance ids (default 1, 2, 3).
        db_path: Database path forwarded to ``write_to_db``.
    """
    from itertools import product

    # product() iterates with the right-most iterable varying fastest, i.e. in
    # the same order as nested function -> dimension -> instance loops.
    for function, dim, instance in product(functions, dimensions, instances):
        # NOTE: BBOBProblemConfig takes the instance before the dimension.
        problem_config = BBOBProblemConfig(function, instance, dim)
        problem_config.write_to_db(db_path=db_path)


generate_bbob()

### Generate samples and match with problems

How many samples should be used? Why not 30?

In [None]:
import sqlite3
from math import ceil, log2
from src.LandscapeAnalysis import ParameterSampleConfig, assign_samples_to_problem, BBOBProblemConfig

def assign_samples_to_bbob():
    """Assign Sobol parameter samples to every problem in ``bbob_problems``.

    Reads all ``(id, dim)`` rows from the ``bbob_problems`` table, then for each
    problem builds a ParameterSampleConfig on [-5, 5] whose sample count is the
    smallest power of two that is at least ``50 * dim`` and passes it, together
    with the looked-up BBOBProblemConfig, to ``assign_samples_to_problem``.
    """
    from contextlib import closing

    db_path = "data/landscape-analysis.db"
    samples_dir = 'data/samples'
    max_nb_versions = 30
    lower_bound = -5  # bbob are defined on [-5, 5]
    upper_bound = 5

    # sqlite3's own context manager only scopes a transaction and does not
    # close the connection; closing() releases it even if the query raises.
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute("SELECT id, dim FROM bbob_problems").fetchall()

    for problem_id, dim in rows:
        nb_samples = 2 ** ceil(log2(50 * dim))  # sobol's sample size must be a power of 2
        sample_config = ParameterSampleConfig(dim, nb_samples, "sobol", lower_bound, upper_bound)
        problem_config = BBOBProblemConfig.lookup_by_id(problem_id)
        assign_samples_to_problem(problem_config, sample_config, max_nb_versions, samples_dir, db_path)


assign_samples_to_bbob()
  

## Calculate loss

In [None]:
from src.LandscapeAnalysis import get_next_available_id, calculate_and_save_loss

# Keep processing until get_next_available_id reports nothing left (None).
# The walrus target is deliberately not called `id`: at notebook scope that
# would shadow the builtin id() and leave it bound to None after the loop.
while (loss_surface_id := get_next_available_id('loss_filename')) is not None:
    calculate_and_save_loss(loss_surface_id)

## Extract Features

In [None]:
from pflacco.classical_ela_features import *
from src.LandscapeAnalysis import get_next_available_id, calculate_and_save_features
# Keep processing until get_next_available_id reports nothing left (None),
# computing the information-content features for each returned id.
# The walrus target is deliberately not called `id`: at notebook scope that
# would shadow the builtin id() and leave it bound to None after the loop.
while (loss_surface_id := get_next_available_id('ic_h_max')) is not None:
    calculate_and_save_features(loss_surface_id, calculate_information_content)

Column 'ic_h_max' added to 'loss_surfaces' table by get_next_available_id()


In [None]:
from pflacco.classical_ela_features import *
import numpy as np

def find_feature_name():
    """Print the ELA meta-model feature dictionary computed on a throwaway problem.

    The objective values are random, so the printed numbers carry no meaning;
    the output is useful for inspecting the feature names that
    ``calculate_ela_meta`` produces.
    """
    from pflacco.sampling import create_initial_sample

    def _random_objective(_point):
        # Arbitrary objective: ignores its input and returns a random value.
        return np.random.rand()

    nb_dims = 3
    # Initial sample drawn with Latin hypercube sampling.
    sample = create_initial_sample(nb_dims, sample_type='lhs')
    # One objective value per row of the sample.
    objective_values = sample.apply(_random_objective, axis=1)

    # Only the ela_meta feature set is computed here.
    features = calculate_ela_meta(sample, objective_values)
    print(features)


find_feature_name()

{'r20_ela_meta.lin_simple.adj_r2': -0.005409156310980379, 'r20_ela_meta.lin_simple.intercept': 0.4715494876893619, 'r20_ela_meta.lin_simple.coef.min': 0.0020344682591832736, 'r20_ela_meta.lin_simple.coef.max': 0.08213140207973536, 'r20_ela_meta.lin_simple.coef.max_by_min': 40.36995991901421, 'r20_ela_meta.lin_w_interact.adj_r2': -0.019009364039383136, 'r20_ela_meta.quad_simple.adj_r2': -0.0057795330142678125, 'r20_ela_meta.quad_simple.cond': 5.64899122378976, 'r20_ela_meta.quad_w_interact.adj_r2': 0.002901501956317998, 'r20_ela_meta.costs_runtime': 0.015}


# Experiment 2: Varying SNN

## Generate nn problems

The easiest dataset is the one with `nb_input` = 10, `nb_classes` = 2 and `alpha` = 3; its `randman_id` is 16.

In [None]:
from src.RandmanFunctions import RandmanConfig
from src.Models import RandmanSNNConfig
from src.LandscapeAnalysis import generate_randman_problem

def add_nn_problem_configs():
    """Register one Randman problem per SNN architecture variant.

    All variants share the Randman dataset with id 16, beta = 0.95 with a
    learnable beta, the 'weights' parameter type and the cross-entropy loss;
    only the hidden-layer sizes and the recurrent flag differ.
    """
    # The easiest randman
    randman_config = RandmanConfig.lookup_by_id(16)

    # Settings held constant across the experiment.
    shared_settings = dict(beta=0.95, learn_beta=True, parameter_type='weights')

    # Settings varied in the experiment: (nb_hidden_1, recurrent, nb_hidden_2).
    architectures = (
        (100, False, -1),
        (100, True, -1),
        (30, False, -1),
        (100, False, 10),
    )

    for nb_hidden_1, recurrent, nb_hidden_2 in architectures:
        snn_config = RandmanSNNConfig(
            nb_hidden_1=nb_hidden_1,
            nb_hidden_2=nb_hidden_2,
            recurrent=recurrent,
            **shared_settings,
        )
        generate_randman_problem(
            randman_config,
            snn_config,
            loss_fn='cross_entropy',
            db_path='data/landscape-analysis.db',
        )


add_nn_problem_configs()

## Generate samples and assign to problems

Let me try to stick with 8192 samples for this experiment because the highest dim is 11202.

In [None]:
from src.LandscapeAnalysis import ParameterSampleConfig, NNProblemConfig, assign_samples_to_problem
def generate_and_assign():
    """Assign Sobol samples to the NN problems with ids 5 through 8.

    Each problem gets a ParameterSampleConfig of 8192 Sobol samples on [-2, 2]
    in the problem's own dimensionality, passed to ``assign_samples_to_problem``
    with 30 versions.
    """
    nb_versions = 30
    # Sampling settings shared by every problem; only `dim` varies.
    shared_sampling = dict(
        nb_sample=8192,
        method='sobol',
        lower_bound=-2,
        upper_bound=2,
    )

    for problem_id in range(5, 9):  # ids 5, 6, 7, 8
        problem_config = NNProblemConfig.lookup_by_id(problem_id)
        sample_config = ParameterSampleConfig(dim=problem_config.dim, **shared_sampling)
        assign_samples_to_problem(
            problem_config,
            sample_config,
            nb_versions,
            'data/new_samples',
            'data/landscape-analysis.db',
        )


generate_and_assign()

### A problem to think about

Let's assume that the ELA features can reliably distinguish different problems. Good. But what if that is, as we saw from the BBOB problems, solely due to the dimensionality? What are the consequences of that, then? First of all, 

In [1]:
from src.LandscapeAnalysis import calculate_and_save_loss
# Run the loss calculation for a single entry (id 6632) instead of looping
# over every pending id.
calculate_and_save_loss(6632)

filepath data/randman\aa13286ca3664a0cbb1ae916d917ba93.pt
calculating loss for loss_surface_id 6632 with 8192 samples
Accuracy: 49.5%, Avg loss: 12.145149 

Accuracy: 50.5%, Avg loss: 3.699378 

Accuracy: 48.8%, Avg loss: 2.253944 

Accuracy: 49.3%, Avg loss: 1.372035 

Accuracy: 49.5%, Avg loss: 4.821850 

Accuracy: 50.5%, Avg loss: 6.054319 

Accuracy: 45.1%, Avg loss: 2.965990 

Accuracy: 40.8%, Avg loss: 4.377857 

Accuracy: 49.5%, Avg loss: 3.386114 

Accuracy: 51.6%, Avg loss: 2.526793 

Accuracy: 50.1%, Avg loss: 2.664882 

Accuracy: 58.9%, Avg loss: 5.772611 

Accuracy: 50.2%, Avg loss: 11.803179 

Accuracy: 53.0%, Avg loss: 2.352435 

Accuracy: 55.7%, Avg loss: 2.269084 



KeyboardInterrupt: 