In [1]:
!pip install pflacco



In [2]:
from utils import *
import os.path
from scipy.stats import qmc
from tqdm import tqdm
import polars as pl

In [3]:
# Root directory for this notebook's files: the sample table is read from it
# and the feature output directory is created beneath it.
save_dir = 'lio_problem'
# Helper made available by `from utils import *`; judging by its name it
# creates the directory when it is missing — confirm in utils.
create_directory_if_not_exist(save_dir)

In [4]:
# Sampling configuration; both values are embedded in the sample file name.
sample_size = 250  # multiplier: per the original note, "times dimension"
problem_dim = 5  # presumably the number of x_i input columns (x_0..x_4) — confirm

In [5]:
from pflacco.classical_ela_features import *

In [6]:
# Path of the parquet sample table for the configured dimension and sample size.
# NOTE(review): the ".norm" suffix presumably marks normalised values — confirm
# against whatever notebook/script produces this file.
sample_file_norm = f'{save_dir}/samples__dim_{problem_dim}__samplesize_{sample_size}.norm.parquet'

In [7]:
# Open the sample table as a lazy scan and materialise it straight away, so
# `df` is an in-memory polars DataFrame holding every row of the file.
df = pl.scan_parquet(sample_file_norm).collect()

In [8]:
# Show the loaded samples; the rich display abbreviates the table to its
# first and last rows.
df

problem,instance,y,optimum,x_0,x_1,x_2,x_3,x_4
i64,i64,f64,f64,f64,f64,f64,f64,f64
1,1,0.577262,79.48,-3.149596,1.601646,4.805492,-2.859105,2.913973
1,1,0.318008,79.48,0.78446,1.718528,3.697858,2.556515,2.751843
1,1,0.169762,79.48,-0.651181,-0.647084,-1.350508,-3.267599,-4.495132
1,1,0.458729,79.48,-3.057518,-2.772905,4.3904,-4.642849,-2.411301
1,1,0.116464,79.48,1.708115,1.237669,-0.079004,2.538148,1.013913
…,…,…,…,…,…,…,…,…
24,15,0.705769,149.81,3.550095,-2.349579,0.610341,-4.475076,-4.671802
24,15,0.460143,149.81,-2.14731,-4.21336,-0.549176,-1.346791,-0.135708
24,15,0.485692,149.81,-4.200217,2.243036,-3.145434,-0.55592,-1.302764
24,15,0.396352,149.81,-3.610818,-0.501484,-2.538735,4.095798,0.147821


In [9]:
# Output directory for the feature files; the loop below writes one parquet
# file per (problem, instance) pair into it.
ela_save_dir = f'{save_dir}/ela'
create_directory_if_not_exist(ela_save_dir)

In [10]:
# Compute one row of ELA features for every (problem, instance) pair found in
# `df` and store it as its own parquet file under `ela_save_dir`. Pairs whose
# file already exists are skipped, so an interrupted run can simply be re-run.

# pflacco feature calculators, applied in this order to every pair. Built once
# here because the list does not depend on the loop variables.
methods = [
    calculate_cm_angle,
    calculate_cm_conv,
    calculate_cm_grad,
    calculate_dispersion,
    calculate_ela_conv,
    calculate_ela_curvate,
    calculate_ela_distribution,
    calculate_ela_level,
    calculate_ela_local,
    calculate_ela_meta,
    calculate_information_content,
    calculate_limo,
    calculate_nbc,
    calculate_pca,
]

# Box constraints passed to calculators that do not accept a plain (X, y) call.
LOWER_BOUND, UPPER_BOUND = -5, 5

# Column count of a complete feature row, including the `problem` and
# `instance` columns. Rows with any other width are reported, not saved.
EXPECTED_COLUMNS = 94

# Input columns follow from `problem_dim` instead of being spelled out by hand.
x_columns = [f'x_{i}' for i in range(problem_dim)]

for r in tqdm(df.select(['problem', 'instance']).unique().sort(['instance']).rows(named=True)):
    instance = r['instance']
    problem = r['problem']
    save_file = f'{ela_save_dir}/p_{problem}__i_{instance}.parquet'

    # Already computed in a previous run: nothing to do for this pair.
    if os.path.exists(save_file):
        continue

    sdf = df.filter((pl.col('problem') == problem) & (pl.col('instance') == instance))
    X = sdf.select(x_columns).to_numpy()
    y = sdf['y'].to_numpy()

    features = {}
    failed = {}  # calculator name -> error raised by its last attempt
    for method in methods:
        try:
            features.update(method(X, y))
        except Exception:
            # Second attempt with explicit bounds. `Exception` (not a bare
            # `except`) is caught so that interrupting the kernel still stops
            # the loop instead of being swallowed as a failed calculator.
            try:
                features.update(method(X, y, lower_bound=LOWER_BOUND, upper_bound=UPPER_BOUND))
            except Exception as err:
                # Best effort: a calculator that fails both ways contributes
                # no features; keep the reason for the diagnostic below.
                failed[method.__name__] = repr(err)

    features['problem'] = problem
    features['instance'] = instance

    raw_features = features  # plain dict kept for the diagnostic print
    features = pl.DataFrame([raw_features])
    # Store every feature as Float64, then restore the two identifier columns
    # to Int64.
    features = features.with_columns([
        pl.col('*').cast(pl.Float64)
    ]).with_columns([
        pl.col('problem').cast(pl.Int64),
        pl.col('instance').cast(pl.Int64)
    ])

    if features.shape != (1, EXPECTED_COLUMNS):
        # Incomplete or unexpected feature set: report it (with the reasons
        # calculators failed) and do not write a file, so a later run retries.
        print(problem, instance, features.shape, raw_features, failed)
    else:
        features.write_parquet(save_file)

100%|██████████| 360/360 [00:00<00:00, 27140.28it/s]


In [11]:
#pl.scan_parquet(f'{ela_save_dir}/*').collect()