In [16]:
import os
import pickle
import pandas as pd
import optuna

In [17]:
# Root directory of the study outputs; get_param_importance_table resolves each
# study as <path>/<structure key>/<run folder>/study.pkl.
path = "../out/predictor"

In [18]:


# Run-folder lookup: the outer key is a sub-directory of `path`, the inner
# mapping gives, per metric (load, PV, price), the name of the run folder
# expected to contain that metric's study.pkl.
structure = {
    "linear": dict(
        load="20250204_141500",
        PV="20250204_184612",
        price="20250204_222016",
    ),
    "xgboost": dict(
        load="20250128_182124",
        PV="20250129_211746",
        price="20250131_074032",
    ),
    "recurrent-net": dict(
        load="20250201_203130",
        PV="20250202_122543",
        price="20250203_061524",
    ),
    "time-mixer": dict(
        load="20250128_151138",
        PV="20250129_071826",
        price="20250131_143014",
    ),
    "times-net": dict(
        load="20250128_135659",
        PV="20250131_104458",
        price="20250203_150716",
    ),
}

In [19]:
def get_param_importance_table(key: str) -> pd.DataFrame:
    """
    Rank the hyperparameters of one `structure` entry by mean Optuna importance.

    For every metric (load, PV, price) listed under ``structure[key]`` the
    pickled study at ``os.path.join(path, key, <folder_name>, 'study.pkl')``
    is loaded and ``optuna.importance.get_param_importances`` is evaluated on
    it. Importances are then averaged per parameter across the metrics that
    provide a value (missing entries are NaN and skipped by the mean),
    multiplied by 100 and rounded to one decimal.

    The function reads the module-level ``structure`` and ``path`` variables.

    Args:
        key: Name of an entry in ``structure`` (also the sub-directory of
            ``path`` that holds the run folders).

    Returns:
        DataFrame with the columns ``parameter`` and ``average``, sorted by
        ``average`` in descending order. Row labels are the positions of the
        parameters in alphabetical order. The frame is empty when no study
        file could be loaded.

    Raises:
        ValueError: If ``key`` is not present in ``structure``.
    """
    if key not in structure:
        raise ValueError(f"Key '{key}' not found in structure dictionary.")

    metrics = ['load', 'PV', 'price']
    importance_dict = {}

    for metric in metrics:
        folder_id = structure[key].get(metric)
        if folder_id is None:
            continue

        folder_path = os.path.join(path, key, folder_id)
        study_path = os.path.join(folder_path, 'study.pkl')

        # Best effort: a missing study only removes that metric from the average.
        if not os.path.exists(study_path):
            print(f"study.pkl not found in {folder_path}. Skipping {metric}.")
            continue

        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only load study files produced by trusted runs.
        with open(study_path, 'rb') as f:
            study = pickle.load(f)

        # NOTE(review): the default importance evaluator may be randomised, in
        # which case repeated runs give slightly different numbers; pass a
        # seeded evaluator if exact reproducibility is required - confirm.
        importance_dict[metric] = optuna.importance.get_param_importances(study)

    # Union of the parameter names seen in any loaded study, alphabetically.
    all_params = sorted(set().union(*importance_dict.values()))

    # Use a real NaN (not None) for unavailable values so that a metric that
    # is missing for every parameter still yields a numeric column.
    missing = float('nan')
    rows = [
        {
            'parameter': param,
            **{
                metric: importance_dict.get(metric, {}).get(param, missing)
                for metric in metrics
            },
        }
        for param in all_params
    ]

    df = pd.DataFrame(rows, columns=['parameter'] + metrics)
    # Force float columns: an empty table or an all-missing metric would
    # otherwise be object dtype, which mean()/round() handle inconsistently.
    df = df.astype({metric: float for metric in metrics})
    df['average'] = (df[metrics].mean(axis=1) * 100).round(1)

    # Stable sort keeps tied averages in alphabetical parameter order.
    df_sorted = df.sort_values(
        by='average', ascending=False, na_position='last', kind='mergesort'
    )
    return df_sorted[['parameter', 'average']]

In [20]:
# Mean parameter importance (in %) for the 'linear' entry of `structure`.
get_param_importance_table('linear')

Unnamed: 0,parameter,average
1,learning_rate,67.7
0,hidden_dim,32.3


In [21]:
# Mean parameter importance (in %) for the 'xgboost' entry of `structure`.
get_param_importance_table('xgboost')

Unnamed: 0,parameter,average
2,learning_rate,94.4
7,subsample,2.6
3,max_depth,2.4
1,gamma,0.5
0,colsample_bytree,0.0
4,min_child_weight,0.0
5,reg_alpha,0.0
6,reg_lambda,0.0


In [22]:
# Mean parameter importance (in %) for the 'recurrent-net' entry of `structure`.
get_param_importance_table('recurrent-net')

Unnamed: 0,parameter,average
1,learning_rate,76.7
4,units,8.0
2,num_layers,5.8
3,rnn_type,5.0
0,dropout,4.4


In [23]:
# Mean parameter importance (in %) for the 'time-mixer' entry of `structure`.
get_param_importance_table('time-mixer')

Unnamed: 0,parameter,average
7,learning_rate,81.9
1,d_model,5.3
5,dropout,3.0
2,decomp_method,2.0
3,down_sampling_layers,1.9
8,top_k,1.6
0,d_ff,1.4
4,down_sampling_window,1.4
6,e_layers,1.4


In [24]:
# Mean parameter importance (in %) for the 'times-net' entry of `structure`.
get_param_importance_table('times-net')

Unnamed: 0,parameter,average
5,num_kernels,22.6
0,d_ff,20.8
6,top_k,17.9
3,e_layers,16.4
1,d_model,9.6
4,learning_rate,7.2
2,dropout,5.5
