In [14]:
import sys, os
# Put the parent directory (resolved to an absolute path) at the front of
# sys.path so it is searched first when importing.
sys.path.insert(0, os.path.abspath(".."))

In [15]:
import pandas as pd
import sys, os

# Directories that must be importable: the parent directory, its `src`
# sub-directory, and `src/utils`.
project_path = os.path.abspath('..')
src_path = os.path.abspath(os.path.join('..', 'src'))
utils_path = os.path.join(src_path, 'utils')

# Append each directory that is not already listed, in the order
# project -> utils -> src. The membership test runs lazily against the live
# list, so re-running the cell never adds duplicates.
sys.path.extend(
    candidate
    for candidate in (project_path, utils_path, src_path)
    if candidate not in sys.path
)

import loading_data
from visualize_utils.analyzing import  get_cv_results
from ResultsHandler import transform_feature_set_column,select_methods
import ResultsHandler

In [16]:
def get_cv_seed_df(random_seed_id, config_names, result_from, custom_dict=None, optimize=None):
    """Gather results for several seeds and configs into one DataFrame.

    For every seed and every config, the config file is loaded, its
    ``target_column`` entry is overridden, and ``get_cv_results`` is called
    with ``result_dirname=f'results_{seed}'``. The per-config frames are
    concatenated, tagged with a ``random_seed_id`` column, and post-processed
    with the ``ResultsHandler`` helpers.

    Parameters
    ----------
    random_seed_id : iterable
        Seed identifiers; each selects the ``results_{seed}`` directory name.
    config_names : iterable of str
        Config file names (without ``.yaml``) resolved under ``../configs``
        relative to the current working directory.
    result_from : {'cv', 'blind_test'}
        Selects the metric column names passed to
        ``calculate_test_train_ratio`` and the default post-processing:
        ``'cv'`` does not aggregate, ``'blind_test'`` does. Columns are not
        renamed by default in either case.
    custom_dict : dict, optional
        When given, must contain the keys ``'rename_columns'`` and ``'aggre'``;
        both override the defaults implied by ``result_from``.
    optimize : optional
        Forwarded unchanged to ``get_cv_results``.

    Returns
    -------
    pandas.DataFrame
        The combined results, sorted by the ``feature_set`` column.

    Raises
    ------
    ValueError
        If ``result_from`` is not one of the supported sources.
    """
    metric_dict = {
        'cv': {'test_mean': 'test_weighted_mae_mean', 'test_std': 'test_weighted_mae_std',
               'train_mean': 'train_weighted_mae_mean', 'train_std': 'train_weighted_mae_std'},
        'blind_test': {'test_mean': 'mean_MAE_test', 'test_std': 'std_MAE_test',
                       'train_mean': 'mean_MAE_train', 'train_std': 'std_MAE_train'},
    }
    # Fail early with a readable message instead of an opaque KeyError later.
    if result_from not in metric_dict:
        raise ValueError(
            f"result_from must be one of {sorted(metric_dict)}, got {result_from!r}"
        )
    metric_cols = metric_dict[result_from]

    # Defaults implied by the result source; custom_dict overrides both.
    rename_columns = False
    aggre = result_from == 'blind_test'
    if custom_dict is not None:
        rename_columns = custom_dict['rename_columns']
        aggre = custom_dict['aggre']

    config_paths = [os.path.abspath(f'../configs/{name}.yaml') for name in config_names]

    # Collect frames in lists and concatenate once per level; growing a
    # DataFrame with pd.concat inside the loop re-copies all rows each time.
    seed_frames = []
    for seed in random_seed_id:
        config_frames = []
        for config_path in config_paths:
            config = loading_data.load_config_file(config_path)
            config['target_column'] = ['homo_lumo_gap_min', 'formation_energy_per_site']
            try:
                config_frames.append(
                    get_cv_results(config, result_dirname=f'results_{seed}', optimize=optimize)
                )
            except FileNotFoundError as e:
                # Best effort: report the missing results and keep going.
                print(f"{seed}")
                print(f" {e}")
        if not config_frames:
            # Nothing was loaded for this seed, so it contributes no rows.
            continue
        seed_df = pd.concat(config_frames, ignore_index=True)
        seed_df['random_seed_id'] = seed
        seed_frames.append(seed_df)

    cv_seed_df = pd.concat(seed_frames, ignore_index=True) if seed_frames else pd.DataFrame()

    if rename_columns:
        # Map the CV-style metric column names onto the blind-test-style names.
        cv_seed_df = cv_seed_df.rename(columns={
            'test_weighted_mae_mean': 'mean_MAE_test',
            'test_weighted_mae_std': 'std_MAE_test',
            'train_weighted_mae_mean': 'mean_MAE_train',
            'train_weighted_mae_std': 'std_MAE_train',
        })
    if aggre:
        cv_seed_df = ResultsHandler.calculate_aggregate_results(cv_seed_df)

    cv_seed_df = ResultsHandler.calculate_test_train_ratio(
        cv_seed_df,
        test_col=metric_cols['test_mean'],
        error_test_col=metric_cols['test_std'],
        train_col=metric_cols['train_mean'],
        error_train_col=metric_cols['train_std'],
    )

    cv_seed_df = transform_feature_set_column(cv_seed_df, include_feature_name=True)

    return cv_seed_df.sort_values(by='feature_set')

                        
                        
        

In [17]:
def main(cv_seed_df,target,results_from,R=3):
    """Display a rounded pivot table of the test metric for one target.

    The rows returned by ``select_methods(cv_seed_df, chem_part='chem')`` are
    restricted to those whose ``target_column`` equals ``target``. They are
    then pivoted with ``en_chem`` as the index and ``(dist_part, distance)``
    as the columns.

    Parameters
    ----------
    cv_seed_df : pandas.DataFrame
        Results frame, e.g. as returned by ``get_cv_seed_df``.
    target : str
        Value of ``target_column`` to keep.
    results_from : {'cv', 'blind_test'}
        Selects which metric / metric-error columns are pivoted.
    R : int, default 3
        Number of decimals the displayed table is rounded to.
    """
    columns_by_source = {
        'cv': {'metric': 'test_weighted_mae_mean', 'metric_error': 'test_weighted_mae_std'},
        'blind_test': {'metric': 'mean_MAE_test', 'metric_error': 'std_MAE_test'},
    }
    selected = columns_by_source[results_from]
    value_columns = [selected['metric'], selected['metric_error']]

    chem_rows = select_methods(cv_seed_df, chem_part='chem')
    is_target = chem_rows['target_column'] == target
    summary = chem_rows[is_target].pivot_table(
        index='en_chem',
        columns=['dist_part', 'distance'],
        values=value_columns,
    )
    display(summary.round(R))

In [18]:
# Seeds and config names to analyse in the cells below.
random_seed_id = [2]
config_names = ['r4_config_allv2']
# Absolute path of each config's YAML file under ../configs.
config_paths = list(
    map(lambda name: os.path.abspath(f'../configs/{name}.yaml'), config_names)
)

In [19]:
# Load the cross-validation results for the seeds and configs chosen above,
# forwarding optimize='selected_best_random_100' to get_cv_results.
cv_optimize_df = get_cv_seed_df(random_seed_id,config_names,result_from='cv',optimize='selected_best_random_100')

In [20]:
# Show the CV test-MAE pivot (mean and std columns) for the
# 'formation_energy_per_site' target.
main(cv_optimize_df,'formation_energy_per_site',results_from='cv')

Unnamed: 0_level_0,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std
dist_part,alldist,alldist,alldist,alldist,no,no,no,no,alldist,alldist,alldist,alldist,no,no,no,no
distance,emd,hellinger,no,tvd,emd,hellinger,no,tvd,emd,hellinger,no,tvd,emd,hellinger,no,tvd
en_chem,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
no,0.147,0.147,0.148,0.147,0.149,0.148,0.129,0.149,0.003,0.002,0.003,0.003,0.003,0.004,0.008,0.004
vpa_divi,0.142,0.142,0.143,0.141,0.144,0.145,0.137,0.145,0.005,0.005,0.005,0.004,0.004,0.005,0.008,0.006
vpa_mult,0.143,0.142,0.143,0.142,0.145,0.145,0.137,0.145,0.004,0.006,0.004,0.003,0.005,0.004,0.007,0.005
vpa_subs,0.141,0.141,0.143,0.142,0.144,0.146,0.133,0.146,0.004,0.006,0.004,0.004,0.005,0.005,0.006,0.006


In [21]:
# Show the CV test-MAE pivot (mean and std columns) for the
# 'homo_lumo_gap_min' target.
main(cv_optimize_df,'homo_lumo_gap_min',results_from='cv')

Unnamed: 0_level_0,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_mean,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std,test_weighted_mae_std
dist_part,alldist,alldist,alldist,alldist,no,no,no,no,alldist,alldist,alldist,alldist,no,no,no,no
distance,emd,hellinger,no,tvd,emd,hellinger,no,tvd,emd,hellinger,no,tvd,emd,hellinger,no,tvd
en_chem,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
no,0.122,0.122,0.123,0.122,0.128,0.127,0.122,0.127,0.005,0.006,0.005,0.005,0.007,0.007,0.004,0.008
vpa_divi,0.122,0.122,0.123,0.122,0.127,0.126,0.124,0.126,0.005,0.005,0.006,0.005,0.006,0.005,0.006,0.006
vpa_mult,0.122,0.122,0.122,0.122,0.127,0.126,0.124,0.126,0.005,0.005,0.006,0.005,0.006,0.006,0.005,0.005
vpa_subs,0.123,0.123,0.124,0.123,0.127,0.126,0.124,0.126,0.005,0.005,0.005,0.005,0.006,0.006,0.006,0.006
