In [2]:
import numpy as np
import pandas as pd
import tqdm
from time import time
import os

import configs
import datasets
from ensemble_DV_core_subset import RandomForestClassifierDV_subset, RandomForestRegressorDV_subset
from ensemble_DV_core_original import RandomForestClassifierDV_original, RandomForestRegressorDV_original
from data_valuation import DataValuation
import utils_eval
import matplotlib.pyplot as plt

# Main loop

In [3]:
# Experiment selection for this cell.
#   experiment       -- one of: "noisy", "mask&rank", "error"
#   eval_experiments -- any of: "noisy", "point_removal", "mask", "rank",
#                       "feature_removal", "error"
experiment = 'noisy'
eval_experiments = ['noisy']

all_config = configs.config000CR(experiment)[1]
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
runpath = r'C:\Users\yf-su\Desktop\XAI\simulation_experiments\%s_experiments'%(experiment)
# exist_ok=True creates the directory only when it is missing, replacing the
# separate exists()-then-makedirs() check.
os.makedirs(runpath, exist_ok=True)

for run_id in range(len(all_config)):
    config = all_config[run_id]
    print(run_id)
    problem = config['problem']
    dataset = config['dataset']
    dargs_list = config['dargs_list']

    # One data-generation setting (dargs) per inner iteration.
    for dargs_ind, dargs in enumerate(dargs_list):
        (
            (X, y), (X_val, y_val), (X_test, y_test),
            noisy_index, beta_true, error_index,
            error_row_index, X_original,
        ) = datasets.load_data(problem, dataset, **dargs)

        data_valuation_engine = DataValuation(X=X, y=y,
                                              X_val=X_val, y_val=y_val,
                                              X_test=X_test, y_test=y_test,
                                              problem=problem, dargs=dargs)

        # compute_data_shap() is only requested for the 'noisy' experiment.
        if experiment == 'noisy':
            data_valuation_engine.compute_data_shap()
        data_valuation_engine.compute_feature_shap(subset_ratio_list=['varying'])
        # The feature-level experiments additionally need the Learn-OOB and
        # baseline preparation steps.
        if experiment in ['mask&rank','feature_removal','error']:
            data_valuation_engine.prepare_learn_oob()
            data_valuation_engine.prepare_baseline(SHAP_size=1000)
        data_valuation_engine.evaluate_data_values(noisy_index, beta_true, error_index, X_test, y_test,
                                                   experiments=eval_experiments, error_row_index=error_row_index)
        data_valuation_engine.save_results(runpath, dataset, dargs_ind, noisy_index, beta_true)

0
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 0.
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_

Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 0.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computati

------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 1}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 1.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_

Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 2.
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.6, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 2}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computati

------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.6, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 3}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 3.
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100, 'n

Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 3.
4
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 4}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
D

------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 4}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 4.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 

Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 5.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 5}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done

------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 6}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 6.
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100

Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 7.
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.6, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 7}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done

------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.6, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 7}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 7.
8
------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 2

Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 8.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 8}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: KNN_Shapley computation
Do

------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 9}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\noisy_experiments, run_id: 9.
------------------------------
{'experiment': 'noisy', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_tr

In [4]:
# Experiment selection for this cell.
#   experiment       -- one of: "noisy", "mask&rank", "error"
#   eval_experiments -- any of: "noisy", "point_removal", "mask", "rank",
#                       "feature_removal", "error"
experiment = 'mask&rank'
eval_experiments = ['mask','rank']

all_config = configs.config000CR(experiment)[1]
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
runpath = r'C:\Users\yf-su\Desktop\XAI\simulation_experiments\%s_experiments'%(experiment)
# exist_ok=True creates the directory only when it is missing, replacing the
# separate exists()-then-makedirs() check.
os.makedirs(runpath, exist_ok=True)

for run_id in range(len(all_config)):
    config = all_config[run_id]
    print(run_id)
    problem = config['problem']
    dataset = config['dataset']
    dargs_list = config['dargs_list']

    # One data-generation setting (dargs) per inner iteration.
    for dargs_ind, dargs in enumerate(dargs_list):
        (
            (X, y), (X_val, y_val), (X_test, y_test),
            noisy_index, beta_true, error_index,
            error_row_index, X_original,
        ) = datasets.load_data(problem, dataset, **dargs)

        data_valuation_engine = DataValuation(X=X, y=y,
                                              X_val=X_val, y_val=y_val,
                                              X_test=X_test, y_test=y_test,
                                              problem=problem, dargs=dargs)

        # compute_data_shap() is only requested for the 'noisy' experiment,
        # so this branch is skipped with the setting chosen above.
        if experiment == 'noisy':
            data_valuation_engine.compute_data_shap()
        data_valuation_engine.compute_feature_shap(subset_ratio_list=['varying'])
        # The feature-level experiments additionally need the Learn-OOB and
        # baseline preparation steps.
        if experiment in ['mask&rank','feature_removal','error']:
            data_valuation_engine.prepare_learn_oob()
            data_valuation_engine.prepare_baseline(SHAP_size=1000)
        data_valuation_engine.evaluate_data_values(noisy_index, beta_true, error_index, X_test, y_test,
                                                   experiments=eval_experiments, error_row_index=error_row_index)
        data_valuation_engine.save_results(runpath, dataset, dargs_ind, noisy_index, beta_true)

7
------------------------------
{'experiment': 'mask&rank', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': None, 'mask_ratio': 0.5, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 7}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 7.
------------------------------
{'experiment': 'mask&rank', 'n_train': 1000

------------------------------
{'experiment': 'mask&rank', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': None, 'mask_ratio': 0.8, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 7}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 7.
------------------------------
{'experiment': 'mask&rank', 'n_train': 

------------------------------
{'experiment': 'mask&rank', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0, 'is_noisy': None, 'mask_ratio': 0.5, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 7}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 7.
------------------------------
{'experiment': 'mask&rank', 'n_train': 50

------------------------------
{'experiment': 'mask&rank', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': None, 'mask_ratio': 0.8, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 8}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 8.
------------------------------
{'experiment': 'mask&rank', 'n_train': 1000

------------------------------
{'experiment': 'mask&rank', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': None, 'mask_ratio': 0.5, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 8}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 8.
------------------------------
{'experiment': 'mask&rank', 'n_train': 5000, 

------------------------------
{'experiment': 'mask&rank', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': None, 'mask_ratio': 0.8, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 8}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 100)
Val X: (500, 100)
Test X: (3000, 100)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 8.
------------------------------
{'experiment': 'mask&rank', 'n_train': 

------------------------------
{'experiment': 'mask&rank', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 100, 'n_trees': 1000, 'rho': 0, 'is_noisy': None, 'mask_ratio': 0.5, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 9}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 9.
------------------------------
{'experiment': 'mask&rank', 'n_train': 10

------------------------------
{'experiment': 'mask&rank', 'n_train': 5000, 'n_val': 500, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0.2, 'is_noisy': None, 'mask_ratio': 0.8, 'base': 3, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 9}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (5000, 20)
Val X: (500, 20)
Test X: (3000, 20)
------------------------------
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
Start: Learn-OOB computation
Done: Learn-OOB computation
Start: SHAP computation
Done: SHAP computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\simulation_experiments\mask&rank_experiments, run_id: 9.
------------------------------
{'experiment': 'mask&rank', 'n_train': 5000

# Debug

In [None]:
# Debug run: pick a single configuration by hand and step through the
# valuation pipeline outside of the main loop.
experiment = 'noisy'
all_config = configs.config000CR(experiment)[1]
run_id, dargs_ind = 0, 0
config = all_config[run_id]
problem, dataset, dargs_list = config['problem'], config['dataset'], config['dargs_list']
dargs = dargs_list[dargs_ind]

# Override the data settings for this run (modifies the selected dargs in place).
dargs.update({'n_train': 1000, 'input_dim': 20, 'n_trees': 1000})
# dargs['rho'] = 0.5

(
    (X, y), (X_val, y_val), (X_test, y_test),
    noisy_index, beta_true, error_index, error_row_index, X_original,
) = datasets.load_data(problem, dataset, **dargs)

# Engine initialization
data_valuation_engine = DataValuation(
    X=X, y=y,
    X_val=X_val, y_val=y_val,
    X_test=X_test, y_test=y_test,
    problem=problem, dargs=dargs,
)
# compute_data_shap() is only requested for the 'noisy' experiment.
if experiment == 'noisy':
    data_valuation_engine.compute_data_shap()
data_valuation_engine.compute_feature_shap(subset_ratio_list=['varying'])
data_valuation_engine.prepare_learn_oob()

------------------------------
{'experiment': 'noisy', 'n_train': 1000, 'n_val': 100, 'n_test': 3000, 'input_dim': 20, 'n_trees': 1000, 'rho': 0, 'is_noisy': 0.1, 'mask_ratio': None, 'base': None, 'error_row_rate': None, 'error_col_rate': None, 'error_mech': None, 'model_family': 'Tree', 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 20)
Val X: (100, 20)
Test X: (3000, 20)
------------------------------
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Beta_Shapley computation
Start: marginal contribution computation


In [5]:
# Evaluate the debug engine on the 'noisy' experiment only.
eval_experiments = ['noisy']
data_valuation_engine.evaluate_data_values(
    noisy_index, beta_true, error_index, X_test, y_test,
    experiments=eval_experiments,
)

In [6]:
# Display the engine's noisy_detect_dict (bare last expression -> rendered as the cell output).
# Presumably populated by evaluate_data_values(..., experiments=['noisy']) -- TODO confirm.
data_valuation_engine.noisy_detect_dict

{'Meta_Data': ['Recall', 'Kmeans_label'],
 'Results': {'KNN_Shapley': [0.3, 0.19354838709677422],
  'Data_Shapley': [0.1, 0.15873015873015872],
  'Data-OOB': [0.3, 0.27999999999999997],
  'Df-OOB-data-varying': [0.1, 0.17543859649122806]}}

In [4]:
# Display the engine's time_dict (bare last expression -> rendered as the cell output).
# NOTE(review): values look like per-method elapsed times, presumably in seconds -- confirm units.
# NOTE(review): this cell's execution counter is out of order relative to its neighbours;
# re-run the notebook top to bottom before relying on the output shown.
data_valuation_engine.time_dict

defaultdict(list,
            {'KNN_Shapley': 0.002989530563354492,
             'Data_Shapley': 4.785084247589111})

In [7]:
# Display the engine's mask_detect_dict (bare last expression -> rendered as the cell output).
# NOTE(review): the debug cells visible above only evaluate the 'noisy' experiment; this attribute
# is presumably filled by a 'mask' evaluation run elsewhere -- confirm it survives Restart & Run All.
data_valuation_engine.mask_detect_dict

{'Meta_Data': ['Recall', 'Kmeans_label'],
 'Results': {'Df-OOB-feature-varying': [0.8, 0.8333333333333334],
  'Learn-OOB': [0.6, 0.7692307692307692]}}

In [8]:
# Display the engine's rank_dict (bare last expression -> rendered as the cell output).
# NOTE(review): the debug cells visible above only evaluate the 'noisy' experiment; this attribute
# is presumably filled by a 'rank' evaluation run elsewhere -- confirm it survives Restart & Run All.
data_valuation_engine.rank_dict

{'Meta_Data': ['Corr'],
 'Results': {'Df-OOB-feature-varying': (0.8363636363636363,),
  'Learn-OOB': (0.7939393939393938,)}}