In [1]:
import numpy as np
import pandas as pd
import tqdm
from time import time
import os

import datasets
from ensemble_DV_core import RandomForestClassifierDV, RandomForestRegressorDV
from ensemble_DV_core_original import RandomForestClassifierDV_original, RandomForestRegressorDV_original
from data_valuation import DataValuation
import utils_eval
import configs

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
import sage
import shap
import xgboost as xgb

import configs
# Use the first configuration of the second element returned by configs.config000CR().
config = configs.config000CR()[1][0]
problem, dataset, dargs_list = config['problem'], config['dataset'], config['dargs_list']
dargs_ind = 0
dargs = dargs_list[dargs_ind]

# These six switches are on for every dataset except 'gaussian', where they are all off.
loo_run = betashap_run = AME_run = lasso_run = boosting_run = treeshap_run = (dataset != 'gaussian')
# These two switches have the same value for every dataset.
removal_run = True
simple_run = False

print(len(dargs_list))
# NOTE(review): the problem type and dataset name are hard-coded here instead of
# using `problem` / `dataset` read from the config above — confirm this is intended.
(X, y), (X_val, y_val), (X_test, y_test), noisy_index, beta_true = datasets.load_data('clf','gaussian',**dargs)

18
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 25)
Val X: (100, 25)
Test X: (3000, 25)
------------------------------


In [2]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true : array-like
        Reference values; each one is the denominator of its relative error.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Notes
    -----
    Both inputs are passed through ``np.asarray`` so that plain Python
    sequences are accepted as well as NumPy arrays. Zero entries in ``y_true``
    are not special-cased: they make the result ``inf`` (or ``nan`` when the
    matching prediction is also zero), as ordinary NumPy division does.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def get_values(d,dim):
    """Turn a mapping keyed by ``"f0"``, ``"f1"``, ... into a dense list of length ``dim``.

    Position ``i`` of the result holds ``d["f<i>"]``; keys that are absent from
    ``d`` contribute ``0``.
    """
    return [d.get("f%d" % position, 0) for position in range(dim)]

def learn_oob(X_y, oob, global_method = 'SHAP'):
    """Fit an XGBoost regressor that predicts ``oob`` from ``X_y`` and rank its inputs.

    The rows are split by two ``train_test_split`` calls (both with
    ``random_state=0``): a test part is taken first, then a validation part,
    each of size ``int(0.1 * X_y.shape[0])``. The booster is trained on the
    remainder with early stopping and scored on the test part.

    Parameters
    ----------
    X_y : 2-D array
        Input matrix. Its last column is dropped from every importance vector
        that is returned (the caller in this notebook stores the label ``y``
        in that column).
    oob : array-like
        Regression target, one value per row of ``X_y``.
    global_method : {'SHAP', 'SAGE'}
        How the global importance is computed on the test part.

    Returns
    -------
    dict
        ``'X_y_data'`` and ``'oob_data'`` hold the (train, val, test) splits,
        ``'score_mse'`` / ``'score_mape'`` the test errors, and
        ``'learn_feature_importance'``, ``'weight_importance'``,
        ``'gain_importance'`` the importance vectors without their last entry.

    Raises
    ------
    NotImplementedError
        If ``global_method`` is neither ``'SHAP'`` nor ``'SAGE'``; this is
        only detected after the model has been trained.
    """
    holdout_size = int(0.1 * X_y.shape[0])

    # Test split first, then a validation split of the same absolute size.
    X_y_rest, X_y_test, oob_rest, oob_test = train_test_split(
        X_y, oob, test_size=holdout_size, random_state=0)
    X_y_train, X_y_val, oob_train, oob_val = train_test_split(
        X_y_rest, oob_rest, test_size=holdout_size, random_state=0)

    dtrain = xgb.DMatrix(X_y_train, label=oob_train)
    dval = xgb.DMatrix(X_y_val, label=oob_val)
    dtest = xgb.DMatrix(X_y_test, label=oob_test)

    booster = xgb.train(
        {
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'learning_rate': 0.01,
            'random_state':0
        },
        dtrain,
        num_boost_round=1000,
        evals=[(dtrain, 'train'), (dval, 'eval')],
        early_stopping_rounds=10,
        verbose_eval=0,
    )

    # Test-set error of the fitted regressor.
    predictions = booster.predict(dtest)
    score_mse = mean_squared_error(oob_test, predictions)
    score_mape = mape(oob_test, predictions)

    if global_method == 'SHAP':
        # Mean absolute SHAP value per column over the test rows.
        attributions = shap.TreeExplainer(booster)(X_y_test)
        global_importance = np.abs(attributions.values).mean(axis=0)
    elif global_method == 'SAGE':
        # The first 512 test rows serve as the background sample of the imputer.
        sage_estimator = sage.PermutationEstimator(
            sage.MarginalImputer(booster, X_y_test[:512]), 'mse')
        global_importance = sage_estimator(X_y_test, oob_test,bar=False).values
    else:
        raise NotImplementedError('Not implemented yet!')

    # Booster-native importances, densified so columns missing from the score dict get 0.
    n_columns = X_y_train.shape[1]
    weight_importance = np.array(get_values(booster.get_score(importance_type='weight'), n_columns))
    gain_importance = np.array(get_values(booster.get_score(importance_type='gain'), n_columns))

    return {
        'X_y_data': (X_y_train, X_y_val, X_y_test),
        'oob_data': (oob_train, oob_val, oob_test),
        'score_mse': score_mse,
        'score_mape': score_mape,
        # [:-1] removes the entry belonging to the last column of X_y.
        'learn_feature_importance': global_importance[:-1],
        'weight_importance': weight_importance[:-1],
        'gain_importance': gain_importance[:-1],
    }

def learn_oob_without_y(X_y, oob, global_method = 'SHAP'):
    """Fit the same XGBoost regressor as ``learn_oob`` but without the last column.

    Parameters
    ----------
    X_y : tuple of three 2-D arrays
        ``(train, val, test)`` matrices; the last column of each is split off
        and not used as a model input.
    oob : tuple of three array-likes
        ``(train, val, test)`` regression targets matching ``X_y``.
    global_method : {'SHAP', 'SAGE'}
        How the global importance is computed on the test part.

    Returns
    -------
    dict
        ``'X_y_data'`` holds ``(X_train, y_train, X_val, y_val, X_test, y_test)``,
        ``'score_mse'`` / ``'score_mape'`` the test errors, and
        ``'learn_feature_importance'``, ``'weight_importance'``,
        ``'gain_importance'`` one importance value per remaining column.

    Raises
    ------
    NotImplementedError
        If ``global_method`` is neither ``'SHAP'`` nor ``'SAGE'``; this is
        only detected after the model has been trained.
    """
    X_y_train, X_y_val, X_y_test = X_y
    oob_train, oob_val, oob_test = oob

    # Separate the last column (returned as y_*) from the model inputs.
    X_train, y_train = X_y_train[:,:-1], X_y_train[:,-1]
    X_val, y_val = X_y_val[:,:-1], X_y_val[:,-1]
    X_test, y_test = X_y_test[:,:-1], X_y_test[:,-1]

    dtrain = xgb.DMatrix(X_train, label=oob_train)
    dval = xgb.DMatrix(X_val, label=oob_val)
    dtest = xgb.DMatrix(X_test, label=oob_test)

    booster = xgb.train(
        {
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'learning_rate': 0.01,
            'random_state':0
        },
        dtrain,
        num_boost_round=1000,
        evals=[(dtrain, 'train'), (dval, 'eval')],
        early_stopping_rounds=10,
        verbose_eval=0,
    )

    # Test-set error of the fitted regressor.
    predictions = booster.predict(dtest)
    score_mse = mean_squared_error(oob_test, predictions)
    score_mape = mape(oob_test, predictions)

    if global_method == 'SHAP':
        # Mean absolute SHAP value per column over the test rows.
        attributions = shap.TreeExplainer(booster)(X_test)
        global_importance = np.abs(attributions.values).mean(axis=0)
    elif global_method == 'SAGE':
        # The first 512 test rows serve as the background sample of the imputer.
        sage_estimator = sage.PermutationEstimator(
            sage.MarginalImputer(booster, X_test[:512]), 'mse')
        global_importance = sage_estimator(X_test, oob_test,bar=False).values
    else:
        raise NotImplementedError('Not implemented yet!')

    # Booster-native importances, densified so columns missing from the score dict get 0.
    n_columns = X_train.shape[1]
    weight_importance = np.array(get_values(booster.get_score(importance_type='weight'), n_columns))
    gain_importance = np.array(get_values(booster.get_score(importance_type='gain'), n_columns))

    return {
        'X_y_data': (X_train, y_train, X_val, y_val, X_test, y_test),
        'score_mse': score_mse,
        'score_mape': score_mape,
        'learn_feature_importance': global_importance,
        'weight_importance': weight_importance,
        'gain_importance': gain_importance,
    }

def base_learn_oob(X_y_data, global_method = 'SHAP'):
    """Fit an XGBoost binary classifier on the last column and rank the other columns.

    Parameters
    ----------
    X_y_data : tuple of three 2-D arrays
        ``(train, val, test)`` matrices. In each one the last column is used as
        the label and all other columns as model inputs.
    global_method : {'SHAP', 'SAGE'}
        How the global importance is computed on the test part.

    Returns
    -------
    dict
        ``'score_acc'`` is the test accuracy of the predictions thresholded at
        0.5; ``'learn_feature_importance'``, ``'weight_importance'`` and
        ``'gain_importance'`` hold one importance value per input column.

    Raises
    ------
    NotImplementedError
        If ``global_method`` is neither ``'SHAP'`` nor ``'SAGE'``; this is
        only detected after the model has been trained.
    """
    X_y_train, X_y_val, X_y_test = X_y_data

    # Last column -> label, everything before it -> inputs.
    inputs_train, labels_train = X_y_train[:,:-1], X_y_train[:,-1]
    inputs_val, labels_val = X_y_val[:,:-1], X_y_val[:,-1]
    inputs_test, labels_test = X_y_test[:,:-1], X_y_test[:,-1]

    train_matrix = xgb.DMatrix(inputs_train, label=labels_train)
    val_matrix = xgb.DMatrix(inputs_val, label=labels_val)
    test_matrix = xgb.DMatrix(inputs_test, label=labels_test)

    classifier = xgb.train(
        {
            'objective': 'binary:logistic',
            'eval_metric': 'auc',
            'learning_rate': 0.01,
            'random_state':0
        },
        train_matrix,
        num_boost_round=1000,
        evals=[(train_matrix, 'train'), (val_matrix, 'eval')],
        early_stopping_rounds=10,
        verbose_eval=0,
    )

    # Threshold the model output at 0.5 to obtain hard 0/1 predictions.
    predicted_labels = (classifier.predict(test_matrix) >= 0.5).astype(int)
    acc = accuracy_score(labels_test, predicted_labels)

    if global_method == 'SHAP':
        # Mean absolute SHAP value per column over the test rows.
        attributions = shap.TreeExplainer(classifier)(inputs_test)
        importance = np.abs(attributions.values).mean(axis=0)
    elif global_method == 'SAGE':
        # The first 512 test rows serve as the background sample of the imputer.
        sage_estimator = sage.PermutationEstimator(
            sage.MarginalImputer(classifier, inputs_test[:512]), 'mse')
        importance = sage_estimator(inputs_test, labels_test,bar=False).values
    else:
        raise NotImplementedError('Not implemented yet!')

    # Booster-native importances, densified so columns missing from the score dict get 0.
    n_columns = inputs_train.shape[1]
    weight_scores = np.array(get_values(classifier.get_score(importance_type='weight'), n_columns))
    gain_scores = np.array(get_values(classifier.get_score(importance_type='gain'), n_columns))

    return {
        'score_acc': acc,
        'learn_feature_importance': importance,
        'weight_importance': weight_scores,
        'gain_importance': gain_scores,
    }

In [3]:
# Number of repetitions ("rounds") run for every configuration in dargs_list.
n_sim = 10

# Flat, round-major schedule of (round, configuration index) pairs:
# (0, 0), (0, 1), ..., (1, 0), (1, 1), ...
# Note: the loop variables `n` and `dargs_ind` stay defined at notebook level
# after this cell, so `dargs_ind` no longer holds the value assigned earlier.
n_dargs_ind_list = []
for n in range(n_sim):
    for dargs_ind in range(len(dargs_list)):
        n_dargs_ind_list.append((n,dargs_ind))

In [23]:
# Main experiment loop: for every (round, configuration) pair, generate data,
# run the data/feature valuation engine, add the XGBoost-based importances
# computed by learn_oob / learn_oob_without_y / base_learn_oob, evaluate and
# save the results, then verify the result is not a copy of the previous round.
n_configs = len(dargs_list)
for idx, (n, dargs_ind) in enumerate(n_dargs_ind_list):
    print('*'*50)

    print("round:%s"%n)
    # NOTE(review): absolute, machine-specific output directory.
    runpath = r'C:\Users\yf-su\Desktop\XAI\run_path_%s'%n
    os.makedirs(runpath, exist_ok=True)

    print('-'*50)
    dargs = dargs_list[dargs_ind]
    print("current dargs:",dargs_ind, dargs)
    # Reseed NumPy's global RNG from fresh entropy; runs are therefore not
    # reproducible. Presumably this makes datasets.load_data draw a different
    # sample in every round — TODO confirm that load_data uses the global RNG.
    np.random.seed()
    # NOTE(review): problem type and dataset name are hard-coded here rather
    # than taken from `problem` / `dataset` — confirm this is intended.
    (X, y), (X_val, y_val), (X_test, y_test), noisy_index, beta_true = datasets.load_data('clf','gaussian',**dargs)
    print(y[:10])
    data_valuation_engine=DataValuation(X=X, y=y,
                                            X_val=X_val, y_val=y_val,
                                            problem=problem, dargs=dargs)
    data_valuation_engine.evalute_rf_models(X_test, y_test)
    data_valuation_engine.compute_data_shap(loo_run=loo_run,
                                                    betashap_run=betashap_run)
    data_valuation_engine.compute_feature_shap(AME_run=AME_run,
                                               lasso_run=lasso_run,
                                               boosting_run=boosting_run,
                                               treeshap_run=treeshap_run,
                                               simple_run=simple_run)

    # Append the label as the last column so it can act as an extra input.
    X_y = np.concatenate((X,y.reshape(-1,1)), axis=1)
    oob = data_valuation_engine.data_value_dict['Df-OOB-data']

    # All three learners share the splits produced by learn_oob.
    learn = learn_oob(X_y, oob, global_method = 'SHAP')
    learn_without_y = learn_oob_without_y(learn['X_y_data'], learn['oob_data'], global_method = 'SHAP')
    base_learn = base_learn_oob(learn['X_y_data'], global_method = 'SHAP')

    data_valuation_engine.feature_value_dict['Learn-OOB'] = learn['learn_feature_importance']
    data_valuation_engine.feature_value_dict['Weight'] = learn['weight_importance']
    data_valuation_engine.feature_value_dict['Gain'] = learn['gain_importance']

    data_valuation_engine.feature_value_dict['Learn-OOB-without-y'] = learn_without_y['learn_feature_importance']
    data_valuation_engine.feature_value_dict['Weight-without-y'] = learn_without_y['weight_importance']
    data_valuation_engine.feature_value_dict['Gain-without-y'] = learn_without_y['gain_importance']

    data_valuation_engine.feature_value_dict['Base-Learn-OOB'] = base_learn['learn_feature_importance']
    data_valuation_engine.feature_value_dict['Base-Weight'] = base_learn['weight_importance']
    data_valuation_engine.feature_value_dict['Base-Gain'] = base_learn['gain_importance']

    data_valuation_engine.learn_dict['mape'] = learn['score_mape']
    data_valuation_engine.learn_dict['mse'] = learn['score_mse']
    data_valuation_engine.learn_dict['acc(base)'] = base_learn['score_acc']

    data_valuation_engine.evaluate_data_values(noisy_index, beta_true, X_test, y_test, removal_run=removal_run)
    data_valuation_engine.save_results(runpath, dataset, dargs_ind, noisy_index, beta_true)

    # Sanity check: the 'Weight' importances must differ from those saved for
    # the same configuration in the previous round. The first round has no
    # predecessor, so it is skipped instead of letting a negative index wrap
    # around to the last round's (stale or missing) files.
    if idx >= n_configs:
        prev_n, prev_dargs_ind = n_dargs_ind_list[idx - n_configs]
        # np.load with allow_pickle=True unpickles the file: only use it on
        # result files written by this notebook.
        past = np.load(r"C:\Users\yf-su\Desktop\XAI\run_path_%d\run_id0_%d.pkl"%(prev_n,prev_dargs_ind), allow_pickle = True)
        if np.array_equal(data_valuation_engine.feature_value_dict['Weight'], past['feature_value']['Weight']):
            raise RuntimeError(
                "'Weight' importances of round %d, dargs %d are identical to round %d: "
                "the repetitions are not independent" % (n, dargs_ind, prev_n))

    # Release the per-iteration objects before the next configuration is loaded.
    del X, y, X_val, y_val, X_test, y_test, X_y, oob
    del data_valuation_engine, learn, learn_without_y, base_learn

**************************************************
round:2
--------------------------------------------------
current dargs: 14 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10000, 50)
Val X: (1000, 50)
Test X: (3000, 50)
------------------------------
[0 0 0 0 1 0 0 0 0 1]
RF 0.753
RF_original 0.754
gap 0.001
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
---------

RF 0.819
RF_original 0.836
gap 0.017
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_3, run_id: 0.
**************************************************
round:3
--------------------------------------------------
current dargs: 4 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10

RF 0.768
RF_original 0.764
gap -0.004
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_3, run_id: 0.
**************************************************
round:3
--------------------------------------------------
current dargs: 12 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train 

RF 0.752
RF_original 0.754
gap 0.002
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_4, run_id: 0.
**************************************************
round:4
--------------------------------------------------
current dargs: 2 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10

RF 0.813
RF_original 0.835
gap 0.022
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_4, run_id: 0.
**************************************************
round:4
--------------------------------------------------
current dargs: 10 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X

Train X: (10000, 100)
Val X: (1000, 100)
Test X: (3000, 100)
------------------------------
[0 0 1 1 0 0 0 0 1 0]
RF 0.834
RF_original 0.863
gap 0.029
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_4, run_id: 0.
**************************************************
round:5
--------------------------------------------------
current dargs: 0 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
-----------

Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
[0 0 0 0 1 1 1 1 0 1]
RF 0.786
RF_original 0.787
gap 0.001
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_5, run_id: 0.
**************************************************
round:5
--------------------------------------------------
current dargs: 8 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
-----------

**************************************************
round:5
--------------------------------------------------
current dargs: 15 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10000, 100)
Val X: (1000, 100)
Test X: (3000, 100)
------------------------------
[0 1 0 1 0 0 0 0 0 0]
RF 0.832
RF_original 0.858
gap 0.026
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
----

RF 0.749
RF_original 0.750
gap 0.001
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_6, run_id: 0.
**************************************************
round:6
--------------------------------------------------
current dargs: 5 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10

Train X: (10000, 50)
Val X: (1000, 50)
Test X: (3000, 50)
------------------------------
[1 1 0 1 1 1 0 0 0 1]
RF 0.783
RF_original 0.813
gap 0.030
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_6, run_id: 0.
**************************************************
round:6
--------------------------------------------------
current dargs: 13 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
---------

**************************************************
round:7
--------------------------------------------------
current dargs: 2 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (1000, 25)
Val X: (100, 25)
Test X: (3000, 25)
------------------------------
[1 0 0 0 0 1 0 0 1 1]
RF 0.703
RF_original 0.708
gap 0.005
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
----------------

RF 0.795
RF_original 0.819
gap 0.024
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_7, run_id: 0.
**************************************************
round:7
--------------------------------------------------
current dargs: 10 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X

Train X: (10000, 100)
Val X: (1000, 100)
Test X: (3000, 100)
------------------------------
[1 0 1 0 1 0 0 0 1 0]
RF 0.783
RF_original 0.807
gap 0.024
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_7, run_id: 0.
**************************************************
round:8
--------------------------------------------------
current dargs: 0 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 25, 'run_id': 0}
-----------

Train X: (1000, 100)
Val X: (100, 100)
Test X: (3000, 100)
------------------------------
[1 0 0 1 1 1 0 1 1 1]
RF 0.754
RF_original 0.767
gap 0.013
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_8, run_id: 0.
**************************************************
round:8
--------------------------------------------------
current dargs: 8 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
-----------

**************************************************
round:8
--------------------------------------------------
current dargs: 15 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.5, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 100, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10000, 100)
Val X: (1000, 100)
Test X: (3000, 100)
------------------------------
[1 1 1 1 1 1 1 0 0 0]
RF 0.806
RF_original 0.842
gap 0.036
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
----

RF 0.766
RF_original 0.774
gap 0.009
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_9, run_id: 0.
**************************************************
round:9
--------------------------------------------------
current dargs: 5 {'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 1000, 'n_val': 100, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.9, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
--------------------------------------------------
GAUSSIAN-C
--------------------------------------------------
Train X: (10

Train X: (10000, 50)
Val X: (1000, 50)
Test X: (3000, 50)
------------------------------
[1 1 0 0 0 1 1 0 0 1]
RF 0.799
RF_original 0.838
gap 0.039
Start: KNN_Shapley computation
Done: KNN_Shapley computation
Start: Data-OOB computation
Done: Data-OOB computation
Start: DF-OOB computation
Done: DF-OOB computation
--------------------------------------------------
Save results
--------------------------------------------------
Done! path: C:\Users\yf-su\Desktop\XAI\run_path_9, run_id: 0.
**************************************************
round:9
--------------------------------------------------
current dargs: 13 {'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
------------------------------
{'n_data_to_be_valued': 10000, 'n_val': 1000, 'n_test': 3000, 'n_trees': 800, 'masked_ratio': 0.7, 'is_noisy': 0.1, 'model_family': 'Tree', 'input_dim': 50, 'run_id': 0}
---------