In [1]:
import pandas as pd
import numpy as np
from utils.utils_py import compute_loco, compute_lazy
from permfit_python.permfit_py import permfit
from permfit_python.permfit_py_RF import permfit_RF
from dcrt import dcrt_zero
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri, numpy2ri
pandas2ri.activate()
numpy2ri.activate()

# Load required files
robjects.r('''source('data/data_gen.R')''')
robjects.r('''source('utils/compute_methods.R')''')

generate_data = robjects.globalenv['generate_data']

# Import the used methods in the paper (Fig. 3)
cpi_knockoff = robjects.globalenv['compute_cpi']
conditional_RF = robjects.globalenv['compute_strobl']
d0crt = robjects.globalenv['compute_d0crt']
marginal = robjects.globalenv['compute_marginal']

ModuleNotFoundError: No module named 'inflate_non_null'

## Choice of the method

In [None]:

  
  d0crt_results <- sandbox$dcrt_zero(
    sim_data[, -1],
    as.numeric(sim_data$y),
    loss = loss,
    screening = FALSE,
    statistic = statistic,
    ntree = ntree,
    type_prob = prob_type,
    refit = refit,
    scaled_statistics = scaled_statistics,
    verbose = TRUE,
    random_state = seed
  )
  
  return(data.frame(
    method = ifelse(scaled_statistics,
                    "d0CRT_scaled",
                    "d0CRT"
    ),
    importance = d0crt_results[[3]],
    p_value = d0crt_results[[2]],
    score = d0crt_results[[4]]
  ))

In [2]:
def d0crt(sim_data,
          seed=2021,
          loss = "least_square",
          statistic = "residual",
          ntree = 100,
          prob_type = "regression",
          verbose = False,
          scaled_statistics = False,
          refit = False):
    print("Applying d0CRT Method")
    d0crt_results = dcrt_zero(sim_data.iloc[:, :1],
                              sim_data.y,
                              loss = loss,
                              screening = False,
                              statistic = statistic,
                              ntree = ntree,
                              type_prob = prob_type,
                              refit = refit,
                              scaled_statistics = scaled_statistics,
                              verbose = True,
                              random_state = seed
                             )
    print(d0crt_results)
    res = {}
    res['method'] = "d0CRT_scaled" if scaled_statistics else "d0CRT"
    # res['importance'] = d0crt_results

    
def permfit_dnn(sim_data,
                prob_type='regression',
                n_perm=100,
                n_jobs=1,
                nominal={'nominal':[], 'binary':[], 'ordinal':[]}):
    print("Applying Permfit-DNN Method")
    imp = permfit(X_train=sim_data.iloc[:, 1:],
                  y_train=sim_data.y,
                  prob_type=prob_type,
                  conditional=False,
                  k_fold=2,
                  n_perm=n_perm,
                  n_jobs=n_jobs,
                  list_nominal=nominal)
    res = {}
    res['method'] = 'Permfit-DNN'
    res['importance'] = imp['importance']
    res['p_value'] = imp['pval']
    score = imp['score']
    return pd.DataFrame(res)

def cpi_dnn(sim_data,
                prob_type='regression',
                n_perm=100,
                n_jobs=1,
                nominal={'nominal':[], 'binary':[], 'ordinal':[]}):
    print("Applying Permfit-DNN Method")
    imp = permfit(X_train=sim_data.iloc[:, 1:],
                  y_train=sim_data.y,
                  prob_type=prob_type,
                  conditional=True,
                  k_fold=2,
                  n_perm=n_perm,
                  n_jobs=n_jobs,
                  list_nominal=nominal)
    res = {}
    res['method'] = 'Permfit-DNN'
    res['importance'] = imp['importance']
    res['p_value'] = imp['pval']
    score = imp['score']
    return pd.DataFrame(res)

def cpi_rf(sim_data,
                prob_type='regression',
                n_perm=100,
                n_jobs=1,
                nominal={'nominal':[], 'binary':[], 'ordinal':[]}):
    print("Applying Permfit-DNN Method")
    imp = permfit_RF(X_train=sim_data.iloc[:, 1:],
                     y_train=sim_data.y,
                     prob_type=prob_type,
                     conditional=True,
                     k_fold=2,
                     n_perm=n_perm,
                     n_jobs=n_jobs,
                     list_nominal=nominal)
    res = {}
    res['method'] = 'Permfit-DNN'
    res['importance'] = imp['importance']
    res['p_value'] = imp['pval']
    score = imp['score']
    return pd.DataFrame(res)

def lazy(sim_data, prob_type='regression'):
    print("Applying Lazy Method")
    imp = compute_lazy(np.array(sim_data.iloc[:, 1:]),
                       np.array(sim_data.y))
    p_value = []
    for i in range(len(imp['ub_list'])):
        if ((0 <= imp['ub_list'][i]) and (0 >= imp['lb_list'][i])):
            p_value.append(0)
        else:
            p_value.append(1)
    res = {}
    res['method'] = 'lazyVI'
    res['importance'] = imp['imp_vals']
    res['p_value'] = p_value
    return pd.DataFrame(res)
  
def loco(sim_data, seed=2021, ntree=100, prob_type='regression'):
    print("Applying LOCO Method")
    imp = compute_loco(sim_data.iloc[:, 1:],
                       sim_data.y,
                       ntree,
                       seed,
                       prob_type)
    res = {}
    res['method'] = 'LOCO'
    res['importance'] = imp['val_imp']
    res['p_value'] = imp['p_value']
    return pd.DataFrame(res)
    

## Data preparation

In [3]:
# n: number of samples
# p: number of variables
# n_signal: number of relevant predictors randomly chosen
# rho: the correlation coefficient between the variables
# type_sim: the type of correlation structure (blocks_fixed, simple toeplitz)
# n_blocks: number of blocks in the block-based correlation structure
# snr: Signal-Noise Ratio
# prob_sim_data: The type of the problem (classification, regression, regression_relu, regression_product, regression_combine)
data = robjects.conversion.rpy2py(generate_data(n=100,
                                                p=5,
                                                n_signal=2,
                                                rho=0.8,
                                                type_sim='blocks_fixed',
                                                n_blocks=1,
                                                snr=4,
                                                prob_sim_data='regression'
                                                ))

X = data.iloc[:, 1:]
y = data.y
print(f"Shape of X:{X.shape}")
print(f"Shape of y:{y.shape}")

Shape of X:(100, 5)
Shape of y:(100,)


## Execution

In [4]:
d0crt(data)

Applying d0CRT Method


ValueError: zero-size array to reduction operation maximum which has no identity

In [5]:
marginal(data)

[1] "Applying Marginal Method"


R[write to console]: Error in py_to_r(sklearn$metrics$r2_score(sim_data$y[-train_ind], pred)) : 
  could not find function "py_to_r"



RRuntimeError: Error in py_to_r(sklearn$metrics$r2_score(sim_data$y[-train_ind], pred)) : 
  could not find function "py_to_r"


In [4]:
conditional_RF(data)

[1] "Applying Strobl Method"


R[write to console]: Loading required package: grid

R[write to console]: Loading required package: modeltools

R[write to console]: Loading required package: stats4

R[write to console]: Loading required package: strucchange

R[write to console]: Loading required package: zoo

R[write to console]: 
Attaching package: ‘zoo’


R[write to console]: The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


R[write to console]: Loading required package: sandwich

R[write to console]: Error in (function (sim_data, ntree = 100L, mtry = 5L, conditional = TRUE,  : 
  object 'sklearn' not found



RRuntimeError: Error in (function (sim_data, ntree = 100L, mtry = 5L, conditional = TRUE,  : 
  object 'sklearn' not found


In [4]:
# loco(data)

In [5]:
cpi_knockoff(data)

[1] "Applying CPI/Knockoff Method"


R[write to console]: Error: Element with key 'regr.ranger' not found in DictionaryLearner!



RRuntimeError: Error: Element with key 'regr.ranger' not found in DictionaryLearner!
