This script shows how to use joint and stage-wise discretization.

Note: Joint discretization is much faster than stage-wise discretization.

Assume the best cut-point set after the $j$-th iteration is given; we can then find another cut point $t_{j+1}$ that reaches the maximum MI or the minimum p-value.  
Let $n$ be the sample size, $p$ the number of predictors, and $k$ the number of selected variables.
Time complexity of joint discretization:
* $\operatorname{arg\,max}(I(X_{j+1};Y) - I(X_{j};Y)) \rightarrow O(npk)$
* $\operatorname{arg\,min}(P(I(X_{j+1};Y) - I(X_{j};Y))) \rightarrow O(npk)$

Furthermore, the time complexity can be reduced to $O(npl)$, where $l$ is the number of unique variables among the $k$ selected ones.

In [1]:
from joint_nalgorithmMI import efficientJointDiscretizationMI
from joint_nalgorithmPvalue import efficientJointDiscretizationPvalue
from joint_lognalgorithmPvalue import stageWiseDiscretizationPvalue
from joint_lognalgorithmMI import stageWiseDiscretizationMI

from data_generator_v2 import dataGenerator

# Draw one sample from the project's dataGenerator, configured for the
# 'binomial' model with random_state pinned to 0.
df_X, y, variable_lst, btrue, formula = dataGenerator(
    n=200,
    rho=0.8,
    p=100,
    r=10,
    num_terms=10,
    degree=5,
    alpha=None,
    beta=0.5,
    SNR=10,
    random_state=0,
    model='binomial',
).sample()



  mu = 1/(1+ np.exp(-eta))
  snr = np.sqrt(np.var(mu)/np.mean(v))
  df = fun(x) - f0
  mu = 1/(1+ np.exp(-eta))
  mu = 1/(1+ np.exp(-eta))


In [2]:
# Joint discretization driven by maximum MI; fit receives the raw predictor
# array (df_X.values) together with the response y.
(
    dims_list,
    step_fmi,
    best_bins,
    values,
    best_counts,
    best_y_counts,
    best_cond_entr,
    num_bins,
) = efficientJointDiscretizationMI(
    duplicate=True,
    early_stopping='chi_square_adjust',
    delta_correction=True,
    delta=0.05,
).fit(df_X.values, y)

In [3]:
# Display a subset of the values unpacked from the joint MI fit above.
# NOTE(review): `values` presumably holds the chosen cut points -- confirm
# against efficientJointDiscretizationMI.
dims_list, step_fmi, num_bins, values

([7, 5],
 [0.174403624917856, 0.2909035511897311],
 4,
 [0.5074170337254518, 0.43238782649262236])

In [4]:
# Joint discretization driven by p-values; same call shape as the MI variant
# (raw predictor array plus response y).
(
    dims_list,
    step_fmi,
    best_bins,
    values,
    best_counts,
    best_y_counts,
    best_cond_entr,
    num_bins,
) = efficientJointDiscretizationPvalue(
    duplicate=True,
    early_stopping='chi_square_adjust',
    delta_correction=True,
    delta=0.05,
).fit(df_X.values, y)

In [5]:
# Display a subset of the values unpacked from the joint p-value fit above.
# NOTE(review): `values` presumably holds the chosen cut points -- confirm
# against efficientJointDiscretizationPvalue.
dims_list, step_fmi, num_bins, values

([7, 5],
 [0.174403624917856, 0.2909035511897311],
 4,
 [0.5074170337254518, 0.43238782649262236])

In [6]:
from copy import deepcopy

# Work on a copy so that adding the response column 'Y' leaves df_X untouched;
# the stage-wise fit is handed this single combined frame.
dfs = deepcopy(df_X)
dfs['Y'] = y
(
    best_subsetData_list,
    step_mi_list,
    dim_list,
    cutpoint_list,
    num_bins,
) = stageWiseDiscretizationPvalue(
    delta=0.05,
    early_stopping='chi_square_adjust',
).fit(dfs)

In [7]:
# Display a subset of the values unpacked from the stage-wise p-value fit above.
dim_list, step_mi_list, num_bins, cutpoint_list

([7, 5],
 array([0.17056694, 0.27956303]),
 4,
 [0.23537269829268415, 0.7905307502798592])

In [8]:
from copy import deepcopy

# Work on a copy so that adding the response column 'Y' leaves df_X untouched;
# the stage-wise fit is handed this single combined frame.
dfs = deepcopy(df_X)
dfs['Y'] = y

(
    best_subsetData_list,
    step_mi_list,
    dim_list,
    cutpoint_list,
    num_bins,
) = stageWiseDiscretizationMI(
    delta=0.05,
    early_stopping='chi_square_adjust',
).fit(dfs)

In [13]:
import numpy as np
# Distinct entries of dims_list; np.unique returns them in sorted order.
np.unique(dims_list)

array([5, 7])

In [11]:
# Display the most recently assigned stage-wise results.
# NOTE(review): presumably these come from the stageWiseDiscretizationMI fit
# (cell In [8]) -- confirm the execution order.
dim_list, step_mi_list, num_bins, cutpoint_list

([75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  75,
  46,
  66,
  30,
  34,
  35],
 array([0.01940982, 0.02977135, 0.0370711 , 0.04927573, 0.08655236,
        0.09564181, 0.10348612, 0.12185453, 0.1369193 , 0.16533108,
        0.17816706, 0.18495271, 0.2046289 , 0.21844565, 0.22964883,
        0.23623193, 0.2421923 , 0.25300862, 0.26004916, 0.2655261 ,
        0.27050902, 0.27549193, 0.27965113, 0.28463404, 0.2881793 ,
        0.29119719, 0.29263837, 0.30810101, 0.31658513, 0.32501052,
        0.33233885, 0.33527829, 0.3427294 , 0.3433093 , 0.34614527,
        0.34674099, 0.3517239 , 0.35228619, 0.3818633 , 0.39586066,
        0.4