In [None]:
from rashomon_importance_distribution import RashomonImportanceDistribution
import pandas as pd

# Load the pre-binarized MONK-1 example data. The label is expected to be
# the rightmost column of the CSV.
df = pd.read_csv(filepath_or_buffer='./monk_1_example_data.csv')

# Binning map: each key is a column index in the original (pre-binarization)
# dataset, and its value lists the binarized columns derived from that column.
mapping = {
    0: [0, 1, 2],          # original column 0 -> binarized columns 0-2
    1: [3, 4, 5],          # original column 1 -> binarized columns 3-5
    2: [6, 7],             # original column 2 -> binarized columns 6-7
    3: [8, 9, 10],         # original column 3 -> binarized columns 8-10
    4: [11, 12, 13, 14],   # original column 4 -> binarized columns 11-14
    5: [15, 16],           # original column 5 -> binarized columns 15-16
}

In [None]:
# Build the Rashomon Importance Distribution (RID) for the binarized MONK-1 data.
# NOTE(review): the "presumably" notes below are inferred from argument names
# only; confirm them against the rashomon_importance_distribution package.
RID = RashomonImportanceDistribution(
    input_df=df,                  # binarized data, labels in the rightmost column
    binning_map=mapping,          # original column -> binarized columns
    db=4,                         # presumably a tree depth bound
    lam=0.03,                     # presumably a regularization strength
    eps=0.1,                      # presumably the Rashomon set threshold
    vi_metric='sub_mr',           # variable importance metric to compute
    dataset_name='monk_1_demo',
    n_resamples=10,               # presumably the number of resampled datasets
    verbose=False,
    max_par_for_gosdt=1,          # presumably caps concurrent GOSDT runs
)

In [None]:
# Print the box and whiskers range for each original variable.
# The variable count is taken from the binning map (one entry per original
# column) instead of a hard-coded 6, so this loop stays in sync if `mapping`
# is edited.
for v in range(len(mapping)):
    print(f"Variable {v} --------------")

    # Get box and whiskers range for variable
    print("Box and whiskers range:", RID.bwr(v))

### Brief demo using real, unbinarized data

The data come from the [Breast Cancer Wisconsin (Diagnostic) dataset](https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic) in the UCI Machine Learning Repository.

In [None]:
# Load the raw (unbinarized) Wisconsin breast cancer data.
# Note: this rebinds `df`, replacing the MONK-1 frame loaded earlier.
df = pd.read_csv(filepath_or_buffer="./wisconsin.csv")

In [None]:
# Build the Rashomon Importance Distribution (RID) for the Wisconsin data.
# Note: this rebinds `RID`, replacing the object built for the MONK-1 demo.
# NOTE(review): the "presumably" notes below are inferred from argument names
# only; confirm them against the rashomon_importance_distribution package.
RID = RashomonImportanceDistribution(
    input_df=df,
    allow_binarize_internally=True,   # no binning_map is passed here; presumably
                                      # the library binarizes the features itself
    db=3,                             # presumably a tree depth bound
    lam=0.03,                         # presumably a regularization strength
    eps=0.1,                          # presumably the Rashomon set threshold
    vi_metric='sub_mr',               # variable importance metric to compute
    dataset_name='cancer_demo',
    n_resamples=10,                   # presumably the number of resampled datasets
    verbose=False,
    max_par_for_gosdt=1,              # presumably caps concurrent GOSDT runs
)

In [None]:
# Print the box and whiskers range for every variable tracked by RID.
# The heading uses the column name at the same position in `df`.
# NOTE(review): assumes RID's variable indices line up with df's column
# order -- confirm against the library.
for var_idx in range(RID.n_vars):
    print(f"{df.columns[var_idx]} Importance --------------")

    # Box and whiskers range for this variable
    print("Box and whiskers range:", RID.bwr(var_idx))