In [1]:
from fusinter_v1 import FUSINTERDiscretizer as v1
from fusinter_v2 import FUSINTERDiscretizer as v2
from fusinter_v2_2 import FUSINTERDiscretizer as v2_2
from datasets import paper_dataset_x, paper_dataset_y

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris, load_wine

import ipywidgets as widgets

# Apply matplotlib's "fivethirtyeight" style sheet to the figures drawn below.
plt.style.use("fivethirtyeight")

## Paper Dataset

In [2]:
# Controls for the paper-dataset demo. They are passed to `widgets.interact`
# below, which re-runs the plotting function whenever a value changes.
alpha_slider = widgets.FloatSlider(
    value=0.95,
    min=0,
    max=1,
    step=0.05,
    description='alpha:',
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)
lam_slider = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.05,
    description='lambda:',
    orientation='horizontal',
    readout_format='.2f',
    readout=True,
)

# Each option maps a display label to one of the imported discretizer classes.
dropdown = widgets.Dropdown(
    options=[("version 1", v1), ("version 2", v2), ("version 2.2", v2_2)],
    description='Discretizer Version:',
    # The label is longer than the default description column, so let the
    # column size itself instead of clipping the text.
    style={'description_width': 'initial'},
    disabled=False,
)

@widgets.interact(alpha=alpha_slider, lam=lam_slider, selector=dropdown)
def f(alpha, lam, selector):
    """Discretize the paper dataset and plot the resulting split points.

    Registered with ``widgets.interact`` using the alpha slider, the lambda
    slider and the discretizer dropdown as inputs.

    Parameters
    ----------
    alpha : float
        Current value of the alpha slider, forwarded to ``apply``.
    lam : float
        Current value of the lambda slider, forwarded to ``apply``.
    selector : type
        Discretizer class currently selected in the dropdown.
    """
    values = paper_dataset_x
    labels = paper_dataset_y

    # Instantiate the selected discretizer and compute its split points.
    splits = selector(values, labels).apply(alpha=alpha, lam=lam)
    display("final splits", splits)

    # One strip of points per class; each split is a horizontal red line.
    fig, ax = plt.subplots(1, figsize=(15, 5))
    sns.stripplot(y=values, x=labels.astype(int), ax=ax)
    ax.hlines(splits, xmin=-1, xmax=3, color="red")

    ax.set_title("Fusinter Splits for Paper Dataset")
    ax.set_xlabel("class")
    ax.set_ylabel("value")
    

interactive(children=(FloatSlider(value=0.95, description='alpha:', max=1.0, step=0.05), FloatSlider(value=1.0…

## Iris Dataset

In [3]:
# Controls for the iris demo. They are passed to `widgets.interact` below,
# which re-runs the plotting function whenever a value changes.
alpha_slider = widgets.FloatSlider(
    value=0.95,
    min=0,
    max=1,
    step=0.05,
    description='alpha:',
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)
lam_slider = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.05,
    description='lambda:',
    orientation='horizontal',
    readout_format='.2f',
    readout=True,
)

# Each option maps a display label to one of the imported discretizer classes.
dropdown = widgets.Dropdown(
    options=[("version 1", v1), ("version 2", v2), ("version 2.2", v2_2)],
    description='Discretizer Version:',
    # The label is longer than the default description column, so let the
    # column size itself instead of clipping the text.
    style={'description_width': 'initial'},
    disabled=False,
)

@widgets.interact(alpha=alpha_slider, lam=lam_slider, selector=dropdown)
def f(alpha, lam, selector):
    """Discretize one iris feature and plot the resulting split points.

    Registered with ``widgets.interact`` using the alpha slider, the lambda
    slider and the discretizer dropdown as inputs.

    Parameters
    ----------
    alpha : float
        Current value of the alpha slider, forwarded to ``apply``.
    lam : float
        Current value of the lambda slider, forwarded to ``apply``.
    selector : type
        Discretizer class currently selected in the dropdown.
    """
    iris = load_iris()
    # Column 2 of the iris feature matrix is petal length (not sepal length).
    petal_length = iris["data"][:, 2]
    species = iris["target"]

    # Instantiate the selected discretizer and compute its split points.
    splits = selector(petal_length, species).apply(alpha=alpha, lam=lam)
    display("final splits", splits)

    # One strip of points per class; each split is a horizontal red line.
    fig, ax = plt.subplots(1, figsize=(15, 5))
    sns.stripplot(y=petal_length, x=species.astype(int), ax=ax)
    ax.hlines(splits, xmin=-1, xmax=3, color="red")

    ax.set_title("Fusinter Splits for Petal Length")
    ax.set_xlabel("class")
    ax.set_ylabel("length")
    

interactive(children=(FloatSlider(value=0.95, description='alpha:', max=1.0, step=0.05), FloatSlider(value=1.0…

## Titanic Dataset

In [4]:
# Controls for the Titanic demo. They are passed to `widgets.interact` below,
# which re-runs the plotting function whenever a value changes.
alpha_slider = widgets.FloatSlider(
    value=0.95,
    min=0,
    max=1,
    step=0.05,
    description='alpha:',
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)
lam_slider = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.05,
    description='lambda:',
    orientation='horizontal',
    readout_format='.2f',
    readout=True,
)

# Each option maps a display label to one of the imported discretizer classes.
dropdown = widgets.Dropdown(
    options=[("version 1", v1), ("version 2", v2), ("version 2.2", v2_2)],
    description='Discretizer Version:',
    # The label is longer than the default description column, so let the
    # column size itself instead of clipping the text.
    style={'description_width': 'initial'},
    disabled=False,
)

@widgets.interact(alpha=alpha_slider, lam=lam_slider, selector=dropdown)
def f(alpha, lam, selector):
    """Discretize Titanic passenger ages and plot the splits over age densities.

    Registered with ``widgets.interact`` using the alpha slider, the lambda
    slider and the discretizer dropdown as inputs. Reads ``titanic.csv`` from
    the working directory on every call.

    Parameters
    ----------
    alpha : float
        Current value of the alpha slider, forwarded to ``apply``.
    lam : float
        Current value of the lambda slider, forwarded to ``apply``.
    selector : type
        Discretizer class currently selected in the dropdown.
    """
    passengers = pd.read_csv("titanic.csv").set_index("PassengerId")

    # Keep only the rows that have an age recorded.
    has_age = passengers["Age"].notnull()
    ages = passengers.loc[has_age, "Age"].to_numpy()
    survived = passengers.loc[has_age, "Survived"].to_numpy()

    # Instantiate the selected discretizer and compute its split points.
    splits = selector(ages, survived).apply(alpha=alpha, lam=lam)
    display("final splits", splits)

    # Age density per outcome; each split is a vertical red line.
    fig, ax = plt.subplots(1, figsize=(15, 5))
    ax.set_xlim(0, 75)
    sns.kdeplot(x=ages[survived == 0], label="died", ax=ax)
    sns.kdeplot(x=ages[survived == 1], label="survived", ax=ax)
    ax.vlines(splits, ymin=0, ymax=0.03, color="red")
    # NOTE(review): grid() with no arguments toggles the grid rather than
    # enabling it; under a style that already shows a grid this hides it.
    # Confirm which was intended.
    ax.grid()
    ax.set_title("Fusinter Splits for Titanic Age Dataset")
    ax.set_xlabel("age")
    ax.set_ylabel("density")
    ax.legend()

    

interactive(children=(FloatSlider(value=0.95, description='alpha:', max=1.0, step=0.05), FloatSlider(value=1.0…

## Wine Dataset

In [4]:
# Controls for the wine demo. They are passed to `widgets.interact` below,
# which re-runs the plotting function whenever a value changes.
alpha_slider = widgets.FloatSlider(
    value=0.95,
    min=0,
    max=1,
    step=0.05,
    description='alpha:',
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)
lam_slider = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.05,
    description='lambda:',
    orientation='horizontal',
    readout_format='.2f',
    readout=True,
)

# Each option maps a display label to one of the imported discretizer classes.
dropdown = widgets.Dropdown(
    options=[("version 1", v1), ("version 2", v2), ("version 2.2", v2_2)],
    description='Discretizer Version:',
    # The label is longer than the default description column, so let the
    # column size itself instead of clipping the text.
    style={'description_width': 'initial'},
    disabled=False,
)

@widgets.interact(alpha=alpha_slider, lam=lam_slider, selector=dropdown)
def f(alpha, lam, selector):
    """Discretize the first wine feature and plot the resulting split points.

    Registered with ``widgets.interact`` using the alpha slider, the lambda
    slider and the discretizer dropdown as inputs.

    Parameters
    ----------
    alpha : float
        Current value of the alpha slider, forwarded to ``apply``.
    lam : float
        Current value of the lambda slider, forwarded to ``apply``.
    selector : type
        Discretizer class currently selected in the dropdown.
    """
    wine = load_wine()
    # Only the first feature column is discretized.
    feature = wine["data"][:, 0]
    cultivar = wine["target"]

    # Instantiate the selected discretizer and compute its split points.
    splits = selector(feature, cultivar).apply(alpha=alpha, lam=lam)
    display("final splits", splits)

    # One strip of points per class; each split is a horizontal red line.
    fig, ax = plt.subplots(1, figsize=(15, 5))
    sns.stripplot(y=feature, x=cultivar.astype(int), ax=ax)
    ax.hlines(splits, xmin=-1, xmax=3, color="red")

    ax.set_title("Fusinter Splits for Wine Dataset")
    ax.set_xlabel("class")
    ax.set_ylabel("value")
    

interactive(children=(FloatSlider(value=0.95, description='alpha:', max=1.0, step=0.05), FloatSlider(value=1.0…

## Identical Dataset (for testing that no split is generated)

In [3]:
# Controls for the identical-dataset demo. They are passed to
# `widgets.interact` below, which re-runs the plotting function whenever a
# value changes.
alpha_slider = widgets.FloatSlider(
    value=0.95,
    min=0,
    max=1,
    step=0.05,
    description='alpha:',
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)
lam_slider = widgets.FloatSlider(
    value=1,
    min=0,
    max=1.0,
    step=0.05,
    description='lambda:',
    orientation='horizontal',
    readout_format='.2f',
    readout=True,
)

# Each option maps a display label to one of the imported discretizer classes.
dropdown = widgets.Dropdown(
    options=[("version 1", v1), ("version 2", v2), ("version 2.2", v2_2)],
    description='Discretizer Version:',
    # The label is longer than the default description column, so let the
    # column size itself instead of clipping the text.
    style={'description_width': 'initial'},
    disabled=False,
)

@widgets.interact(alpha=alpha_slider, lam=lam_slider, selector=dropdown)
def f(alpha, lam, selector):
    """Discretize a dataset whose two classes hold identical values and plot it.

    Twenty normally distributed values are generated and repeated once, with
    the first copy labelled class 0 and the second class 1, so both classes
    contain exactly the same values.

    Registered with ``widgets.interact`` using the alpha slider, the lambda
    slider and the discretizer dropdown as inputs.

    Parameters
    ----------
    alpha : float
        Current value of the alpha slider, forwarded to ``apply``.
    lam : float
        Current value of the lambda slider, forwarded to ``apply``.
    selector : type
        Discretizer class currently selected in the dropdown.
    """
    FUSINTERDiscretizer = selector
    # Use a fixed seed: this function re-runs on every widget change, and
    # unseeded sampling would hand the discretizer a different dataset each
    # time, making changes in the output impossible to attribute to
    # alpha/lambda and the cell non-reproducible.
    rng = np.random.default_rng(0)
    X = np.tile(rng.standard_normal(20), 2)
    y = np.repeat([0, 1], 20)
    discretizer = FUSINTERDiscretizer(X, y)
    final_splits = discretizer.apply(alpha=alpha, lam=lam)
    display("final splits", final_splits)

    # One strip of points per class; each split is a horizontal red line.
    fig, ax = plt.subplots(1)
    fig.set_size_inches(15, 5)
    sns.stripplot(y=X, x=y.astype(int), ax=ax)
    ax.hlines(final_splits, xmin=-1, xmax=3, color="red")

    ax.set_title("Fusinter Splits for Identical Normal Distributed Dataset")
    ax.set_xlabel("class")
    ax.set_ylabel("value")

interactive(children=(FloatSlider(value=0.95, description='alpha:', max=1.0, step=0.05), FloatSlider(value=1.0…