In [None]:
from efficient_probit_regression.datasets import Covertype, BaseDataset, KDDCup, Webspam

import numpy as np
import pandas as pd

import plotly.express as px

In [None]:
def make_residual_df(dataset: "BaseDataset", p_list: list) -> pd.DataFrame:
    """Collect the ``X @ beta`` values of sign-mismatched rows for each p.

    For every ``p`` in ``p_list`` the coefficient vector is obtained from
    ``dataset.get_beta_opt(p=p)`` and the product ``X @ beta`` is computed.
    Only the entries whose product with the label is negative
    (``(X @ beta) * y < 0``) are kept.

    Args:
        dataset: Object providing ``get_X()``, ``get_y()`` and
            ``get_beta_opt(p=...)``.
        p_list: Values of ``p`` to evaluate. May be empty.

    Returns:
        A DataFrame with a fresh consecutive index and two columns:
        ``"residual"`` (the selected ``X @ beta`` values) and ``"p"`` (the
        ``p`` each value was computed for). When ``p_list`` is empty, an
        empty DataFrame carrying the same two columns is returned.
    """
    X, y = dataset.get_X(), dataset.get_y()

    frames = []
    for p in p_list:
        beta = dataset.get_beta_opt(p=p)
        scores = X @ beta
        # NOTE(review): the sign test presumes labels coded as -1/+1 -- confirm.
        wrong_side = scores * y < 0
        # The scalar p is broadcast to the length of the selected values.
        frames.append(pd.DataFrame({"residual": scores[wrong_side], "p": p}))

    if not frames:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame(columns=["residual", "p"])

    return pd.concat(frames, ignore_index=True)

In [None]:
# Covertype: box plot of the selected values, grouped and coloured by p.
df = make_residual_df(dataset=Covertype(), p_list=[1, 1.5, 2, 5])
fig = px.box(data_frame=df, y="residual", color="p", title="Covertype")
# Write a standalone HTML copy of the figure, then display it.
fig.write_html(file="residuals_covertype.html")
fig.show()

In [None]:
# KDDCup: box plot of the selected values, grouped and coloured by p.
df = make_residual_df(dataset=KDDCup(), p_list=[1, 1.5, 2, 5])
fig = px.box(data_frame=df, y="residual", color="p", title="KDDCup")
# Write a standalone HTML copy of the figure, then display it.
fig.write_html(file="residuals_kddcup.html")
fig.show()

In [None]:
# Webspam: box plot of the selected values, grouped and coloured by p.
df = make_residual_df(dataset=Webspam(), p_list=[1, 1.5, 2, 5])
fig = px.box(data_frame=df, y="residual", color="p", title="Webspam")
# Write a standalone HTML copy of the figure, then display it.
fig.write_html(file="residuals_webspam.html")
fig.show()