In [None]:
from efficient_probit_regression.probit_model import PGeneralizedProbitModel
from efficient_probit_regression.sampling import leverage_score_sampling
from efficient_probit_regression.datasets import Covertype
from efficient_probit_regression import settings

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Fit the p-generalized probit model on a leverage-score sample of Covertype
# and keep the resulting coefficients for comparison with the reference ones.
dataset = Covertype()
p = 2

# Reference coefficients for this p, as supplied by the dataset object.
beta_opt = dataset.get_beta_opt(p=p)

# Sampling configuration gathered in one place (values unchanged).
sampling_options = dict(
    sample_size=15000,
    augmented=True,
    online=False,
    round_up=True,
    p=p,
    fast_approx=True,
)
X_reduced, y_reduced, weights = leverage_score_sampling(
    X=dataset.get_X(),
    y=dataset.get_y(),
    **sampling_options,
)

# The model only sees the reduced sample; the sampling weights are passed as `w`.
model = PGeneralizedProbitModel(p=p, X=X_reduced, y=y_reduced, w=weights)
model.fit()
beta_reduced = model.get_params()

In [None]:
# Build a long-format table for plotting: one row per
# (coefficient index, estimate name) pair, with the coefficient in "value".
beta_wide = pd.DataFrame({"beta_opt": beta_opt, "beta_reduced": beta_reduced})
beta_indexed = beta_wide.assign(index=range(len(beta_opt)))
df = beta_indexed.melt(id_vars="index", var_name="beta")
df.head()

In [None]:
# Overlay both coefficient vectors (one line per estimate) and save the figure.
fig, ax = plt.subplots()
sns.lineplot(data=df, x="index", y="value", hue="beta", ax=ax)
ax.set_title("Covertype, size = 15000")
fig.savefig(settings.PLOTS_DIR / "compare_beta.pdf")

In [None]:
# 2-norm of the gap between the sampled-data estimate and the reference coefficients.
l2_error = np.linalg.norm(beta_reduced - beta_opt, ord=2)
l2_error

In [None]:
# Infinity-norm of the same gap between the two coefficient vectors.
linf_error = np.linalg.norm(beta_reduced - beta_opt, ord=np.inf)
linf_error

In [None]:
# Model built on the complete dataset (no sampling weights). It is not fitted
# in this cell; it is only used to evaluate the objective at given coefficients.
model_opt = PGeneralizedProbitModel(p=p, X=dataset.get_X(), y=dataset.get_y())


def f(beta):
    """Return the negative log-likelihood of ``beta`` as computed by ``model_opt``."""
    return model_opt.negative_log_likelihood(beta)

In [None]:
# Relative difference in objective value between the two coefficient vectors.
# f(beta_opt) is evaluated once and reused in both numerator and denominator.
loss_opt = f(beta_opt)
loss_reduced = f(beta_reduced)
np.abs(loss_opt - loss_reduced) / loss_opt