In [None]:
from efficient_probit_regression.probit_model import _g
from efficient_probit_regression.datasets import Covertype, KDDCup, Webspam
from efficient_probit_regression import settings

from sklearn.linear_model import LogisticRegression

import plotly.express as px

import numpy as np
import pandas as pd

from scipy import stats

import seaborn as sns

import matplotlib.pyplot as plt

In [None]:
def logistic_loss(x):
    """Evaluate the logistic loss ``log(1 + exp(x))`` element-wise.

    The value is computed as ``np.logaddexp(0, x)``, i.e.
    ``log(exp(0) + exp(x))``. This is the same quantity as the naive formula,
    but it does not overflow to ``inf`` for large positive ``x`` and does not
    round to exactly ``0`` for very negative ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the loss is evaluated.

    Returns
    -------
    numpy floating scalar or numpy.ndarray
        ``log(1 + exp(x))`` with the same shape as ``x``.
    """
    return np.logaddexp(0, x)

In [None]:
# Evaluation grid covering [-3, 10) in steps of 0.01.
x = np.arange(-3, 10, 0.01)

# One column per loss curve, all evaluated on the same grid.
df = pd.DataFrame(
    {
        "x": x,
        "p=1": _g(x, p=1),
        "logistic": logistic_loss(x),
    }
)

# Melt to long format (x, variable, value) so that every loss column becomes
# its own coloured line in the figure.
px.line(df.melt(id_vars=["x"]), x="x", y="value", color="variable")

In [None]:
# Select the dataset to analyse; swap in one of the alternatives to switch.
# dataset = Covertype()
# dataset = KDDCup()
dataset = Webspam()

X = dataset.get_X()
y = dataset.get_y()

# Design matrix with a leading column of ones prepended to X.
# NOTE(review): X_intercept is not used by the cells visible here, which fit
# on X with fit_intercept=False -- confirm whether that is intended.
ones_column = np.ones((X.shape[0], 1))
X_intercept = np.hstack((ones_column, X))

In [None]:
# Fit an unregularised logistic regression without an intercept term.
# The string penalty="none" was deprecated in scikit-learn 1.2 and removed in
# 1.4, so it fails on current releases. C=np.inf with the default penalty
# switches the regularisation term off as well and is accepted by both older
# and newer versions.
logreg_model = LogisticRegression(C=np.inf, fit_intercept=False, max_iter=1000)
logreg_model.fit(X, y)

# First row of the fitted coefficient matrix, displayed as the cell output.
beta_logreg = logreg_model.coef_[0]
beta_logreg

In [None]:
# Wide table: one row per coefficient index, one column per method.
df_beta = pd.DataFrame()
df_beta["beta-index"] = range(X.shape[1])
# Every coefficient vector is divided by its Euclidean norm so that the
# curves are compared on a common scale.
df_beta["logistic"] = beta_logreg / np.linalg.norm(beta_logreg)

for p in [1]:  # other values that were tried: 1.5, 2, 5
    cur_beta = dataset.get_beta_opt(p)
    df_beta[f"p={p}"] = cur_beta / np.linalg.norm(cur_beta)

# Long format (beta-index, method, value): one line per method in the plot.
df_beta = df_beta.melt(id_vars=["beta-index"], var_name="method")

# Global matplotlib settings; they stay in effect for later cells as well.
# text.usetex renders all figure text through LaTeX.
plt.rcParams["text.usetex"] = True
plt.rc("font", size=15)

fig, ax = plt.subplots()
sns.lineplot(data=df_beta, x="beta-index", y="value", hue="method", ax=ax)

ax.set_title(f"{dataset.get_name().capitalize()} - Comparison of Coefficients", fontsize=23)

fig.tight_layout()

# Save through the figure object so the file always contains this figure,
# independent of which figure pyplot currently considers active.
fig.savefig(settings.PLOTS_DIR / f"{dataset.get_name()}-compare-coefficients.pdf")

# plt.show() instead of fig.show(): Figure.show() only works with GUI
# backends and emits a warning without displaying anything on non-GUI
# backends such as the inline notebook backend.
plt.show()