In [None]:
import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.datasets import make_blobs

import pandas as pd

import numpy as np

from efficient_probit_regression.probit_model import PGeneralizedProbitModel
from efficient_probit_regression import settings

In [None]:
def get_line_y(x: np.ndarray, beta: np.ndarray):
    """Solve ``beta[0] + beta[1] * x + beta[2] * y = 0`` for ``y``.

    Parameters
    ----------
    x : values of the first coordinate at which to evaluate the line.
    beta : indexable with at least three entries; ``beta[0]`` is the
        constant term, ``beta[1]`` and ``beta[2]`` the coefficients of
        the first and second coordinate.

    Returns
    -------
    The second coordinate of the line for every entry of ``x``.

    Note: there is no guard for ``beta[2] == 0`` (a vertical line).
    """
    const_coef, x1_coef, x2_coef = beta[0], beta[1], beta[2]
    slope = -x1_coef / x2_coef
    offset = const_coef / x2_coef
    return slope * x - offset

def get_lines_df(data_X: np.ndarray, data_y: np.ndarray, p_list: list, x_min=-5, x_max=5):
    """Fit one p-generalized probit model per value of ``p`` and sample its
    decision line on a grid.

    Parameters
    ----------
    data_X : 2-D feature array with one row per sample. A column of ones is
        prepended as the intercept, and ``get_line_y`` reads ``beta[0..2]``,
        so two feature columns are expected.
    data_y : labels, one per row of ``data_X``. They are mapped through
        ``2 * y - 1`` before fitting, i.e. 0/1 labels become -1/+1.
    p_list : values of ``p`` for which a model is fitted.
    x_min, x_max : range of the ``x1`` grid (step 0.01, ``x_max`` excluded).

    Returns
    -------
    pandas.DataFrame with columns ``x1``, ``x2`` and ``p``: the sampled
    lines of all models stacked on top of each other with a fresh index.
    """
    # Work on the arguments, not on notebook-level globals named X / y.
    features = np.asarray(data_X)
    labels = np.asarray(data_y)

    X_intercept = np.hstack((np.ones((features.shape[0], 1)), features))
    y_pm = 2 * labels - 1

    x = np.arange(x_min, x_max, 0.01)
    df_list = []
    for p in p_list:
        model = PGeneralizedProbitModel(p=p, X=X_intercept, y=y_pm)
        model.fit()
        beta = model.get_params()
        # The scalar p is broadcast to every row of the frame.
        df_list.append(pd.DataFrame({"x1": x, "x2": get_line_y(x, beta), "p": p}))

    return pd.concat(df_list, ignore_index=True)

In [None]:
# Three Gaussian blobs: two large unit-variance clusters around (-1, -1) and
# (1, 1), plus a small, tight cluster of 5 points around (6.5, 4).
centers = np.array([
    [-1, -1],
    [1, 1],
    [6.5, 4]]
)
X, y = make_blobs(n_samples=[80, 80, 5], n_features=2, centers=centers, cluster_std=[1, 1, 0.5], random_state=1)
df = pd.DataFrame(X, columns=["x1", "x2"])
# Relabel the third blob (label 2) as class 0, so class 0 contains a handful
# of points far away from its main cluster; the problem becomes binary.
y = np.where(y==2, 0, y)
df["y"] = y

# One fitted decision line per value of p.
lines_df = get_lines_df(X, y, p_list = [1, 1.5, 2, 3, 5])

# use TeX for typesetting
plt.rcParams["text.usetex"] = True
plt.rc("font", size=15)

fig, ax = plt.subplots()
sns.scatterplot(data=df, x="x1", y="x2", hue="y", legend=False, ax=ax)

sns.lineplot(data=lines_df, x="x1", y="x2", hue="p", ax=ax, palette="flare")

ax.legend(loc="lower right", title="p")
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")

ax.set_title("Multiple values of p", fontsize=23)

fig.tight_layout()

# Save through the figure object rather than pyplot's implicit current figure.
fig.savefig(settings.PLOTS_DIR / "2d-example.pdf")

# plt.show() works with the notebook's inline backend; Figure.show() only
# emits a "non-GUI backend" warning there.
plt.show()