# Matplotlib & Seaborn

**Matplotlib** es la biblioteca de visualización más básica y flexible en Python.

**Seaborn** es una biblioteca de alto nivel para crear gráficos estadísticos atractivos y fáciles de entender, que permite visualizar distribuciones, relaciones y patrones en datos con muy poco código.

In [None]:
import numpy as np
from optbinning import OptimalBinning
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import seaborn as sns

# import warnings
# warnings.filterwarnings("ignore", category=FutureWarning)

## Paleta de colores

In [None]:
# Discrete palette: three hex colours (blue, orange, red) wrapped in a ListedColormap.
discrete_hex_codes = ["#1934e3", "#f0aa29", "#f02c29"]
custom_color_discrete = ListedColormap(discrete_hex_codes)
# Bare expression as the last statement of the cell so the colormap is displayed.
custom_color_discrete

In [None]:
# Continuous palette: interpolate between the same three colours, anchored at
# positions 0, 0.5 and 1.0 of the colormap range.
color_stops = [(0, "#1934e3"), (0.5, "#f0aa29"), (1.0, "#f02c29")]
custom_color_continuous = LinearSegmentedColormap.from_list(
    "blue_orange_red",
    color_stops,
)
# Bare expression as the last statement of the cell so the colormap is displayed.
custom_color_continuous

## Theme

In [None]:
# Custom rcParams layered on top of the seaborn theme.
# Spines: only the bottom one is kept visible.
spine_rc = {
    f"axes.spines.{side}": side == "bottom"
    for side in ("bottom", "left", "right", "top")
}
# Tick marks are switched off on the bottom, left and right sides.
tick_mark_rc = dict.fromkeys(["xtick.bottom", "ytick.left", "ytick.right"], False)
# Tick labels share one font size on both axes.
tick_label_rc = dict.fromkeys(["xtick.labelsize", "ytick.labelsize"], 14)

custom_rc = {
    "axes.titlesize": 18,
    **spine_rc,
    **tick_mark_rc,
    **tick_label_rc,
}

# Apply the theme, using the discrete colormap's colours as the default palette.
# Alternative palette sampled from the continuous colormap (needs `nu_colors`):
#   palette=[custom_color_continuous(i) for i in np.linspace(0., 1., nu_colors)]
theme_options = {
    "context": "paper",
    "style": "white",
    "palette": custom_color_discrete.colors,
    "rc": custom_rc,
}
sns.set_theme(**theme_options)

In [None]:
# Print matplotlib's current rcParams to see which settings can be customized.
print(plt.rcParams)

## Pintar graficos

In [None]:
# Simulate a binary-classification dataset: two Gaussian features and a target
# obtained by thresholding a noisy logistic score at 0.5.
N = 1000
np.random.seed(2025)
sample_shape = (N, 1)

# Draw order (x1, x2, noise) matters for reproducibility under the fixed seed.
x1 = np.random.normal(loc=2, scale=3, size=sample_shape)
x2 = np.random.normal(loc=-3, scale=4, size=sample_shape)
noi1 = np.random.normal(loc=0, scale=1, size=sample_shape)

linear_score = 1 + 5 * x1 - 3 * x2 + noi1
y = 1 / (1 + np.exp(-linear_score))
y = (y > 0.5).astype(np.int64)

# Stacking the int target next to the float features yields an all-float frame.
feature_target_matrix = np.concatenate([x1, x2, y], axis=1)
df = pd.DataFrame(feature_target_matrix, columns=["X1", "X2", "Y"])
df.head()

In [None]:
# Monotonic trend requested per feature; looked up by column name and passed
# as `monotonic_trend` to OptimalBinning in the binning loop.
dict_tends = {"X1": "ascending", "X2": "descending"}

In [None]:
# Bin every feature whose column name ends in a digit with OptimalBinning and
# store a sortable, human-readable bin label in "<feature>_CAT".
#
# Label scheme (k = number of split points):
#   "00.MISSING"        rows where the feature is null
#   "01.<s1" … "0k.<sk" rows below each successive split (lower bound implied)
#   "0(k+1).>=sk"       rows at or above the last split
for c in df.filter(regex=r".*\d+$").columns:
    print(c)

    opt_bin = OptimalBinning(
        name=c,
        dtype="numerical",
        solver="cp",
        monotonic_trend=dict_tends[c],
        min_prebin_size=0.05,
        divergence="iv",
        min_n_bins=3,
        max_n_bins=5,
        min_bin_size=0.05,
    )

    opt_bin.fit(df[c], df["Y"])
    splits = opt_bin.splits
    print(splits)
    # print(opt_bin.binning_table.build())

    cat_col = f"{c}_CAT"
    df[cat_col] = None
    df.loc[df[c].isna(), cat_col] = "00.MISSING"

    # Walk the split points in order: rows that are still unlabelled and fall
    # below the current split belong to that bin.
    # NOTE(review): assumes `splits` is sorted ascending - confirm in the
    # optbinning docs.
    for i, limite in enumerate(splits, start=1):
        unassigned_below = df[cat_col].isna() & (df[c] < limite)
        # "{i:02d}" zero-pads the prefix; the old " 02d" spec emitted a leading
        # space instead, and the label said "<=" while the test is "<".
        df.loc[unassigned_below, cat_col] = f"{i:02d}.<{np.round(limite, 4)}"

    # Final bin: everything at or above the last split. Guarded so an empty
    # `splits` never reads an undefined or stale loop variable; in that case
    # non-null rows stay unlabelled.
    if len(splits) > 0:
        last_split = splits[-1]
        # ">=" so values exactly equal to the last split are labelled too.
        unassigned_above = df[cat_col].isna() & (df[c] >= last_split)
        # The prefix is one past the last "<" bin so the two do not collide.
        df.loc[unassigned_above, cat_col] = (
            f"{len(splits) + 1:02d}.>={np.round(last_split, 4)}"
        )

In [None]:
# Bivariate chart per binned feature: bar chart of row counts per bin (right
# axis) overlaid with the mean of the target per bin (left axis).
target = "Y"
cat_columns = list(df.filter(regex=".*_CAT$").columns)

# One panel per *_CAT column instead of a hard-coded 2; with two columns the
# figure size is still (10, 5). `max(..., 1)` keeps subplots() valid when no
# column matches, and squeeze=False makes `axs` indexable for any panel count.
n_panels = max(len(cat_columns), 1)
fig, axs = plt.subplots(
    n_panels,
    1,
    figsize=(10, 2.5 * n_panels),
    squeeze=False,
    constrained_layout=True,
)
axs = axs[:, 0]

for i, c in enumerate(cat_columns):
    print(c)

    axs[i].set_title(rf"Bivariado {c}")

    # Per-bin row count and mean of the target.
    df_plot = df.groupby([c], as_index=False).agg(
        COUNT=(c, "count"), RD=(target, "mean")
    )

    # Counts go on a twin y-axis so they do not share a scale with the rate.
    barplot = sns.barplot(
        x=df_plot[c],
        y=df_plot["COUNT"],
        ax=axs[i].twinx(),
        color="lightgreen",
    )
    for container in barplot.containers:
        # Add value labels above the bars.
        barplot.bar_label(container, padding=3)
    barplot.tick_params(axis="y", which="both", right=False, labelright=True)

    scatter = sns.scatterplot(
        x=df_plot[c],
        y=df_plot["RD"],
        ax=axs[i],
        color="black",
    )
    # Draw the primary axes above the twin axes and hide its background patch
    # so the points sit on top of the bars without covering them.
    scatter.set_zorder(10)
    scatter.set_xlabel("")
    scatter.patch.set_visible(False)
    scatter.tick_params(axis="y", which="both", left=False, labelleft=True)


plt.show()
plt.close()