# Convexidad del Modelo Logístico

Demostración práctica de la ventaja de la convexidad (Proposición 2.1) usando el dataset de cáncer de mama de *scikit‑learn*. Se compara la estabilidad de la regresión logística (convexa) con una red neuronal MLP (no convexa).

In [5]:
# --------- Modelo convexo (regresión logística) vs. modelo no convexo (MLP) -------------
# Convexidad vs. no-convexidad en Breast-Cancer (scikit-learn)
import numpy as np, pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
import statsmodels.api as sm

# 1) Data: keep only the first two feature columns, so the logistic model
#    has three coefficients once the intercept is added.
data = load_breast_cancer(as_frame=True)
df = data.frame
X_raw = df[data.feature_names].to_numpy()[:, :2]
y = df["target"].to_numpy()

# Standardise the features and prepend a column of ones for the intercept.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the preprocessing. This does not affect the
# convexity comparison, but fit on X_train only if the accuracies matter.
scaler = StandardScaler()
X_scaled = scaler.fit(X_raw).transform(X_raw)
intercept = np.ones((X_scaled.shape[0], 1))
X = np.hstack([intercept, X_scaled])   # (n, 3)  -> β0, β1, β2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# ----------  A) Logística con gradiente-descenso (convexa) ----------
def nll(beta, X, y):
    """Return the negative log-likelihood of a logistic model.

    Evaluates ``sum(log(1 + exp(z)) - y * z)`` with ``z = X @ beta``.

    Args:
        beta: Coefficient vector.
        X: Design matrix whose columns line up with ``beta``.
        y: Labels, multiplied element-wise with the logits.

    Returns:
        The summed negative log-likelihood.
    """
    logits = X @ beta
    # logaddexp(0, z) is log(1 + exp(z)) evaluated without overflow.
    softplus = np.logaddexp(0, logits)
    return (softplus - y * logits).sum()

def grad_nll(beta, X, y):
    """Return the gradient of the logistic negative log-likelihood.

    Computes ``X.T @ (sigmoid(X @ beta) - y)``.

    Args:
        beta: Coefficient vector.
        X: Design matrix whose columns line up with ``beta``.
        y: Labels, subtracted element-wise from the predicted probabilities.

    Returns:
        The gradient with respect to ``beta``.
    """
    z = X @ beta
    # sigmoid(z) = exp(-log(1 + exp(-z))). Going through logaddexp avoids the
    # overflow that a direct exp(-z) hits for large negative logits.
    p = np.exp(-np.logaddexp(0, -z))
    return X.T @ (p - y)

def gd(beta0, X, y, lr=0.01, max_iter=10_000, tol=1e-6):
    """Minimise the logistic negative log-likelihood by gradient descent.

    Takes fixed-size steps along the negative gradient given by ``grad_nll``.
    The stopping test uses the gradient evaluated *before* the step just
    taken, so the returned point includes that final update.

    Args:
        beta0: Starting coefficients (any array-like); never modified.
        X: Design matrix passed through to ``grad_nll``.
        y: Labels passed through to ``grad_nll``.
        lr: Step size.
        max_iter: Maximum number of gradient steps.
        tol: Stop once the Euclidean norm of the gradient falls below this.

    Returns:
        The coefficient vector after the last step, as a float array.
    """
    # Work on a float copy: the caller's array stays untouched, and integer
    # or list starting points no longer break the in-place update below.
    beta = np.array(beta0, dtype=float)
    for _ in range(max_iter):
        g = grad_nll(beta, X, y)
        beta -= lr * g
        if np.linalg.norm(g) < tol:
            break
    return beta

# Run gradient descent from five different random starting points and stack
# the fitted coefficient vectors row by row (one row per seed).
n_runs = 5
betas = np.empty((n_runs, X.shape[1]))
for seed in range(n_runs):
    rng = np.random.default_rng(seed)
    beta0 = rng.standard_normal(X.shape[1])
    betas[seed] = gd(beta0, X_train, y_train)

# The seed-0 solution is the reference every run is compared against.
beta_ref = betas[0]
dist_to_ref = np.linalg.norm(betas - beta_ref, axis=1)

# Predict class 1 where the linear score is positive (probability > 0.5).
train_acc = []
test_acc = []
for b in betas:
    train_pred = (X_train @ b > 0).astype(int)
    test_pred = (X_test @ b > 0).astype(int)
    train_acc.append(accuracy_score(y_train, train_pred))
    test_acc.append(accuracy_score(y_test, test_pred))

log_df = pd.DataFrame({
    "seed": range(n_runs),
    "β0": betas[:, 0],
    "β1": betas[:, 1],
    "β2": betas[:, 2],
    "‖β−β₀‖₂": dist_to_ref,
    "acc_train": train_acc,
    "acc_test": test_acc,
})
print("=== Regresión logística (función convexa) ===")
display(log_df)

# ----------  B) MLPClassifier (non-convex) ----------
# Drop the constant intercept column: the MLP is fitted on the two scaled
# features only.
X_train_mlp = X_train[:, 1:]
X_test_mlp = X_test[:, 1:]

mlp_rows = []
weights_ref = None
for seed in range(5):
    clf = MLPClassifier(
        hidden_layer_sizes=(5,),
        solver="adam",
        max_iter=600,
        random_state=seed,
    )
    clf.fit(X_train_mlp, y_train)

    # Flatten every weight matrix, then every bias vector, into one vector.
    w = np.concatenate([arr.ravel() for arr in (*clf.coefs_, *clf.intercepts_)])
    if weights_ref is None:
        # The first fit (seed 0) is the reference for the distance column.
        weights_ref = w

    # NOTE(review): hidden units can be permuted without changing the
    # network, so the raw distance below may overstate how different two
    # fits really are; loss_final is not affected by that symmetry.
    row = {
        "seed": seed,
        "loss_final": clf.loss_,
        "n_iter": clf.n_iter_,
        "‖w‖₂": np.linalg.norm(w),
        "‖w−w₀‖₂": np.linalg.norm(w - weights_ref),
        "acc_train": accuracy_score(y_train, clf.predict(X_train_mlp)),
        "acc_test": accuracy_score(y_test, clf.predict(X_test_mlp)),
    }
    mlp_rows.append(row)

mlp_df = pd.DataFrame(mlp_rows)
print("\n=== MLP (no convexa) ===")
display(mlp_df)



=== Regresión logística (función convexa) ===


Unnamed: 0,seed,β0,β1,β2,‖β−β₀‖₂,acc_train,acc_test
0,0,0.706917,-3.37204,-0.968886,0.0,0.892019,0.902098
1,1,0.706917,-3.37204,-0.968886,5.925371e-09,0.892019,0.902098
2,2,0.706917,-3.37204,-0.968886,7.301233e-09,0.892019,0.902098
3,3,0.706917,-3.37204,-0.968886,3.016771e-09,0.892019,0.902098
4,4,0.706917,-3.37204,-0.968886,1.808449e-09,0.892019,0.902098



=== MLP (no convexa) ===


Unnamed: 0,seed,loss_final,n_iter,‖w‖₂,‖w−w₀‖₂,acc_train,acc_test
0,0,0.270136,478,3.719868,0.0,0.894366,0.902098
1,1,0.272342,388,3.677122,4.473173,0.880282,0.895105
2,2,0.277985,321,3.326777,5.784832,0.873239,0.895105
3,3,0.249544,428,4.062318,5.759764,0.901408,0.909091
4,4,0.265755,466,3.889973,5.0216,0.901408,0.902098


## Interpretación

* **Regresión logística (convexa)**  
  - Las 5 corridas convergen al **mismo mínimo global**: la distancia de cada solución a la obtenida con la semilla 0 (columna ‖β−β₀‖₂) es menor que 10⁻⁸.  
  - Accuracy de test idéntica → entrenamiento estable e independiente de la semilla.

* **MLP (no convexa)**  
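  - La pérdida final (0.250–0.278), los pesos (‖w − w₀‖₂ entre 4.5 y 5.8) y la accuracy varían entre semillas → múltiples mínimos locales, sensibilidad a la inicialización. Parte de la distancia entre pesos puede deberse a la simetría de permutación de las neuronas ocultas; la diferencia en la pérdida final no.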

Esta evidencia práctica conecta la *convexidad* de la familia exponencial (Proposición 2.1) con beneficios reales en ciencia de datos: **unicidad del MLE, reproducibilidad y optimización fiable**.