In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score

model_l1 = LogisticRegression(penalty='l1', solver='saga', C=1.0, max_iter=5000)
model_l1.fit(X_train, y_train)

sfm_l1 = SelectFromModel(model_l1, prefit=True, max_features=5)
feat_l1 = X_train.columns[sfm_l1.get_support()]

model = LogisticRegression(max_iter=5000).fit(X_train[feat_l1], y_train)
print("L1 features:", list(feat_l1))
print("Accuracy (L1):", accuracy_score(y_test, model.predict(X_test[feat_l1])))

L1 features: ['mean perimeter', 'area error', 'worst radius', 'worst perimeter', 'worst area']
Accuracy (L1): 0.9590643274853801


In [3]:
import numpy as np

model_l2 = LogisticRegression(penalty='l2', C=1.0, max_iter=5000)
model_l2.fit(X_train, y_train)

abs_coefs = np.abs(model_l2.coef_)[0]
idx = np.argsort(abs_coefs)[-5:]
feat_l2 = X_train.columns[idx]

model = LogisticRegression(max_iter=5000).fit(X_train[feat_l2], y_train)
print("L2 features:", list(feat_l2))
print("Accuracy (L2):", accuracy_score(y_test, model.predict(X_test[feat_l2])))


L2 features: ['worst symmetry', 'worst compactness', 'mean radius', 'worst concavity', 'texture error']
Accuracy (L2): 0.9532163742690059


In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNetCV, LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score

# 1. Standardize the data
scaler = StandardScaler().fit(X_train)
X_tr_s = scaler.transform(X_train)
X_ts_s = scaler.transform(X_test)

# 2. Fit ElasticNetCV (with high max_iter to avoid warnings)
enet_cv = ElasticNetCV(l1_ratio=[.1, .5, .9], cv=5, random_state=0, max_iter=10000)
enet_cv.fit(X_tr_s, y_train)

# 3. Use best model with fixed l1_ratio for SelectFromModel
best_l1 = enet_cv.l1_ratio_
enet = ElasticNetCV(l1_ratio=best_l1, cv=5, random_state=0, max_iter=10000)
enet.fit(X_tr_s, y_train)

# 4. Select top 5 features using model coefficients
sfm_enet = SelectFromModel(enet, prefit=True, max_features=5, threshold=-np.inf)
selected_mask = sfm_enet.get_support()
feat_enet = X_train.columns[selected_mask]

# 5. Train final model on selected features
model = LogisticRegression(max_iter=5000)
model.fit(X_tr_s[:, selected_mask], y_train)

# 6. Evaluate
y_pred = model.predict(X_ts_s[:, selected_mask])
acc = accuracy_score(y_test, y_pred)

print("Elastic Net Selected Features:", list(feat_enet))
print(f"Accuracy (ElasticNet): {acc:.4f}")


Elastic Net Selected Features: ['mean compactness', 'mean concave points', 'radius error', 'worst radius', 'worst area']
Accuracy (ElasticNet): 0.9415


In [6]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNetCV, LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Standardize the data
scaler = StandardScaler().fit(X_train)
X_tr_s = scaler.transform(X_train)
X_ts_s = scaler.transform(X_test)

# 2. Fit ElasticNetCV with increased iterations
enet = ElasticNetCV(
    l1_ratio=[.1, .5, .9],
    cv=5,
    max_iter=10000,
    tol=1e-4,
    random_state=0
)
enet.fit(X_tr_s, y_train)

# 3. Manually select the 5 nonzero coefficients with largest magnitude
coefs = enet.coef_  # shape (n_features,)
# Zero out truly zero (or near-zero) coefficients first
nonzero_idxs = np.where(np.abs(coefs) > 1e-8)[0]
# Sort those by absolute value descending
sorted_nonzero = nonzero_idxs[np.argsort(-np.abs(coefs[nonzero_idxs]))]
top5_idxs = sorted_nonzero[:5]

feat_enet = X_train.columns[top5_idxs]

# 4. Retrain Logistic Regression on selected features
model = LogisticRegression(max_iter=5000).fit(
    X_tr_s[:, top5_idxs],
    y_train
)

# 5. Evaluate on test data
accuracy = accuracy_score(
    y_test,
    model.predict(X_ts_s[:, top5_idxs])
)

print("Elastic Net features:", list(feat_enet))
print("Accuracy (ElasticNet):", accuracy)

Elastic Net features: ['worst radius', 'worst area', 'mean concave points', 'mean compactness', 'radius error']
Accuracy (ElasticNet): 0.9415204678362573
