In [24]:
# import necessary libraries
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the breast cancer dataset bundled with scikit-learn.
data = load_breast_cancer()

# Hold out 20% of the samples for testing. Passing the labels as `stratify`
# keeps the class proportions the same in both splits, which matters when
# the classes are unbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=0,
    stratify=data.target,
)

# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Logistic regression with ridge (L2) regularization, fitted on the
# training split.
clf = LogisticRegression(penalty='l2', solver='lbfgs', C=1.0).fit(X_train, y_train)

# Report the classifier's score on the held-out test split as a percentage.
score = clf.score(X_test, y_test)
print("Accuracy: {:.2f}%".format(score * 100))


Accuracy: 98.25%


In [25]:
# Predict a class label for every sample in the held-out test split.
y_pred = clf.predict(X_test)

# Compare the predictions with the true test labels and report the
# fraction that match as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 98.25%


In [26]:
# Shape of the full, unsplit feature matrix: (samples, features).
data.data.shape

(569, 30)

In [27]:
# Shape of the full, unsplit label vector: one entry per sample.
data.target.shape

(569,)

In [28]:
# Print the shape of each split in the order X_train, X_test, y_train,
# y_test. The second dimension of the X arrays is the feature count (30).
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(455, 30)
(114, 30)
(455,)
(114,)


In [29]:
# Class labels known to the fitted classifier (the distinct values of the
# training target).
clf.classes_

array([0, 1])

In [30]:
# Fitted coefficient for each of the 30 input features, returned as a single
# row. The model was trained on the standardized features, so these weights
# apply to the scaled inputs, not to the raw measurements.
clf.coef_

array([[-0.5326682 , -0.43264086, -0.48207258, -0.57327155, -0.12763767,
         0.45207129, -0.74207231, -0.96572449,  0.09879843,  0.43493673,
        -1.33479445, -0.01949011, -0.64071991, -0.84860967, -0.19969471,
         0.65257659,  0.07910152, -0.44696514,  0.42360067,  0.63203887,
        -0.98922283, -0.97626767, -0.79922226, -0.87625624, -0.74027249,
        -0.1818895 , -0.76610672, -0.84648921, -0.77219306, -0.60784768]])