In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [27]:
# Load the breast-cancer example dataset that ships with scikit-learn
data = load_breast_cancer()
# Display the dimensions of the feature matrix as (n_samples, n_features)
data["data"].shape

(569, 30)

In [28]:
# Split the data into training (80%) and testing (20%) partitions.
# random_state pins the shuffle so that Restart Kernel -> Run All yields the
# same partitions every time; without it every downstream number (scaler
# statistics, probabilities, accuracy) changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)
# Display the training-partition dimensions as a sanity check on the split
X_train.shape

(455, 30)

In [29]:
# Learn the standardization statistics from the training partition only,
# so nothing from the test partition influences the scaler
standard_scaler_obj = StandardScaler()
standard_scaler_obj.fit(X_train)
# Display the per-feature means learned during fit
standard_scaler_obj.mean_

array([1.41660835e+01, 1.92574945e+01, 9.22792308e+01, 6.58233407e+02,
       9.66960000e-02, 1.05846571e-01, 9.08695356e-02, 4.95933736e-02,
       1.82161978e-01, 6.29215604e-02, 4.10736923e-01, 1.21290593e+00,
       2.90823077e+00, 4.09620330e+01, 6.98922857e-03, 2.58960396e-02,
       3.27431356e-02, 1.19510769e-02, 2.04615143e-02, 3.86733363e-03,
       1.63199143e+01, 2.56236703e+01, 1.07679912e+02, 8.83643516e+02,
       1.32472747e-01, 2.58369385e-01, 2.76962305e-01, 1.15927015e-01,
       2.90826813e-01, 8.43005275e-02])

In [30]:
# Display the per-feature scaling factors learned by the fitted scaler
standard_scaler_obj.scale_

array([3.53762834e+00, 4.19646946e+00, 2.44895827e+01, 3.56529873e+02,
       1.42463314e-02, 5.40453235e-02, 8.29820066e-02, 3.96506524e-02,
       2.81052329e-02, 6.90902429e-03, 2.85596732e-01, 5.40779133e-01,
       2.10806527e+00, 4.72055274e+01, 2.93654020e-03, 1.85071163e-02,
       3.23883238e-02, 6.20472608e-03, 8.08198686e-03, 2.77769713e-03,
       4.79884614e+00, 6.06756952e+00, 3.36089983e+01, 5.61259957e+02,
       2.32165501e-02, 1.61270618e-01, 2.15813312e-01, 6.64082862e-02,
       6.17800329e-02, 1.83715453e-02])

In [31]:
# Apply the scaler fitted on the training partition to both partitions;
# the test partition is transformed with the training statistics, never refit
X_train_standardized, X_test_standardized = (
    standard_scaler_obj.transform(partition) for partition in (X_train, X_test)
)

In [32]:
# Train a logistic-regression classifier on the standardized training features.
# max_iter=1000 raises the cap on solver iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_standardized, y=y_train)

In [35]:
# Predict class-membership probabilities for the standardized test features.
# predict_proba returns one column per class, ordered as in model.classes_.
probs = model.predict_proba(X_test_standardized)
# Keep only column 1, i.e. the probability of the class at model.classes_[1]
probs = probs[:, 1]
probs

array([9.99014253e-01, 9.26286785e-03, 9.57994500e-01, 8.95300954e-01,
       9.97678502e-01, 9.94462292e-01, 9.99866695e-01, 9.99589965e-01,
       4.70960919e-04, 2.77400744e-01, 9.99295681e-01, 2.91783087e-01,
       9.99502497e-01, 9.99985724e-01, 5.93908546e-07, 1.39958737e-07,
       3.65193931e-02, 9.79000360e-01, 9.96240902e-01, 9.99986333e-01,
       1.00672269e-04, 7.92916342e-01, 3.79349340e-03, 9.99958134e-01,
       9.99996347e-01, 3.85533591e-01, 1.08879557e-10, 1.44859395e-12,
       9.99998550e-01, 9.99983245e-01, 9.61031181e-01, 9.86938609e-01,
       1.06351650e-04, 9.98215515e-01, 3.95743632e-02, 5.73329223e-08,
       9.85624339e-01, 5.04067698e-02, 9.99212759e-01, 9.99885219e-01,
       3.48583741e-03, 7.45118352e-01, 9.97207270e-01, 8.83654803e-01,
       5.68272155e-01, 9.99778928e-01, 9.99999193e-01, 9.95714617e-01,
       9.99986887e-01, 9.99008023e-01, 9.82135469e-05, 6.11873203e-04,
       9.99825076e-01, 9.94763067e-01, 6.42622914e-01, 9.99767594e-01,
      

In [36]:
# Predict a hard class label for every row of the standardized test features
predicted_class_label = model.predict(X=X_test_standardized)
predicted_class_label

array([1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 0])

In [38]:
# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=y_test, y_pred=predicted_class_label)

0.9824561403508771