## Logistic Regression Demonstration

Using the `regression-inference` package

In [1]:
from regression_inference import LogisticRegression, summary

In [2]:
import numpy as np
import pandas as pd

In [3]:
# © Copyright 2007 - 2025, scikit-learn developers (BSD License).
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset as one DataFrame; the 'target' column is used as the response below.
data = load_breast_cancer(as_frame = True).frame

### Model Fitting

- Fit the logistic regression on the full dataset (569 observations; no train/test split is made)

In [4]:
# Constant column of ones; it is reported as `const` in the model summary.
data['const'] = np.ones(len(data))

# Regressors in the order the model will see them (constant first).
feature_cols = ['const', 'mean radius', 'mean texture', 'mean perimeter', 'mean area']

# Keep only the modelling columns and drop any rows with missing values.
features = data[feature_cols + ['target']].dropna()

X = features[feature_cols]
y = features['target']

# cov_type=None requests the default (nonrobust) covariance.
logit_m = LogisticRegression().fit(X=X, y=y, cov_type=None, alpha=0.05)

In [5]:
# Printing a fitted model calls summary(model), so this is
# the same as print(summary(logit_m)).

print(logit_m)

Logistic Regression Results
---------------------------------------------
Dependent:                             target
---------------------------------------------
 
const                                 -1.7729
                                     (6.8701)
 
mean radius                         9.4287***
                                     (1.6396)
 
mean texture                       -0.2376***
                                     (0.0460)
 
mean perimeter                     -1.1507***
                                     (0.1644)
 
mean area                          -0.0328***
                                     (0.0118)

---------------------------------------------
Accuracy                                0.919
Pseudo R-squared                        0.719
LR Statistic                          540.073
Observations                          569.000
Log Likelihood                       -105.683
Null Log Likelihood                  -375.720
Deviance                              211

### Model Predictions


In [7]:
# Feature names without the leading 'const' column; prediction inputs follow this order.
logit_m.feature_names[1:]

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area'], dtype='object')

In [8]:
# Prediction inputs must follow the order of logit_m.feature_names[1:].
# A flat list is treated as one observation; the result is its predicted probability.

logit_m.predict( X = [12.45, 15.7, 82.57, 477.1] )

array(0.77558695)

In [9]:
# Same observation as above, now returned with inference statistics.
# return_table=True needs a 2-D input, hence the nested list.
prediction = logit_m.predict(
    X=[[12.45, 15.7, 82.57, 477.1]],
    return_table=True,
)

pd.DataFrame(prediction)

Unnamed: 0,features,prediction_prob,prediction_class,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"{'mean radius': '12.45', 'mean texture': '15.7...",0.7756,1,0.3655,3.3926,0.001,0.628,0.8762


In [10]:
# Each entry is a 2-D, single-row input, as return_table=True requires.
prediction_set = [
    [[12.45, 15.7, 82.57, 477.1]],
    [[16.02, 23.24, 102.7, 797.8]],
]

# Build one inference table per observation, then stack them into one frame.
tables = [
    pd.DataFrame(logit_m.predict(X=obs, return_table=True))
    for obs in prediction_set
]
predictions = pd.concat(tables)

predictions

Unnamed: 0,features,prediction_prob,prediction_class,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"{'mean radius': '12.45', 'mean texture': '15.7...",0.7756,1,0.3655,3.3926,0.001,0.628,0.8762
0,"{'mean radius': '16.02', 'mean texture': '23.2...",0.3629,0,0.3947,-1.4263,0.154,0.2081,0.5525


In [11]:
# Predict at the sample mean of every non-constant feature.
# Iterating over feature_names[1:] keeps the values in the model's feature order.
sample_mean = [X[name].mean() for name in logit_m.feature_names[1:]]

# A single 2-D, single-row input.
prediction_set = [[sample_mean]]

tables = [
    pd.DataFrame(logit_m.predict(X=obs, return_table=True))
    for obs in prediction_set
]
predictions = pd.concat(tables)

predictions

Unnamed: 0,features,prediction_prob,prediction_class,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"{'mean radius': '14.13', 'mean texture': '19.2...",0.3919,0,0.4191,-1.0482,0.295,0.2209,0.5944


In [12]:
'''
Predict over a grid of 'mean radius' values, holding every other feature at its sample mean.

Each input row must follow the order of logit_m.feature_names[1:], i.e.
['mean radius', 'mean texture', 'mean perimeter', 'mean area'].

For an example where the varied feature is not the first one, see tests/linear_regression_example.ipynb
'''

# 'mean radius' is the first feature, so nothing precedes it (the prev_* names are None);
# the features that follow it are fixed at their sample means.
prev_names = None
post_names = ['mean texture', 'mean perimeter', 'mean area']

mean_prev = None
mean_post = [X[name].mean() for name in post_names]

# 30 evenly spaced values spanning the observed range of 'mean radius'.
prediction_range = np.linspace(
    X['mean radius'].min(),
    X['mean radius'].max(),
    num=30,
)

# One 2-D, single-row input per grid value: [radius, <means of the remaining features>].
prediction_set = [[[radius] + mean_post] for radius in prediction_range]

tables = [
    pd.DataFrame(logit_m.predict(X=obs, return_table=True))
    for obs in prediction_set
]
predictions = pd.concat(tables)

predictions.tail()

Unnamed: 0,features,prediction_prob,prediction_class,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"{'mean radius': '25.20', 'mean texture': '19.2...",1.0,1,17.8493,5.8222,0.0,1.0,1.0
0,"{'mean radius': '25.92', 'mean texture': '19.2...",1.0,1,19.0437,5.8177,0.0,1.0,1.0
0,"{'mean radius': '26.65', 'mean texture': '19.2...",1.0,1,20.2381,5.8138,0.0,1.0,1.0
0,"{'mean radius': '27.38', 'mean texture': '19.2...",1.0,1,21.4326,5.8103,0.0,1.0,1.0
0,"{'mean radius': '28.11', 'mean texture': '19.2...",1.0,1,22.6271,5.8072,0.0,1.0,1.0


### Coefficient Inference Table

- Comprehensive regression inference

In [13]:
# Per-coefficient estimate, standard error, z statistic, p-value and confidence bounds (alpha = 0.05).
pd.DataFrame(logit_m.inference_table())

Unnamed: 0,feature,coefficient,std_error,z_statistic,P>|t|,ci_low_0.05,ci_high_0.05
0,const,-1.7729,6.8701,-0.2581,0.796,-15.2381,11.6923
1,mean radius,9.4287,1.6396,5.7507,0.0,6.2152,12.6423
2,mean texture,-0.2376,0.046,-5.1622,0.0,-0.3278,-0.1474
3,mean perimeter,-1.1507,0.1644,-7.0009,0.0,-1.4728,-0.8285
4,mean area,-0.0328,0.0118,-2.7713,0.006,-0.0559,-0.0096


### Variance Inflation Factor

- Generate a VIF table on the model's features

In [14]:
# One VIF per non-constant feature; large values flag multicollinearity among the predictors.
pd.DataFrame(logit_m.variance_inflation_factor())

Unnamed: 0,feature,VIF
0,mean radius,254.2695
1,mean texture,1.1294
2,mean perimeter,239.4913
3,mean area,40.4028


### Robust Covariance (Preview)

- Preview the effect of robust covariances without applying them to the model

In [15]:
# Preview only: table of HC0 robust standard errors, z statistics, p-values and confidence bounds.
# NOTE(review): HC0 is conventionally White's uncorrected sandwich estimator -- confirm against the package docs.
pd.DataFrame(logit_m.robust_se(type="HC0"))

Unnamed: 0,feature,robust_se,robust_z,robust_p,ci_low_0.05,ci_high_0.05
0,const,5.267778,-0.336557,0.736451,-12.097563,8.551749
1,mean radius,1.751898,5.382013,7.365753e-08,5.995081,12.862395
2,mean texture,0.042113,-5.64224,1.678523e-08,-0.320149,-0.15507
3,mean perimeter,0.213494,-5.389642,7.0598e-08,-1.569096,-0.732215
4,mean area,0.009432,-3.474196,0.0005123872,-0.051257,-0.014283


In [16]:
# Preview only: the same table using HC1.
# NOTE(review): HC1 is conventionally HC0 with a degrees-of-freedom correction -- confirm against the package docs.
pd.DataFrame(logit_m.robust_se(type="HC1"))

Unnamed: 0,feature,robust_se,robust_z,robust_p,ci_low_0.05,ci_high_0.05
0,const,5.291077,-0.335075,0.7375686,-12.143227,8.597413
1,mean radius,1.759646,5.358314,8.400224e-08,5.979894,12.877582
2,mean texture,0.042299,-5.617395,1.938582e-08,-0.320514,-0.154705
3,mean perimeter,0.214438,-5.36591,8.054215e-08,-1.570947,-0.730365
4,mean area,0.009474,-3.458898,0.0005423912,-0.051339,-0.014201


In [17]:
# Preview only: the same table using HC2.
# NOTE(review): HC2 is conventionally a leverage-adjusted estimator -- confirm against the package docs.
pd.DataFrame(logit_m.robust_se(type="HC2"))

Unnamed: 0,feature,robust_se,robust_z,robust_p,ci_low_0.05,ci_high_0.05
0,const,5.423556,-0.32689,0.743751,-12.402881,8.857067
1,mean radius,1.768828,5.330502,9.794188e-08,5.9619,12.895576
2,mean texture,0.042773,-5.555161,2.773568e-08,-0.321443,-0.153777
3,mean perimeter,0.215067,-5.350222,8.784617e-08,-1.572179,-0.729132
4,mean area,0.009721,-3.371166,0.0007485065,-0.051822,-0.013718


In [18]:
# Preview only: the same table using HC3.
# NOTE(review): HC3 is conventionally the most conservative leverage adjustment of the four -- confirm against the package docs.
pd.DataFrame(logit_m.robust_se(type="HC3"))

Unnamed: 0,feature,robust_se,robust_z,robust_p,ci_low_0.05,ci_high_0.05
0,const,5.591738,-0.317058,0.7511993,-12.732512,9.186698
1,mean radius,1.786633,5.277379,1.310445e-07,5.927002,12.930474
2,mean texture,0.043463,-5.466905,4.579605e-08,-0.322796,-0.152423
3,mean perimeter,0.216685,-5.310282,1.094559e-07,-1.57535,-0.725962
4,mean area,0.010033,-3.266224,0.00108992,-0.052434,-0.013106


### Robust Covariance (Apply on Fit)

- Apply a robust covariance to a model during fit

- Subsequent predictions will be made with the robust covariance

In [19]:
# Refit the same specification once per robust covariance estimator.
# target_name labels each model's column in the comparison summary.
robust_hc0, robust_hc1, robust_hc2, robust_hc3 = (
    LogisticRegression().fit(
        X=X, y=y, cov_type=cov_type, alpha=0.05, target_name=f"target{cov_type}"
    )
    for cov_type in ("HC0", "HC1", "HC2", "HC3")
)

In [20]:
# Side-by-side comparison: the nonrobust model first, then the HC0-HC3 fits.
# The coefficients are the same in every column; only the standard errors (in parentheses) change.

print(summary(logit_m, robust_hc0, robust_hc1, robust_hc2, robust_hc3))

Logistic Regression Results
---------------------------------------------------------------------------------------------------------
Dependent:                             target      targetHC0      targetHC1      targetHC2      targetHC3
---------------------------------------------------------------------------------------------------------
 
const                                 -1.7729        -1.7729        -1.7729        -1.7729        -1.7729
                                     (6.8701)       (5.2678)       (5.2911)       (5.4236)       (5.5917)
 
mean radius                         9.4287***      9.4287***      9.4287***      9.4287***      9.4287***
                                     (1.6396)       (1.7519)       (1.7596)       (1.7688)       (1.7866)
 
mean texture                       -0.2376***     -0.2376***     -0.2376***     -0.2376***     -0.2376***
                                     (0.0460)       (0.0421)       (0.0423)       (0.0428)       (0.0435)
 
mean perim