In [1]:
from sklearn.utils import Bunch
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
target = np.array([1, 0, 1])
feature_names = ['feature1', 'feature2', 'feature3']
target_names = ['class0', 'class1']

# Create the Bunch object
my_bunch = Bunch(data=data, target=target, feature_names=feature_names, target_names=target_names)


In [51]:
from sklearn.linear_model import LogisticRegression

# Load the iris dataset
X = my_bunch.data
y = my_bunch.target

# Fit a model
model = LogisticRegression()
model.fit(X, y)
y_pred = model.predict(X)
print(y_pred)
y_pred_proba = model.predict_proba(X)[:, 1]
print(y_pred_proba)

[1 1 1]
[0.66665234 0.66666141 0.66667048]


In [52]:
import pandas as pd
data_list = [[1.1, 1.12, 1.1], [0.13, 0.06, 0.13], [1.11, 1.13, 1.15]]
target_list = [1, 0, 1] 
data_array = np.array(data_list)
target_array = np.array(target_list)
data_frame = pd.DataFrame(data_list, columns=['feature1', 'feature2', 'feature3'])
data_frame['target'] = target_list


# Fit a model use the list
logit = LogisticRegression()
logit.fit(data_list, target_list)
print(logit.predict(data_list))
print(logit.predict_proba(data_list)[:, 1])

# Fit a model use the array
logit1 = LogisticRegression()
logit1.fit(data_array, target_array)
print(logit1.predict(data_array))
print(logit1.predict_proba(data_array)[:, 1])

# Fit a model use the DataFrame
logit2 = LogisticRegression()
logit2.fit(data_frame[['feature1', 'feature2', 'feature3']], data_frame['target'])
print(logit2.predict(data_frame[['feature1', 'feature2', 'feature3']]))
print(logit2.predict_proba(data_frame[['feature1', 'feature2', 'feature3']])[:, 1])

coef2 = logit2.coef_[0]
intercept2 = logit2.intercept_[0]
print(coef2, intercept2)

[1 0 1]
[0.76946551 0.45540594 0.77508227]
[1 0 1]
[0.76946551 0.45540594 0.77508227]
[1 0 1]
[0.76946551 0.45540594 0.77508227]
[0.44398633 0.48498809 0.45298247] -0.324556726081456


In [24]:
# Calculate the standard errors of the coefficients
# The covariance matrix of the coefficients is the inverse of the Hessian matrix
from scipy import stats

X_train = data_frame[['feature1', 'feature2', 'feature3']]
X_train = np.hstack((X_train, np.ones((X_train.shape[0], 1))))
cov_matrix = np.linalg.inv(np.dot(X_train.T, X_train))
standard_errors = np.sqrt(np.diag(cov_matrix))

# Perform the Wald test
wald_statistics = (np.append(coef2, intercept2) / standard_errors) ** 2
p_values = stats.chi2.sf(wald_statistics, df=1)

# Create a DataFrame to display the results
results = pd.DataFrame({
    'Coefficient': np.append(coef2, intercept2),
    'Standard Error': standard_errors, 
    'Wald Statistic': wald_statistics, 
    'p-value': p_values
}, index=['feature1', 'feature2', 'feature3', 'intercept'])

print(results)

           Coefficient  Standard Error  Wald Statistic  p-value
feature1      0.443986             NaN             NaN      NaN
feature2      0.484988             NaN             NaN      NaN
feature3      0.452982             NaN             NaN      NaN
intercept    -0.324557             NaN             NaN      NaN


  standard_errors = np.sqrt(np.diag(cov_matrix))


In [25]:
from skorecard.linear_model import LogisticRegression as skorecard_logit
sklogit = skorecard_logit(calculate_stats=True)
sklogit.fit(data_frame[['feature1', 'feature2', 'feature3']], data_frame['target'])
print(sklogit.coef_)
print(sklogit.intercept_)
sklogit.get_stats()






[[0.44398633 0.48498809 0.45298247]]
[-0.32455673]




Unnamed: 0,Coef.,Std.Err,z,P>|z|
const,-0.324557,33525990.0,-9.680751e-09,1.0
feature1,0.443986,455928200.0,9.738077e-10,1.0
feature2,0.484988,408545300.0,1.18711e-09,1.0
feature3,0.452982,9476567.0,4.780027e-08,1.0


In [20]:
Array1 = np.array([[1,2,3],[9,19,25]])
print(Array1.shape[0])
print(Array1.shape[1])
print(np.ones((Array1.shape[1], 1)))
Array2 = np.hstack((Array1, np.ones((Array1.shape[0], 1))))
Array3 = np.dot(Array2.T, Array2)
print(Array3)
print(np.linalg.inv(Array3))
print(np.sqrt(np.diag(np.linalg.inv(Array3))))


2
3
[[1.]
 [1.]
 [1.]]
[[ 82. 173. 228.  10.]
 [173. 365. 481.  21.]
 [228. 481. 634.  28.]
 [ 10.  21.  28.   2.]]
[[ 1.66913998e+14 -3.69399833e+13 -3.21514669e+13  3.42036881e+12]
 [ 2.38257967e+18  2.13365343e+17 -1.03126583e+18  2.84487125e+17]
 [-1.84114498e+18 -1.64859787e+17  7.96898921e+17 -2.19832204e+17]
 [ 7.58108705e+17  6.78856147e+16 -3.28132959e+17  9.05189415e+16]]
[1.29195201e+07 4.61914866e+08 8.92691952e+08 3.00863659e+08]


In [27]:
import statsmodels.api as sm
smlogit = sm.Logit(data_frame['target'], sm.add_constant(data_frame[['feature1', 'feature2', 'feature3']]))
result = smlogit.fit()
print(result.summary())

         Current function value: 0.000000
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:                 target   No. Observations:                    3
Model:                          Logit   Df Residuals:                        0
Method:                           MLE   Df Model:                            2
Date:                Sun, 09 Feb 2025   Pseudo R-squ.:                   1.000
Time:                        21:49:51   Log-Likelihood:            -5.4052e-12
converged:                      False   LL-Null:                       -1.9095
Covariance Type:            nonrobust   LLR p-value:                    0.1481
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -30.9048   3.68e+12  -8.39e-12      1.000   -7.22e+12    7.22e+12
feature1      25.6297   5.01e+13   5.12e-13      1.000   -9.82e+1



In [73]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Generate some example data
np.random.seed(0)
n_samples = 100
X = np.random.rand(n_samples, 3)
X = sm.add_constant(X)  # Add intercept term
y = (X[:, 1] + X[:, 2] * 2 + np.random.randn(n_samples) > 1.5).astype(int)

# Create a DataFrame for the independent variables
data_frame = pd.DataFrame(X, columns=['const', 'feature1', 'feature2', 'feature3'])

# Fit a logistic regression model using statsmodels
logit_model = sm.Logit(y, data_frame)
result = logit_model.fit()

# Print the summary of the model
print(result.summary())

# Get the coefficients and intercept
print("Coefficients:", result.params)
print("Standard Errors:", result.bse)
print("P-values:", result.pvalues)

Optimization terminated successfully.
         Current function value: 0.589044
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                  100
Model:                          Logit   Df Residuals:                       96
Method:                           MLE   Df Model:                            3
Date:                 So, 09 Feb 2025   Pseudo R-squ.:                  0.1492
Time:                        22:46:18   Log-Likelihood:                -58.904
converged:                       True   LL-Null:                       -69.235
Covariance Type:            nonrobust   LLR p-value:                 0.0001238
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5450      0.761     -3.344      0.001      -4.037      -1.053
feature1       1.3975      0.

In [69]:
logit5 = LogisticRegression(fit_intercept=True, solver='lbfgs', penalty = None) 
logit5.fit(data_frame[['feature1', 'feature2', 'feature3']], y)
print(logit5.coef_ , logit5.intercept_)


[[1.39545798 3.10830058 0.82937716]] [-2.54370352]


In [71]:
sklogit = skorecard_logit(calculate_stats=True, penalty=None)
sklogit.fit(data_frame[['feature1', 'feature2', 'feature3']], y)
print(sklogit.coef_, sklogit.intercept_)
print(sklogit.get_stats())

[[1.39545798 3.10830058 0.82937716]] [-2.54370352]
             Coef.   Std.Err         z     P>|z|
const    -2.543704  0.760960 -3.342757  0.000830
feature1  1.395458  0.776272  1.797641  0.072234
feature2  3.108301  0.826957  3.758720  0.000171
feature3  0.829377  0.832969  0.995688  0.319402


