In [1]:
from sklearn.utils import Bunch
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
target = np.array([1, 0, 1])
feature_names = ['feature1', 'feature2', 'feature3']
target_names = ['class0', 'class1']

# Create the Bunch object
my_bunch = Bunch(data=data, target=target, feature_names=feature_names, target_names=target_names)


In [2]:
from sklearn.linear_model import LogisticRegression

# Load the iris dataset
X = my_bunch.data
y = my_bunch.target

# Fit a model
model = LogisticRegression()
model.fit(X, y)
y_pred = model.predict(X)
print(y_pred)
y_pred_proba = model.predict_proba(X)[:, 1]
print(y_pred_proba)

[1 1 1]
[0.66665234 0.66666141 0.66667048]


In [12]:
import pandas as pd
data_list = [[1.1, 1.12, 1.1], [0.1, 0.1, 0.1], [1.11, 1.13, 1.15]]
target_list = [1, 0, 1] 
data_array = np.array(data_list)
target_array = np.array(target_list)
data_frame = pd.DataFrame(data_list, columns=['feature1', 'feature2', 'feature3'])
data_frame['target'] = target_list


# Fit a model use the list
logit = LogisticRegression()
logit.fit(data_list, target_list)
print(logit.predict(data_list))
print(logit.predict_proba(data_list)[:, 1])

# Fit a model use the array
logit1 = LogisticRegression()
logit1.fit(data_array, target_array)
print(logit1.predict(data_array))
print(logit1.predict_proba(data_array)[:, 1])

# Fit a model use the DataFrame
logit2 = LogisticRegression()
logit2.fit(data_frame[['feature1', 'feature2', 'feature3']], data_frame['target'])
print(logit2.predict(data_frame[['feature1', 'feature2', 'feature3']]))
print(logit2.predict_proba(data_frame[['feature1', 'feature2', 'feature3']])[:, 1])

coef2 = logit2.coef_[0]
intercept2 = logit2.intercept_[0]
print(coef2, intercept2)

[1 0 1]
[0.77027068 0.45375714 0.77596418]
[1 0 1]
[0.77027068 0.45375714 0.77596418]
[1 0 1]
[0.77027068 0.45375714 0.77596418]
[0.45592253 0.46512163 0.46499517] -0.3241054672448133


In [13]:
# Calculate the standard errors of the coefficients
# The covariance matrix of the coefficients is the inverse of the Hessian matrix
from scipy import stats

X_train = data_frame[['feature1', 'feature2', 'feature3']]
X_train = np.hstack((X_train, np.ones((X_train.shape[0], 1))))
cov_matrix = np.linalg.inv(np.dot(X_train.T, X_train))
standard_errors = np.sqrt(np.diag(cov_matrix))

# Perform the Wald test
wald_statistics = (np.append(coef2, intercept2) / standard_errors) ** 2
p_values = stats.chi2.sf(wald_statistics, df=1)

# Create a DataFrame to display the results
results = pd.DataFrame({
    'Coefficient': np.append(coef2, intercept2),
    'Standard Error': standard_errors, 
    'Wald Statistic': wald_statistics, 
    'p-value': p_values
}, index=['feature1', 'feature2', 'feature3', 'intercept'])

print(results)

           Coefficient  Standard Error  Wald Statistic   p-value
feature1      0.455923    5.188842e+07    7.720425e-17  1.000000
feature2      0.465122    5.062285e+07    8.441894e-17  1.000000
feature3      0.464995    2.531143e+05    3.374922e-12  0.999999
intercept    -0.324105    1.012457e+05    1.024754e-11  0.999997


In [24]:
from skorecard.linear_model import LogisticRegression as skorecard_logit
sklogit = skorecard_logit(calculate_stats=True)
sklogit.fit(data_frame[['feature1', 'feature2', 'feature3']], data_frame['target'])
print(sklogit.coef_)
print(sklogit.intercept_)
sklogit.get_stats()






[[0.45592253 0.46512163 0.46499517]]
[-0.32410547]






Unnamed: 0,Coef.,Std.Err,z,P>|z|
const,-0.324105,279846.2,-1.158156e-06,0.999999
feature1,0.455923,143421200.0,3.178906e-09,1.0
feature2,0.465122,139923100.0,3.324123e-09,1.0
feature3,0.464995,699615.6,6.646438e-07,0.999999


In [57]:
Array1 = np.array([[1,2,3],[9,19,25]])
print(Array1.shape[0])
print(Array1.shape[1])
print(np.ones((Array1.shape[1], 1)))
Array2 = np.hstack((Array1, np.ones((Array1.shape[0], 1))))
Array3 = np.dot(Array2.T, Array2)
print(Array3)
print(np.linalg.inv(Array3))
print(np.sqrt(np.diag(np.linalg.inv(Array3))))


2
3
[[1.]
 [1.]
 [1.]]
[[ 82. 173. 228.  10.]
 [173. 365. 481.  21.]
 [228. 481. 634.  28.]
 [ 10.  21.  28.   2.]]
[[ 1.66913998e+14 -3.69399833e+13 -3.21514669e+13  3.42036881e+12]
 [ 2.38257967e+18  2.13365343e+17 -1.03126583e+18  2.84487125e+17]
 [-1.84114498e+18 -1.64859787e+17  7.96898921e+17 -2.19832204e+17]
 [ 7.58108705e+17  6.78856147e+16 -3.28132959e+17  9.05189415e+16]]
[1.29195201e+07 4.61914866e+08 8.92691952e+08 3.00863659e+08]
