In [2]:
from sklearn.utils import Bunch
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
target = np.array([1, 0, 1])
feature_names = ['feature1', 'feature2', 'feature3']
target_names = ['class0', 'class1']

# Create the Bunch object
my_bunch = Bunch(data=data, target=target, feature_names=feature_names, target_names=target_names)


In [3]:
from sklearn.linear_model import LogisticRegression

# Load the iris dataset
X = my_bunch.data
y = my_bunch.target

# Fit a model
model = LogisticRegression()
model.fit(X, y)
y_pred = model.predict(X)
print(y_pred)
y_pred_proba = model.predict_proba(X)[:, 1]
print(y_pred_proba)

[1 1 1]
[0.66665234 0.66666141 0.66667048]


In [49]:
import pandas as pd
data_list = [[1.1, 1.3, 1.1], [0.1, 0.2, 0.1], [1.2, 1.05, 1]]
target_list = [1, 0, 1] 
data_array = np.array(data_list)
target_array = np.array(target_list)
data_frame = pd.DataFrame(data_list, columns=['feature1', 'feature2', 'feature3'])
data_frame['target'] = target_list


# Fit a model use the list
logit = LogisticRegression()
logit.fit(data_list, target_list)
print(logit.predict(data_list))
print(logit.predict_proba(data_list)[:, 1])

# Fit a model use the array
logit1 = LogisticRegression()
logit1.fit(data_array, target_array)
print(logit1.predict(data_array))
print(logit1.predict_proba(data_array)[:, 1])

# Fit a model use the DataFrame
logit2 = LogisticRegression()
logit2.fit(data_frame[['feature1', 'feature2', 'feature3']], data_frame['target'])
print(logit2.predict(data_frame[['feature1', 'feature2', 'feature3']]))
print(logit2.predict_proba(data_frame[['feature1', 'feature2', 'feature3']])[:, 1])

coef2 = logit2.coef_[0]
intercept2 = logit2.intercept_[0]
print(coef2, intercept2)

[1 0 1]
[0.77885766 0.46121674 0.75987592]
[1 0 1]
[0.77885766 0.46121674 0.75987592]
[1 0 1]
[0.77885766 0.46121674 0.75987592]
[0.4852305  0.447298   0.43720871] -0.33714882400900953


In [53]:
# Calculate the standard errors of the coefficients
# The covariance matrix of the coefficients is the inverse of the Hessian matrix
from scipy import stats

X_train = data_frame[['feature1', 'feature2', 'feature3']]
X_train = np.hstack((X_train, np.ones((X_train.shape[0], 1))))
cov_matrix = np.linalg.inv(np.dot(X_train.T, X_train))
standard_errors = np.sqrt(np.diag(cov_matrix))

# Perform the Wald test
wald_statistics = (np.append(coef2, intercept2) / standard_errors) ** 2
p_values = stats.chi2.sf(wald_statistics, df=1)

# Create a DataFrame to display the results
results = pd.DataFrame({
    'Coefficient': np.append(coef2, intercept2),
    'Standard Error': standard_errors, 
    'Wald Statistic': wald_statistics, 
    'p-value': p_values
}, index=['feature1', 'feature2', 'feature3', 'intercept'])

print(results)

           Coefficient  Standard Error  Wald Statistic  p-value
feature1      0.485231    2.002408e+07    5.872067e-16      1.0
feature2      0.447298    2.860583e+07    2.445034e-16      1.0
feature3      0.437209    5.149049e+07    7.209805e-17      1.0
intercept    -0.337149    2.574525e+06    1.714941e-14      1.0
