In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

In [2]:
# Synthetic classification data: 50,000 rows and 10 features, of which only
# 2 are informative and none are redundant. shuffle=False leaves rows and
# columns in generation order; the fixed seed makes the data repeatable.
x, y = make_classification(
    n_samples=50000,
    n_features=10,
    n_informative=2,
    n_redundant=0,
    random_state=0,
    shuffle=False,
)

In [3]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=7,
)

In [4]:
# Baseline model: scikit-learn logistic regression fitted on the training split.
# fit() returns the estimator itself, so re-binding `clf` keeps the same object
# without producing any cell output.
clf = LogisticRegression(random_state=7)
clf = clf.fit(X=x_train, y=y_train)

In [5]:
# Per-class probabilities for the held-out rows: one row per test sample,
# one column per class. The trailing expression displays the array's shape.
y_pred = clf.predict_proba(X=x_test)
y_pred.shape

(5000, 2)

In [6]:
# Evaluate the logistic regression on the test split.
# Accuracy comes from the estimator's own score(); AUC is computed from the
# second probability column (column index 1 of predict_proba's output).
mean_acc = clf.score(x_test, y_test)
lr_auc = roc_auc_score(y_test, y_pred[:, 1])

print("mean_accuracy:", mean_acc)
print("AUC:", lr_auc)

mean_accuracy: 0.881
AUC: 0.9421900426869171


In [7]:
log_reg = sm.Logit(y_train, x_train).fit()
print(log_reg.summary())

Optimization terminated successfully.
         Current function value: 0.313583
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                45000
Model:                          Logit   Df Residuals:                    44990
Method:                           MLE   Df Model:                            9
Date:                Mon, 28 Mar 2022   Pseudo R-squ.:                  0.5476
Time:                        11:30:30   Log-Likelihood:                -14111.
converged:                       True   LL-Null:                       -31191.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -0.2179      0.010    -21.370      0.000      -0.238      -0.198
x2             2.3856      0.

In [8]:
yhat = log_reg.predict(x_test)

In [9]:
# Show the values returned by log_reg.predict for the test rows
# (NumPy abbreviates the middle of long arrays with "...").
yhat

array([0.13967642, 0.09271094, 0.9947    , ..., 0.64838355, 0.89829366,
       0.08168349])

In [10]:
# Second model: a random forest of shallow trees (depth capped at 2) with a
# fixed seed. The trailing fit() call is the cell's last expression, so the
# fitted estimator's repr is displayed.
clf2 = RandomForestClassifier(
    max_depth=2,
    random_state=0,
)
clf2.fit(X=x_train, y=y_train)

RandomForestClassifier(max_depth=2, random_state=0)

In [11]:
# Per-class probabilities from the random forest for the held-out rows.
rf_predict = clf2.predict_proba(X=x_test)

In [12]:
# Evaluate the random forest on the test split with the same two metrics used
# for the logistic regression: accuracy from score(), and AUC from the second
# probability column (column index 1 of predict_proba's output).
mean_acc2 = clf2.score(x_test, y_test)
rf_auc = roc_auc_score(y_test, rf_predict[:, 1])

print("mean_accuracy:", mean_acc2)
print("AUC:", rf_auc)

mean_accuracy: 0.8688
AUC: 0.9465856310703242
