In [19]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [20]:
X, y = make_classification(
    n_samples= 1000,
    n_features= 10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    random_state=42
)

#Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [21]:
from sklearn.metrics import classification_report

model = LogisticRegression()
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
report = classification_report(y_pred, y_test)
print(report)

              precision    recall  f1-score   support

           0       0.65      0.73      0.69       116
           1       0.74      0.66      0.70       134

    accuracy                           0.70       250
   macro avg       0.70      0.70      0.70       250
weighted avg       0.70      0.70      0.70       250



In [22]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)

scores = []

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    model.fit(X_train, y_train)
    scores.append(model.score(X_test, y_test))
scores

[0.675, 0.715, 0.72, 0.645, 0.72]

### Evaluate Logistic Regression

In [23]:
from sklearn.model_selection import cross_val_score

scores_logistic = cross_val_score(LogisticRegression(), X, y, cv=kf)
np.average(scores_logistic)

np.float64(0.6950000000000001)

### Evaluate Decision Tree

In [24]:
from sklearn.tree import DecisionTreeClassifier

scores_dtree = cross_val_score(DecisionTreeClassifier(), X, y, cv=kf)
np.average(scores_dtree)

np.float64(0.7969999999999999)

### Evaluate XGBoost

In [25]:
from xgboost import XGBClassifier

scores_xgb = cross_val_score(XGBClassifier(), X, y, cv=kf)
np.average(scores_xgb)

np.float64(0.897)

### Evaluate Random Forest

In [26]:
from sklearn.ensemble import RandomForestClassifier

scores_rforest = cross_val_score(RandomForestClassifier(n_estimators=20), X, y, cv=kf)
np.average(scores_rforest)

np.float64(0.8780000000000001)

In [28]:
from sklearn.ensemble import RandomForestClassifier

scores_rforest = cross_val_score(RandomForestClassifier(n_estimators=30), X, y, cv=kf)
np.average(scores_rforest)

np.float64(0.875)

In [29]:
from sklearn.model_selection import cross_validate

cross_validate(DecisionTreeClassifier(), X, y, cv=kf, scoring=['accuracy', 'roc_auc'])

{'fit_time': array([0.00900126, 0.01100373, 0.01100111, 0.01100349, 0.01200461]),
 'score_time': array([0.00600052, 0.00300026, 0.00547576, 0.00400114, 0.00300074]),
 'test_accuracy': array([0.75 , 0.8  , 0.775, 0.825, 0.82 ]),
 'test_roc_auc': array([0.74789241, 0.79851941, 0.77661064, 0.82692308, 0.82175833])}