### Synthetic Dataset

In [3]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Generate a seeded binary-classification problem: 1000 samples with
# 10 features (8 informative, 2 redundant, none repeated).
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    random_state=42,
)

# Hold out 25% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

### Model Training: Logistic Regression without K-Fold

In [4]:

# Fit a default logistic-regression classifier on the training split
# (fit() returns the fitted estimator, so the call can be chained).
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out test split and print per-class precision/recall/F1
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.73      0.65      0.69       130
           1       0.66      0.74      0.70       120

    accuracy                           0.70       250
   macro avg       0.70      0.70      0.70       250
weighted avg       0.70      0.70      0.70       250



### Splitting the Data with K-Fold

In [7]:
from sklearn.model_selection import KFold

# 5-fold splitter; shuffling with a fixed seed keeps fold membership reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Walk the folds manually: fit on the training indices, score on the held-out fold.
# Fold-local names are used so the hold-out split variables created earlier
# (X_train, X_test, y_train, y_test) are not silently overwritten.
scores = []
model = LogisticRegression()
for train_index, test_index in kf.split(X, y):
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]
    model.fit(X_fold_train, y_fold_train)
    fold_score = model.score(X_fold_test, y_fold_test)
    scores.append(fold_score)  # keep each fold's score so `scores` is usable afterwards
    print(fold_score)

0.675
0.715
0.72
0.645
0.72


### Model Training: Logistic Regression with K-Fold (k = 5)

In [10]:
from sklearn.model_selection import cross_val_score

# Cross-validate a fresh logistic-regression model on the shared `kf` splitter
# and report the mean of the per-fold scores.
scores_logistic = cross_val_score(
    estimator=LogisticRegression(),
    X=X,
    y=y,
    cv=kf,
)
print(scores_logistic.mean())

0.6950000000000001


### Model Training: Decision Tree with K-Fold (k = 5)

In [11]:
from sklearn.tree import DecisionTreeClassifier

# Cross-validate a decision tree on the shared `kf` splitter.
# random_state is fixed because tree construction is randomized; without a
# seed the reported mean changes from run to run.
scores_dt = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=kf)
print(np.average(scores_dt))

0.7929999999999999


### Model Training: Random Forest with K-Fold (k = 5)

In [13]:
from sklearn.ensemble import RandomForestClassifier

# Cross-validate a random forest on the shared `kf` splitter, scoring accuracy.
# random_state is fixed because the forest is built with randomness; without a
# seed the reported mean changes from run to run.
scores_rf = cross_val_score(
    RandomForestClassifier(random_state=42), X, y, cv=kf, scoring="accuracy"
)
print(np.average(scores_rf))

0.889


In [17]:
from sklearn.model_selection import cross_validate

# Evaluate a decision tree on the shared `kf` splitter with two metrics at once.
# cross_validate returns a dict of per-fold arrays (fit/score times plus one
# `test_<metric>` entry per requested scorer); the bare expression displays it.
# random_state is fixed so the per-fold scores are repeatable across runs.
cross_validate(
    DecisionTreeClassifier(random_state=42),
    X,
    y,
    cv=kf,
    scoring=["accuracy", "roc_auc"],
)

{'fit_time': array([0.04384971, 0.01435041, 0.01470208, 0.01030588, 0.00983787]),
 'score_time': array([0.03448176, 0.00303459, 0.00310755, 0.00286889, 0.00312495]),
 'test_accuracy': array([0.735, 0.805, 0.795, 0.83 , 0.825]),
 'test_roc_auc': array([0.7331393 , 0.80362145, 0.79641857, 0.83173077, 0.82707748])}