### Synthetic Dataset

In [2]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Create a synthetic, deliberately imbalanced binary dataset:
# 1000 samples, 10 features (8 informative + 2 redundant), and class
# weights of 0.9 / 0.1 so that class 1 is the minority.
X, y = make_classification(
    n_features=10,
    n_samples=1000,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42
)

# Split data into training (70%) and test (30%) sets.
# stratify=y keeps the class ratio the same in both parts; without it the
# small minority class can end up over- or under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [7]:
from collections import Counter
# Tally the samples per class label to confirm how imbalanced the data is.
class_counts = Counter(y)
class_counts

Counter({np.int64(0): 897, np.int64(1): 103})

### K Fold

In [8]:
from sklearn.model_selection import  KFold

# Plain (non-stratified) 5-fold split: folds are drawn without looking at y,
# so the number of minority-class samples varies from fold to fold.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X, y):
    # Index features and labels with the same fold indices so that
    # X_train/y_train and X_test/y_test always stay aligned.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Show the class distribution of this fold's test part.
    print(Counter(y_test))

Counter({np.int64(0): 177, np.int64(1): 23})
Counter({np.int64(0): 179, np.int64(1): 21})
Counter({np.int64(0): 183, np.int64(1): 17})
Counter({np.int64(0): 181, np.int64(1): 19})
Counter({np.int64(0): 177, np.int64(1): 23})


### Stratified K Fold

In [9]:
from sklearn.model_selection import  StratifiedKFold

# Stratified 5-fold split: each fold is built to preserve the class ratio
# of y, so the minority-class count is nearly identical across folds.
stkf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for train_index, test_index in stkf.split(X, y):
    # Index features and labels with the same fold indices so that
    # X_train/y_train and X_test/y_test always stay aligned.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Show the class distribution of this fold's test part.
    print(Counter(y_test))

Counter({np.int64(0): 180, np.int64(1): 20})
Counter({np.int64(0): 180, np.int64(1): 20})
Counter({np.int64(0): 179, np.int64(1): 21})
Counter({np.int64(0): 179, np.int64(1): 21})
Counter({np.int64(0): 179, np.int64(1): 21})


### Logistic Regression 

In [10]:
from sklearn.model_selection import cross_val_score

# Cross-validate a default logistic regression on the stratified folds and
# report the mean of the per-fold scores.
# NOTE(review): the data is built with weights=[0.9, 0.1], so a score near
# 0.90 is close to what always predicting the majority class would give.
logistic_model = LogisticRegression()
scores_logistic = cross_val_score(logistic_model, X, y, cv=stkf)
print(scores_logistic.mean())

0.9019999999999999


### Decision Tree Classifier 

In [11]:
from sklearn.tree import DecisionTreeClassifier

# Cross-validate a decision tree on the stratified folds and report the mean
# per-fold accuracy. The tree is seeded so the score is reproducible on a
# fresh "Restart & Run All"; an unseeded tree can give a different value on
# every run.
scores_dt = cross_val_score(
    DecisionTreeClassifier(random_state=42), X, y, cv=stkf, scoring="accuracy"
)
print(np.average(scores_dt))

0.8939999999999999


### Random Forest Classifier

In [12]:
from sklearn.ensemble import RandomForestClassifier

# Cross-validate a random forest on the stratified folds and report the mean
# per-fold accuracy. The forest is seeded so that its random sampling, and
# therefore the printed score, is reproducible across runs.
scores_rf = cross_val_score(
    RandomForestClassifier(random_state=42), X, y, cv=stkf, scoring="accuracy"
)
print(np.average(scores_rf))

0.917
