In [3]:
from sklearn.datasets import make_classification
import numpy as np
from sklearn.model_selection import train_test_split

In [6]:
# Synthetic binary classification problem with a deliberate class imbalance:
# `weights=[0.9, 0.1]` asks for roughly 90% class 0 and 10% class 1.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    weights=[0.9, 0.1],
    random_state=42,
)

# Stratify on y so the minority class keeps the same proportion in the
# train and test partitions; a purely random split of imbalanced labels can
# leave the test set with noticeably more or fewer positives than the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [7]:
from collections import Counter
# Tally how many samples carry each label, to confirm the imbalance
# requested through `weights` when the data was generated.
Counter(y)

Counter({0: 897, 1: 103})

In [13]:
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()

# Plain KFold does not look at the labels when it assigns samples to folds,
# so the number of minority-class samples in each test fold is left to chance.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = []
for train_ind, test_ind in kf.split(X, y):
    # Fold-local names: reusing X_train / X_test / y_train / y_test here would
    # silently overwrite the hold-out split created earlier in the notebook.
    X_fold_train, X_fold_test = X[train_ind], X[test_ind]
    y_fold_train, y_fold_test = y[train_ind], y[test_ind]
    lr.fit(X_fold_train, y_fold_train)
    # Show the class make-up of this test fold.
    print(Counter(y_fold_test))
    scores.append(lr.score(X_fold_test, y_fold_test))
# Average of the per-fold scores.
print(np.mean(scores))
    

Counter({0: 177, 1: 23})
Counter({0: 179, 1: 21})
Counter({0: 183, 1: 17})
Counter({0: 181, 1: 19})
Counter({0: 177, 1: 23})
0.897


In [14]:
from sklearn.model_selection import StratifiedKFold

# StratifiedKFold uses y to build the folds, keeping the class proportions of
# each test fold close to those of the full data set.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
lr = LogisticRegression()

scores = []
for train_ind, test_ind in skf.split(X, y):
    # Fold-local names: reusing X_train / X_test / y_train / y_test here would
    # silently overwrite the hold-out split created earlier in the notebook.
    X_fold_train, X_fold_test = X[train_ind], X[test_ind]
    y_fold_train, y_fold_test = y[train_ind], y[test_ind]
    lr.fit(X_fold_train, y_fold_train)
    # Show the class make-up of this test fold.
    print(Counter(y_fold_test))
    scores.append(lr.score(X_fold_test, y_fold_test))
# Average of the per-fold scores.
print(np.mean(scores))

Counter({0: 180, 1: 20})
Counter({0: 180, 1: 20})
Counter({0: 179, 1: 21})
Counter({0: 179, 1: 21})
Counter({0: 179, 1: 21})
0.9019999999999999


### Cross Validation on Logistic Regression

In [15]:
from sklearn.model_selection import cross_val_score

# One score per fold for a freshly constructed logistic regression, using the
# stratified splitter `skf` and the estimator's default scorer.
cross_val_score(estimator=LogisticRegression(), X=X, y=y, cv=skf)

array([0.915, 0.91 , 0.895, 0.895, 0.895])

### Cross Validation on Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so the per-fold scores are identical on every
# Restart & Run All; an unseeded tree can give different scores between runs.
cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=skf)

array([0.9  , 0.915, 0.89 , 0.85 , 0.9  ])

### Cross Validation on Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so its bootstrap and feature sampling, and therefore the
# per-fold scores, are reproducible on every Restart & Run All.
cross_val_score(
    RandomForestClassifier(n_estimators=50, random_state=42), X, y, cv=skf
)

array([0.92 , 0.915, 0.915, 0.91 , 0.92 ])

### Cross Validation on XGBoost

In [20]:
from xgboost import XGBClassifier

# One score per fold for a 50-round gradient-boosted model, evaluated on the
# same stratified splitter `skf` as the other models.
cross_val_score(estimator=XGBClassifier(n_estimators=50), X=X, y=y, cv=skf)

array([0.93 , 0.915, 0.935, 0.92 , 0.92 ])