In [2]:
from sklearn.datasets import make_classification
import numpy as np
from sklearn.metrics import classification_report

In [3]:
# Synthetic binary-classification dataset: 1000 samples with 10 features,
# of which 8 are informative and 2 are redundant (none repeated).
# The seed is fixed so the same data is generated on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    random_state=42,
)

### Train a Logistic Regression Model
##### Manually using KFold Cross Validation

In [16]:
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

# Estimator under evaluation and a shuffled, seeded 5-fold splitter.
# `kf` is reused by later cells, so it is defined once here.
lr = LogisticRegression()
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = []

for train_ind, test_ind in kf.split(X, y):
    # Slice out the training part and the held-out part for this fold.
    X_train, y_train = X[train_ind], y[train_ind]
    X_test, y_test = X[test_ind], y[test_ind]

    # Refit on the training part, then score on the held-out part.
    fold_score = lr.fit(X_train, y_train).score(X_test, y_test)
    scores.append(fold_score)

print(scores)
print(f"Avearge Model's Accuracy is {np.mean(scores):.3f}")

[0.675, 0.715, 0.72, 0.645, 0.72]
Avearge Model's Accuracy is 0.695


### KFold Cross Validation using `cross_val_score`

In [20]:
from sklearn.model_selection import cross_val_score

# Let cross_val_score run the fit/score loop, reusing the `kf` splitter
# defined earlier so the folds match the manual evaluation.
scores = cross_val_score(
    LogisticRegression(),
    X,
    y,
    cv=kf,
)
mean_accuracy = np.mean(scores)
print(f"Avearge Model's Accuracy is {mean_accuracy:.3f}")

Avearge Model's Accuracy is 0.695


### Training SVM model

In [21]:
from sklearn.svm import SVC

# Evaluate a support vector classifier with its default settings on the
# same `kf` folds used for the other models.
svc_model = SVC()
scores = cross_val_score(svc_model, X, y, cv=kf)
print(f"Avearge Model's Accuracy is {np.mean(scores):.3f}")

Avearge Model's Accuracy is 0.909


### Training Decision Tree Classifier


In [24]:
from sklearn.tree import DecisionTreeClassifier

# Evaluate a decision tree (default "gini" criterion) on the shared `kf` folds.
# random_state is fixed because the tree permutes features at each split, so
# without a seed the score changes between runs even though the data and the
# folds are already seeded with 42.
scores = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=kf)
print(f"Avearge Model's Accuracy is {np.mean(scores):.3f}")

Avearge Model's Accuracy is 0.788


In [25]:
# Same evaluation with the "entropy" split criterion.
# random_state is fixed so that differences from other runs come from the
# criterion, not from the tree's random feature ordering and tie-breaking.
scores = cross_val_score(
    DecisionTreeClassifier(criterion="entropy", random_state=42),
    X,
    y,
    cv=kf,
)
print(f"Avearge Model's Accuracy is {np.mean(scores):.3f}")

Avearge Model's Accuracy is 0.797


### Training RandomForest Classifier

In [37]:
from sklearn.ensemble import RandomForestClassifier

# Evaluate a 74-tree random forest on the shared `kf` folds.
# random_state is fixed because the forest bootstraps samples and subsamples
# features; without a seed the reported score is not reproducible.
# `scoring` is left at its default (None), which uses the estimator's own
# `.score` method -- mean accuracy for classifiers.
scores = cross_val_score(
    RandomForestClassifier(n_estimators=74, random_state=42),
    X,
    y,
    cv=kf,
)
print(f"Avearge Model's Accuracy is {np.mean(scores):.3f}")

Avearge Model's Accuracy is 0.898


### Using `cross_validate` to evaluate the model with multiple metrics

In [39]:
from sklearn.model_selection import cross_validate

# cross_validate accepts several metrics at once and returns a dict holding
# per-fold fit/score times plus one `test_<metric>` array per requested scorer.
# random_state is fixed so the per-fold accuracy and ROC AUC are reproducible.
cross_validate(
    RandomForestClassifier(n_estimators=74, random_state=42),
    X,
    y,
    cv=kf,
    scoring=["accuracy", "roc_auc"],
)

{'fit_time': array([0.43755865, 0.34976745, 0.34401202, 0.34386015, 0.33845115]),
 'score_time': array([0.08747411, 0.01800537, 0.01909351, 0.02077937, 0.018255  ]),
 'test_accuracy': array([0.87 , 0.915, 0.885, 0.875, 0.89 ]),
 'test_roc_auc': array([0.93175432, 0.95058023, 0.96573629, 0.95522837, 0.96131072])}