In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler

### Preparing Data


In [10]:
# Load the three scikit-learn datasets evaluated in the cells below.
import warnings
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning

# Suppress solver convergence warnings from here on.
# NOTE(review): this also hides genuine non-convergence of later fits — confirm intended.
warnings.simplefilter("ignore", category=ConvergenceWarning)

from sklearn.datasets import load_wine, load_digits, load_breast_cancer

wine_dataset, digits_dataset, cancer_dataset = (
    load_wine(),
    load_digits(),
    load_breast_cancer(),
)

In [3]:
# Stratified, shuffled splitters with 5, 10 and 20 folds; the shared
# random_state keeps the fold assignment reproducible between runs.
five_kf, ten_kf, twenty_kf = (
    StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    for n_folds in (5, 10, 20)
)

### Testing Models

In [11]:
# Classifiers compared in the sections below.
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with library defaults.
nb = GaussianNB()
# Logistic regression with an explicit cap of 1000 solver iterations.
log_reg = LogisticRegression(max_iter=1000)

In [None]:
def calc_scores(model, X, y, kf, scoring=None):
    """Cross-validate ``model`` on ``(X, y)`` and return the per-fold scores.

    Parameters
    ----------
    model : estimator handed to ``cross_val_score``.
    X, y : features and targets, passed through unchanged.
    kf : cross-validation splitter, forwarded as ``cv``.
    scoring : optional metric forwarded as ``scoring``. The default ``None``
        preserves the previous behaviour of letting ``cross_val_score``
        pick the metric.

    Returns
    -------
    The value returned by ``cross_val_score`` (one score per fold).
    """
    return cross_val_score(model, X, y, cv=kf, scoring=scoring)

### Logistic Regression

In [14]:
print('Accuracy for Logistic Regression on Wine Dataset:')
# Evaluate once per splitter; the loop replaces three copy-pasted stanzas and
# prints exactly the same lines (KF5, KF10, KF20) in the same order.
for kf in (five_kf, ten_kf, twenty_kf):
    scores = cross_val_score(log_reg, wine_dataset.data, wine_dataset.target, cv=kf, scoring='accuracy')
    print(f'Wine Dataset Accuracy KF{kf.get_n_splits()}: {scores.mean()}')

Accuracy for Logistic Regression on Wine Dataset:
Wine Dataset Accuracy KF5: 0.9496825396825397
Wine Dataset Accuracy KF10: 0.95
Wine Dataset Accuracy KF20: 0.95


In [15]:
print('Accuracy for Logistic Regression on Digits Dataset:')
# Evaluate once per splitter; the loop replaces three copy-pasted stanzas and
# prints exactly the same lines (KF5, KF10, KF20) in the same order.
# NOTE(review): the recorded output stops after KF10 with a KeyboardInterrupt,
# so the 20-fold result for this dataset has not been captured yet.
for kf in (five_kf, ten_kf, twenty_kf):
    scores = cross_val_score(log_reg, digits_dataset.data, digits_dataset.target, cv=kf, scoring='accuracy')
    print(f'digits Dataset Accuracy KF{kf.get_n_splits()}: {scores.mean()}')

Accuracy for Logistic Regression on Digits Dataset:
digits Dataset Accuracy KF5: 0.966615598885794
digits Dataset Accuracy KF10: 0.9627281191806331


KeyboardInterrupt: 

In [None]:
print('Accuracy for Logistic Regression on Cancer Dataset:')
# Evaluate once per splitter; the loop replaces three copy-pasted stanzas and
# prints exactly the same lines (KF5, KF10, KF20) in the same order.
for kf in (five_kf, ten_kf, twenty_kf):
    scores = cross_val_score(log_reg, cancer_dataset.data, cancer_dataset.target, cv=kf, scoring='accuracy')
    print(f'cancer Dataset Accuracy KF{kf.get_n_splits()}: {scores.mean()}')

### Naive Bayes

In [None]:
print('Accuracy for Naive Bayes on Wine Dataset:')
# Evaluate once per splitter; the loop replaces three copy-pasted stanzas and
# prints exactly the same lines (KF5, KF10, KF20) in the same order.
for kf in (five_kf, ten_kf, twenty_kf):
    scores = cross_val_score(nb, wine_dataset.data, wine_dataset.target, cv=kf, scoring='accuracy')
    print(f'Wine Dataset Accuracy KF{kf.get_n_splits()}: {scores.mean()}')