In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sklearn.datasets import load_breast_cancer

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 
from sklearn.ensemble import VotingClassifier

In [2]:
breast_cancer = load_breast_cancer()

X = breast_cancer.data
Y = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3)
print("Size of Training Data: ", X_train.shape, y_train.shape)
print("Size of Testing Data: ", X_test.shape, y_test.shape)

Size of Training Data:  (398, 30) (398,)
Size of Testing Data:  (171, 30) (171,)


# Training data of KFold & Testing data of hold out used (30%)

In [3]:
CV = KFold(n_splits = 3)
fold = 1
algo = ["Naive Bayes", "Decision Tree", "SVM"]
accuracy = []

for train_index, test_index in CV.split(X):
    
    X_train, y_train = X[train_index], Y[train_index]
    
    if (fold == 1): 
        model1 = GaussianNB()
        model1.fit(X_train, y_train)
        y_pred = model1.predict(X_test)
        
    if (fold == 2): 
        model2 = DecisionTreeClassifier() 
        model2.fit(X_train, y_train)
        y_pred = model2.predict(X_test)
    
    if (fold == 3): 
        model3 = SVC(kernel='linear', C = 1)
        model3.fit(X_train, y_train)
        y_pred = model3.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    accuracy.append(acc)
    print(f"Fold {fold}: Accuracy using {algo[fold-1]}: {acc}")
    fold += 1
    
ensemble = VotingClassifier(estimators=[('gnb', model1), ('dt', model2), ('svm', model3)], voting='hard')
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
final_acc = accuracy_score(y_test, y_pred)
print(f"Accuracy using majority voting : {final_acc}")

Fold 1: Accuracy using Naive Bayes: 0.9415204678362573
Fold 2: Accuracy using Decision Tree: 0.9883040935672515
Fold 3: Accuracy using SVM: 0.9649122807017544
Accuracy using majority voting : 0.9649122807017544
