In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Load the feature table (first CSV column becomes the row index) and discard
# the two identifier/name columns so that only the remaining columns are kept.
dataset = (
    pd.read_csv('data/brain_tumor_dataset.csv', index_col=0)
    .drop(columns=['image_name', 'label_name'])
)

dataset

Unnamed: 0,mean,variance,std,skewness,kurtosis,entropy,contrast,dissimilarity,homogeneity,asm,energy,correlation,label
0,47.957241,1534.028907,39.166681,0.591976,-0.146808,12.115296,54.121755,3.947006,0.408638,0.005599,0.074824,0.982349,3
1,53.239227,1702.933157,41.266611,1.525444,3.256895,12.212518,39.458877,2.382121,0.663735,0.071248,0.266923,0.988422,0
2,43.551712,1860.386608,43.132199,1.174871,1.526766,11.981298,42.525494,3.121216,0.510266,0.012075,0.109885,0.988572,1
3,70.573677,2225.436749,47.174535,0.879047,0.867972,12.257411,36.775501,3.006803,0.535195,0.026063,0.161442,0.991737,2
4,49.876183,2688.290875,51.848731,0.695864,-0.464646,11.865826,34.988839,2.842435,0.565111,0.036918,0.192142,0.993493,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3201,43.240494,1728.160768,41.571153,1.032431,1.358818,11.996204,35.655661,2.656304,0.561187,0.019953,0.141256,0.989684,2
3202,31.026756,2054.392251,45.325404,1.474903,1.355120,11.522565,42.386963,2.596273,0.676621,0.109545,0.330977,0.989695,2
3203,43.290009,1392.145868,37.311471,1.077291,2.438517,12.079829,42.669891,2.780008,0.536012,0.009523,0.097585,0.984667,1
3204,46.065979,2176.462016,46.652567,0.798026,-0.141716,11.928128,28.031548,2.543993,0.570965,0.021920,0.148054,0.993561,2


In [3]:
# Features are every column except the last; the target is the last column
# (shown as `label` in the frame displayed above).
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 20% of the rows. The fixed random_state makes the partition
# identical on every Restart & Run All instead of changing with each run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [4]:
def get_models():
    """Build the candidate classifiers to compare, keyed by a short name.

    Returns:
        dict: ``'KNN'``, ``'DT'`` and ``'NB'`` map to freshly constructed
        stand-alone classifiers, and ``'Stacking'`` maps to whatever
        ``get_stacking()`` returns. Keys are inserted in that order.
    """
    return {
        'KNN': KNeighborsClassifier(n_neighbors=5),
        'DT': DecisionTreeClassifier(max_depth=7),
        'NB': GaussianNB(),
        'Stacking': get_stacking(),
    }

In [5]:
# evaluate a given model using cross-validation
def evaluate_model(model, x, y, n_splits=10, n_repeats=3, random_state=1):
    """Score ``model`` with repeated stratified k-fold cross-validation.

    Args:
        model: Estimator handed unchanged to ``cross_val_score``.
        x: Feature matrix.
        y: Class labels.
        n_splits: Number of folds per repetition (default 10).
        n_repeats: How many times the k-fold procedure is repeated
            (default 3).
        random_state: Seed for the fold generator. A fixed default means
            every call uses the same folds, so scores are reproducible
            across runs and different models are compared on identical
            splits. Pass ``None`` for unseeded folds.

    Returns:
        The per-fold accuracy scores returned by ``cross_val_score``
        (one value per fold per repetition).

    Raises:
        Whatever exception a fit/score raises: ``error_score='raise'``
        makes ``cross_val_score`` propagate it instead of recording NaN.
    """
    cv = RepeatedStratifiedKFold(
        n_splits=n_splits, n_repeats=n_repeats, random_state=random_state
    )
    # n_jobs=-1 lets scikit-learn run the folds on all available cores.
    return cross_val_score(
        model, x, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise'
    )

In [6]:
# get a stacking ensemble of models
def get_stacking():
    """Assemble the stacking ensemble used alongside the stand-alone models.

    Returns:
        A ``StackingClassifier`` whose level-0 estimators are a 5-neighbour
        KNN, a depth-7 decision tree and Gaussian naive Bayes, with another
        5-neighbour KNN as the final (level-1) estimator and ``cv=5``.
    """
    # Level 0: named base learners.
    base_learners = [
        ('KNN', KNeighborsClassifier(n_neighbors=5)),
        ('DT', DecisionTreeClassifier(max_depth=7)),
        ('NB', GaussianNB()),
    ]
    # Level 1: the meta learner given to the ensemble as final_estimator.
    meta_learner = KNeighborsClassifier(n_neighbors=5)
    return StackingClassifier(
        estimators=base_learners, final_estimator=meta_learner, cv=5
    )

In [7]:
# Cross-validate every candidate on the full feature matrix and print its mean
# accuracy; the per-fold scores and model names are kept in parallel lists.
models = get_models()
results = []
names = []

for name, model in models.items():
    scores = evaluate_model(model, x, y)
    names.append(name)
    results.append(scores)
    print('>%s %.4f' % (name, np.mean(scores)))

>KNN 0.5994
>DT 0.6794
>NB 0.5421
>Stacking 0.6521
