#### Import modules and read data

In [1]:
import numpy as np
import os
import pandas as pd

from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold, RepeatedKFold
from sklearn.svm import SVC

In [2]:
# Read the normalised feature table from the Data/ folder under the current working directory.
csv_path = os.getcwd() + '/Data/brain_tumour_normalized.csv'
normal_data = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of the columns that were loaded.
normal_data.head()

Unnamed: 0,Image,Mean,Variance,Standard Deviation,Entropy,Skewness,Kurtosis,Contrast,Energy,ASM,Homogeneity,Dissimilarity,Correlation,Coarseness,Class
0,Image1,0.194705,0.212023,0.443074,0.274801,0.068211,0.010937,0.028236,0.47541,0.246092,0.603108,0.139694,0.981764,0.0,0
1,Image2,0.261489,0.276124,0.510114,0.674843,0.052278,0.007693,0.017951,0.797096,0.648383,0.7738,0.093527,0.997417,0.0,0
2,Image3,0.219003,0.392326,0.6142,0.001487,0.090618,0.016478,0.02328,0.012719,0.001173,0.23076,0.195261,0.972855,0.0,1
3,Image4,0.1773,0.329007,0.55975,0.001513,0.108202,0.021559,0.043805,0.012908,0.001192,0.196137,0.258588,0.941475,0.0,1
4,Image5,0.218223,0.24984,0.483677,0.370574,0.068403,0.011067,0.050836,0.56486,0.338854,0.560862,0.226679,0.960995,0.0,0


In [3]:
# Split the table into the feature matrix X and the target vector Y, selecting by
# column LABEL rather than by position.
# A positional slice such as columns[1:14] shifts silently when the CSV carries a
# leading index column (the preview above lists an 'Unnamed: 0' column): it would then
# pull in the non-numeric 'Image' identifier and leave out 'Coarseness'.
# .loc label slices are inclusive on both ends, so this is 'Mean' through 'Coarseness'.
X = normal_data.loc[:, 'Mean':'Coarseness']
Y = normal_data['Class']

# Expect 13 feature columns and one label per row.
X.shape, Y.shape

((3762, 13), (3762,))

#### Logistic Regression

Without repeated folds:

In [4]:
# Plain 10-fold cross-validation: rows are shuffled before splitting, with a fixed
# seed so that the fold assignment is reproducible.
cv = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [5]:
# Logistic-regression classifier with scikit-learn's default settings.
logisticRegr = LogisticRegression()

# Evaluate it on every fold of `cv`; `scores` holds one accuracy value per fold.
scores = cross_val_score(logisticRegr, X, y=Y, cv=cv, scoring='accuracy')

In [6]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 97.76736836164571
Overall std. dev. of scores: 0.00571257777793943


With repeated folds:

In [7]:
# Repeated 10-fold cross-validation: the 10-fold split is drawn 3 times (30 folds in
# total), seeded so the splits are reproducible.
cvr = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=42,
)

In [8]:
# Logistic-regression classifier with scikit-learn's default settings.
logisticRegr = LogisticRegression()

# Evaluate it on every fold of the repeated splitter `cvr`; one accuracy value per fold.
scores = cross_val_score(logisticRegr, X, y=Y, cv=cvr, scoring='accuracy')

In [9]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 97.76713320917281
Overall std. dev. of scores: 0.006159991467893768


#### SVM - RBF Kernel

Without repeated folds:

In [10]:
# Plain 10-fold cross-validation: rows are shuffled before splitting, with a fixed
# seed so that the fold assignment is reproducible.
cv = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [11]:
# Support-vector classifier with an RBF kernel; gamma='auto' makes scikit-learn use
# 1 / n_features as the kernel coefficient.
svm_rbf = SVC(gamma='auto', kernel='rbf')

# Evaluate it on every fold of `cv`; `scores` holds one accuracy value per fold.
scores = cross_val_score(svm_rbf, X, y=Y, cv=cv, scoring='accuracy')

In [12]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 97.36871437440037
Overall std. dev. of scores: 0.006099179752415618


With repeated folds:

In [13]:
# Repeated 10-fold cross-validation: the 10-fold split is drawn 3 times (30 folds in
# total), seeded so the splits are reproducible.
cvr = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=42,
)

In [14]:
# Support-vector classifier with an RBF kernel; gamma='auto' makes scikit-learn use
# 1 / n_features as the kernel coefficient.
svm_rbf = SVC(gamma='auto', kernel='rbf')

# Evaluate it on every fold of the repeated splitter `cvr`; one accuracy value per fold.
scores = cross_val_score(svm_rbf, X, y=Y, cv=cvr, scoring='accuracy')

In [15]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 97.32408243505087
Overall std. dev. of scores: 0.006544625993357317


#### SVM - Sigmoid Kernel

Without repeated folds:

In [16]:
# Plain 10-fold cross-validation: rows are shuffled before splitting, with a fixed
# seed so that the fold assignment is reproducible.
cv = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [17]:
# Support-vector classifier with a sigmoid kernel; gamma='auto' makes scikit-learn use
# 1 / n_features as the kernel coefficient.
# The model gets its own name so it is neither confused with, nor overwrites, the
# RBF-kernel model `svm_rbf` evaluated in the previous section.
svm_sigmoid = SVC(kernel='sigmoid', gamma='auto')

# Evaluate it on every fold of `cv`; `scores` holds one accuracy value per fold.
scores = cross_val_score(svm_sigmoid, X, Y, scoring='accuracy', cv=cv)

In [18]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 96.99658558609403
Overall std. dev. of scores: 0.007416498502075566


With repeated folds:

In [19]:
# Repeated 10-fold cross-validation: the 10-fold split is drawn 3 times (30 folds in
# total), seeded so the splits are reproducible.
cvr = RepeatedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=42,
)

In [20]:
# Support-vector classifier with a sigmoid kernel; gamma='auto' makes scikit-learn use
# 1 / n_features as the kernel coefficient.
# The model gets its own name so it is neither confused with, nor overwrites, the
# RBF-kernel model `svm_rbf` evaluated in the previous section.
svm_sigmoid = SVC(kernel='sigmoid', gamma='auto')

# Evaluate it on every fold of the repeated splitter `cvr`; one accuracy value per fold.
scores = cross_val_score(svm_sigmoid, X, Y, scoring='accuracy', cv=cvr)

In [21]:
# Summarise the per-fold accuracies.
# Note the units: the mean is scaled to a percentage, the std. dev. is left as a fraction.
print('Overall accuracy:', np.mean(scores) * 100)
print('Overall std. dev. of scores:', np.std(scores))

Overall accuracy: 97.00514513610624
Overall std. dev. of scores: 0.007028382236675999
