
# Different classifiers

In this notebook I am willing to compare the different classifiers on a visual object recognition decoding task.

In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import plotly.express as px

from nilearn.input_data import NiftiMasker
from nilearn import datasets, plotting, image
from nilearn.image import get_data
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, RidgeClassifier, RidgeClassifierCV
from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier

In [2]:
# Designate input file
haxby_ds = datasets.fetch_haxby(subjects=[4], fetch_stimuli=True)

# 'func' is a list of filenames: one for each subject
func_file = haxby_ds.func[0]

# Standardizing
mask_vt_file = haxby_ds.mask_vt[0]
masker = NiftiMasker(mask_img=mask_vt_file, standardize=True)

# Load the behavioral data that I will predict
beh_label = pd.read_csv(haxby_ds.session_target[0], sep=" ")

#select data
X = masker.fit_transform(func_file)
y = beh_label['labels']

# Identify the resting state
nonrest_task_mask = (y != 'rest')

# Remove the resting state and find names of remaining active labels
categories = y[nonrest_task_mask].unique()

# Extract tags indicating to which acquisition run a tag belongs
session_labels = beh_label['chunks'][nonrest_task_mask]

masked_timecourses = masker.fit_transform(func_file)[nonrest_task_mask]

In [3]:
#shuffle the data and split the sample into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)

#standarize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Decision trees

In [4]:
dtc = tree.DecisionTreeClassifier(criterion='entropy')

#train model
dtc.fit(X_train, y_train)
score = dtc.score(X_test, y_test)

#print classification report
dtc = dtc.predict(X_test)
report = classification_report(y_test, dtc)
print(report)

print("Test score with L1 penalty: %.4f" % score)

              precision    recall  f1-score   support

      bottle       0.36      0.56      0.43         9
         cat       0.50      0.40      0.44        10
       chair       0.55      0.40      0.46        15
        face       0.60      0.33      0.43         9
       house       0.64      0.54      0.58        13
        rest       0.67      0.76      0.71        55
    scissors       0.43      0.35      0.39        17
scrambledpix       0.42      0.38      0.40        13
        shoe       0.12      0.20      0.15         5

    accuracy                           0.54       146
   macro avg       0.48      0.44      0.45       146
weighted avg       0.55      0.54      0.54       146

Test score with L1 penalty: 0.5411


In [5]:
dtc = tree.DecisionTreeClassifier(criterion='entropy')

#train model
dtc.fit(X_train, y_train)
score = dtc.score(X_test, y_test)

# Prediction accuracy
cv_scores_dtc = cross_val_score(dtc, X_train, y_train, cv=5) 
print(cv_scores_dtc)

# The mean prediction accuracy
classification_accuracy_dtc = np.mean(cv_scores_dtc)
classification_accuracy_dtc

[0.51526718 0.44061303 0.40229885 0.47509579 0.43678161]


0.4540112895206341

In [6]:
prediction_accuracy_dtc = cv_scores_dtc * 100
print(prediction_accuracy_dtc)

[51.52671756 44.06130268 40.22988506 47.50957854 43.67816092]


### Multinomial Naive Bayes Classifier

In [7]:
#multinomial logistic regression object using L1 penalty
mnb = LogisticRegression(C=50., multi_class='multinomial',
                         penalty='l1', solver='saga', tol=0.1)

#train model
mnb.fit(X_train, y_train)
sparsity = np.mean(mnb.coef_) * 100
score = mnb.score(X_test, y_test)

# print('Best C % .4f' % clf.C_)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

Sparsity with L1 penalty: 0.00%
Test score with L1 penalty: 0.7877


In [8]:
# Prediction accuracy
cv_scores_mnb = cross_val_score(mnb, X_train, y_train, cv=5) 
print(cv_scores_mnb)

# The mean prediction accuracy
classification_accuracy_mnb = np.mean(cv_scores_mnb)
classification_accuracy_mnb

[0.81679389 0.79693487 0.78544061 0.79693487 0.79310345]


0.7978415372466439

In [9]:
prediction_accuracy_mnb = cv_scores_mnb * 100
print(prediction_accuracy_mnb)

[81.67938931 79.69348659 78.5440613  79.69348659 79.31034483]


### K-Nearest Neighbours

In [10]:
#shuffle the data and split the sample into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)

#standarize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#kneighbors classifier object
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='kd_tree', leaf_size=30, 
                           p=2, metric='minkowski', metric_params=None, n_jobs=None)

#fit model
knn.fit(X_train, y_train)

#response prediction
pred = knn.predict(X_test)

#accuracy
knn.score(X_test, y_test)

#print classification report
knn = knn.predict(X_test)
report = classification_report(y_test, knn)
print(report)

#evaluate accuracy
print("Accuracy on test set: %0.3f%%"%(accuracy_score(y_test, pred)*100))

              precision    recall  f1-score   support

      bottle       0.67      0.89      0.76         9
         cat       0.64      0.78      0.70         9
       chair       0.45      0.62      0.53         8
        face       0.71      0.86      0.77        14
       house       0.67      1.00      0.80         6
        rest       0.89      0.75      0.81        64
    scissors       0.75      0.69      0.72        13
scrambledpix       0.82      0.75      0.78        12
        shoe       0.78      0.64      0.70        11

    accuracy                           0.76       146
   macro avg       0.71      0.78      0.73       146
weighted avg       0.78      0.76      0.76       146

Accuracy on test set: 76.027%


In [11]:
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='kd_tree', leaf_size=30, 
                           p=2, metric='minkowski', metric_params=None, n_jobs=None)

knn.fit(X_train, y_train)

# Prediction accuracy
cv_scores_knn = cross_val_score(knn, X_train, y_train, cv=5) 
print(cv_scores_knn)

# The mean prediction accuracy
classification_accuracy_knn = np.mean(cv_scores_knn)
classification_accuracy_knn

[0.73664122 0.70881226 0.69731801 0.75862069 0.69348659]


0.7189757538533532

In [12]:
prediction_accuracy_knn = cv_scores_knn * 100
print(prediction_accuracy_knn)

[73.66412214 70.88122605 69.73180077 75.86206897 69.348659  ]


### Neural Networks

In [13]:
#multinomial logistic regression object using L1 penalty
nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False,
              epsilon=1e-08, hidden_layer_sizes=(5, 2),
              max_iter=200, momentum=0.9, n_iter_no_change=10,
              nesterovs_momentum=True, power_t=0.5, random_state=1,
              solver='lbfgs', tol=0.0001, validation_fraction=0.1, 
              verbose=False, warm_start=False)

#train model
nn.fit(X_train, y_train)
score = nn.score(X_test, y_test)

#print classification report
nn = nn.predict(X_test)
report = classification_report(y_test, nn)
print(report)

print("Test score with L1 penalty: %.4f" % score)

              precision    recall  f1-score   support

      bottle       0.00      0.00      0.00         9
         cat       0.26      1.00      0.42         9
       chair       0.36      0.50      0.42         8
        face       0.00      0.00      0.00        14
       house       0.86      1.00      0.92         6
        rest       0.91      0.91      0.91        64
    scissors       0.22      0.15      0.18        13
scrambledpix       0.46      0.50      0.48        12
        shoe       0.00      0.00      0.00        11

    accuracy                           0.58       146
   macro avg       0.34      0.45      0.37       146
weighted avg       0.53      0.58      0.54       146

Test score with L1 penalty: 0.5822


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False,
              epsilon=1e-08, hidden_layer_sizes=(5, 2),
              max_iter=200, momentum=0.9, n_iter_no_change=10,
              nesterovs_momentum=True, power_t=0.5, random_state=1,
              solver='lbfgs', tol=0.0001, validation_fraction=0.1, 
              verbose=False, warm_start=False)

#train model
nn.fit(X_train, y_train)

# Prediction accuracy
cv_scores_nn = cross_val_score(nn, X_train, y_train, cv=5) 
print(cv_scores_nn)

# The mean prediction accuracy
classification_accuracy_nn = np.mean(cv_scores_nn)
classification_accuracy_nn

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

[0.58015267 0.57471264 0.59386973 0.57854406 0.55172414]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


0.5758006492936738

In [15]:
prediction_accuracy_nn = cv_scores_nn * 100
print(prediction_accuracy_nn)

[58.01526718 57.47126437 59.38697318 57.85440613 55.17241379]


### Multinomial Logistic Regression

In [16]:
logistic_50 = LogisticRegression(C=50., multi_class='multinomial',
                     penalty='elasticnet', solver='saga', tol=0.1, l1_ratio=0.4)

#train model
logistic_50.fit(X_train, y_train)
sparsity = np.mean(logistic_50.coef_) * 100
score = logistic_50.score(X_test, y_test)

#print classification report
logistic_50 = logistic_50.predict(X_test)
report = classification_report(y_test, logistic_50)
print(report)

print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

              precision    recall  f1-score   support

      bottle       0.64      0.78      0.70         9
         cat       0.75      0.67      0.71         9
       chair       0.83      0.62      0.71         8
        face       0.82      1.00      0.90        14
       house       0.86      1.00      0.92         6
        rest       0.90      0.88      0.89        64
    scissors       0.75      0.69      0.72        13
scrambledpix       0.91      0.83      0.87        12
        shoe       0.75      0.82      0.78        11

    accuracy                           0.84       146
   macro avg       0.80      0.81      0.80       146
weighted avg       0.84      0.84      0.83       146

Sparsity with L1 penalty: 0.00%
Test score with L1 penalty: 0.8356


In [18]:
logistic_50 = LogisticRegression(C=50., multi_class='multinomial',
                     penalty='elasticnet', solver='saga', tol=0.1, l1_ratio=0.4)

#train model
logistic_50.fit(X_train, y_train)
sparsity = np.mean(logistic_50.coef_) * 100
score = logistic_50.score(X_test, y_test)

# Prediction accuracy
cv_scores_logistic_50 = cross_val_score(logistic_50, X_train, y_train, cv=5) 
print(cv_scores_logistic_50)

# The mean prediction accuracy
classification_accuracy_logistic_50 = np.mean(cv_scores_logistic_50)
classification_accuracy_logistic_50

[0.80916031 0.82375479 0.76628352 0.80842912 0.7816092 ]


0.7978473867392004

In [19]:
prediction_accuracy_logistic_50 = cv_scores_logistic_50 * 100
print(prediction_accuracy_logistic_50)

[80.91603053 82.37547893 76.62835249 80.84291188 78.16091954]


### Support vector machine

In [20]:
#standarize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovo', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

svm.fit(X_train, y_train)

#accuracy
score = svm.score(X_test, y_test)

#print classification report
svm = svm.predict(X_test)
report = classification_report(y_test, svm)
print(report)

print("Test score with L1 penalty: %.4f" % score)

              precision    recall  f1-score   support

      bottle       0.64      0.78      0.70         9
         cat       0.90      1.00      0.95         9
       chair       0.62      0.62      0.62         8
        face       0.87      0.93      0.90        14
       house       0.86      1.00      0.92         6
        rest       0.90      0.89      0.90        64
    scissors       0.82      0.69      0.75        13
scrambledpix       0.90      0.75      0.82        12
        shoe       0.82      0.82      0.82        11

    accuracy                           0.85       146
   macro avg       0.81      0.83      0.82       146
weighted avg       0.85      0.85      0.85       146

Test score with L1 penalty: 0.8493


In [21]:
#standarize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovo', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

svm.fit(X_train, y_train)

# Prediction accuracy
cv_scores_svm  = cross_val_score(svm , X_train, y_train, cv=5) 
print(cv_scores_svm)

# The mean prediction accuracy
classification_accuracy_svm  = np.mean(cv_scores_svm )
classification_accuracy_svm

[0.85877863 0.82758621 0.83524904 0.8467433  0.83141762]


0.8399549589073148

In [22]:
prediction_accuracy_svm = cv_scores_svm * 100
print(prediction_accuracy_svm)

[85.8778626  82.75862069 83.52490421 84.6743295  83.14176245]


In [23]:
# A dictionary, to hold all our classifiers
classifiers = {'SVC': svm,
               'LogisticRegression': logistic_50,
               'KNeighborsClassifier':knn,
               'DecisionTreeClassifier': dtc,
               'MLPClassifier': nn,
               'Multinomial Naive Bayes': mnb,
               }

In [None]:
# import os
# os.remove('prediction_accuracy_result.csv')

In [None]:
# Support vector machine: [85.8778626  82.75862069 83.52490421 84.6743295  83.14176245]
# Naive Bayes: [81.67938931 79.69348659 78.5440613  79.69348659 79.31034483]
# Logistic Regression: [80.91603053 82.37547893 76.62835249 80.84291188 78.16091954]
# Nearest Neighbours: [73.66412214 70.88122605 69.73180077 75.86206897 69.348659]
# Neural Networks: [58.01526718 57.47126437 59.38697318 57.85440613 55.17241379]
# Decision trees: [51.52671756 44.06130268 40.22988506 47.50957854 43.67816092]

In [74]:
df = pd.DataFrame()

dict = [ 
    {'Classifier':'SVM', 'Fold':'1', 'Accuraacy%':'85.87'},
    {'Classifier':'SVM', 'Fold':'2', 'Accuraacy%':'82.75'},
    {'Classifier':'SVM', 'Fold':'3', 'Accuraacy%':'83.52'},
    {'Classifier':'SVM', 'Fold':'4', 'Accuraacy%':'84.67'},
    {'Classifier':'SVM', 'Fold':'5', 'Accuraacy%':'83.14'},
    
    {'Classifier':'Naive Bayes', 'Fold':'1', 'Accuraacy%':'81.67'},
    {'Classifier':'Naive Bayes', 'Fold':'2', 'Accuraacy%':'79.69'},
    {'Classifier':'Naive Bayes', 'Fold':'3', 'Accuraacy%':'78.54'},
    {'Classifier':'Naive Bayes', 'Fold':'4', 'Accuraacy%':'79.69'},
    {'Classifier':'Naive Bayes', 'Fold':'5', 'Accuraacy%':'79.31'},
            
    {'Classifier':'Logistic Regression', 'Fold':'1', 'Accuraacy%':'80.91'},
    {'Classifier':'Logistic Regression', 'Fold':'2', 'Accuraacy%':'82.37'},
    {'Classifier':'Logistic Regression', 'Fold':'3', 'Accuraacy%':'76.62'},
    {'Classifier':'Logistic Regression', 'Fold':'4', 'Accuraacy%':'80.84'},
    {'Classifier':'Logistic Regression', 'Fold':'5', 'Accuraacy%':'78.16'},
    
    {'Classifier':'K-Nearest', 'Fold':'1', 'Accuraacy%':'73.66'},
    {'Classifier':'K-Nearest', 'Fold':'2', 'Accuraacy%':'70.88'},
    {'Classifier':'K-Nearest', 'Fold':'3', 'Accuraacy%':'69.73'},
    {'Classifier':'K-Nearest', 'Fold':'4', 'Accuraacy%':'75.86'},
    {'Classifier':'K-Nearest', 'Fold':'5', 'Accuraacy%':'69.34'},    
    
    {'Classifier':'Neural Networks', 'Fold':'1', 'Accuraacy%':'58.01'},
    {'Classifier':'Neural Networks', 'Fold':'2', 'Accuraacy%':'57.47'},
    {'Classifier':'Neural Networks', 'Fold':'3', 'Accuraacy%':'59.38'},
    {'Classifier':'Neural Networks', 'Fold':'4', 'Accuraacy%':'57.85'},
    {'Classifier':'Neural Networks', 'Fold':'5', 'Accuraacy%':'55.17'},
    
    {'Classifier':'Decision trees', 'Fold':'1', 'Accuraacy%':'51.52'},
    {'Classifier':'Decision trees', 'Fold':'2', 'Accuraacy%':'44.06'},
    {'Classifier':'Decision trees', 'Fold':'3', 'Accuraacy%':'40.22'},
    {'Classifier':'Decision trees', 'Fold':'4', 'Accuraacy%':'47.50'},
    {'Classifier':'Decision trees', 'Fold':'5', 'Accuraacy%':'43.67'}]

df = pd.DataFrame(data=dict)

df.to_csv ('prediction_accuracy_result.csv', index = False)

print (df)

             Classifier Fold Accuraacy%
0                   SVM    1      85.87
1                   SVM    2      82.75
2                   SVM    3      83.52
3                   SVM    4      84.67
4                   SVM    5      83.14
5           Naive Bayes    1      81.67
6           Naive Bayes    2      79.69
7           Naive Bayes    3      78.54
8           Naive Bayes    4      79.69
9           Naive Bayes    5      79.31
10  Logistic Regression    1      80.91
11  Logistic Regression    2      82.37
12  Logistic Regression    3      76.62
13  Logistic Regression    4      80.84
14  Logistic Regression    5      78.16
15            K-Nearest    1      73.66
16            K-Nearest    2      70.88
17            K-Nearest    3      69.73
18            K-Nearest    4      75.86
19            K-Nearest    5      69.34
20      Neural Networks    1      58.01
21      Neural Networks    2      57.47
22      Neural Networks    3      59.38
23      Neural Networks    4      57.85


In [76]:
fig = px.scatter(df, x="Fold", y="Accuraacy%", color="Classifier", marginal_y="rug")
fig.show()

In [None]:
# # List arguments in wide form
# series1 = [45.03816794, 43.67816092, 48.27586207, 48.27586207, 45.97701149]
# series2 = [74.80916031, 74.71264368, 77.39463602, 68.19923372, 70.11494253]
# fig = px.line(x=[1, 2, 3, 4, 5], y=[series1, series2], title="1",  )
# fig.show()

In [90]:
fig = px.line(df, x="Fold", y="Accuraacy%", color="Classifier", title= "Classifiers Accuracy Results")
# fig.update_xaxes(ticks="inside", y=["1","2","3","4","5"])
fig.update_yaxes(ticks="inside", col= [1, 2, 3, 4, 5])
#fig.update_xaxes(ticks="inside", col=1)

fig.update_xaxes(zeroline=True, zerolinewidth=2, zerolinecolor='LightPink')
fig.update_yaxes(zeroline=True, zerolinewidth=2, zerolinecolor='LightPink')


fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    paper_bgcolor="LightSteelBlue",
)

fig.show()
