imports

In [98]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, balanced_accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_validate

In [99]:

def preprocess(file_path,split_value,target_column_index,has_header,delete_list,extraction_list):
    """Read a comma-separated dataset and return its train/test and feature/target splits.

    Parameters
    ----------
    file_path : path of the .csv file to load.
    split_value : forwarded to ``train_test_split`` as ``test_size``.
    target_column_index : positional index of the target column in the file.
    has_header : truthy when the first row holds the column names, falsy when
        the file has no header row.
    delete_list, extraction_list : lists of column labels; the concatenation of
        both lists is dropped from the feature frame.

    Returns
    -------
    (x_train, x_test, y_train, y_test, x_ds, y_ds) where ``x_ds`` / ``y_ds`` are
    the full (unsplit) feature frame and target column.
    """
    # header=0 -> first row is the header; header=None -> no header row
    header_row = 0 if has_header else None
    df = pd.read_csv(file_path, header=header_row, delimiter=",")
    print(f"Dataset shape: {df.shape}")
    if has_header:
        print(f"Column names: {list(df.columns)}")

    # separate the target column from the remaining columns
    target_label = df.columns[target_column_index]
    y_ds = df.iloc[:, target_column_index]
    x_ds = df.drop(target_label, axis=1)

    # remove the columns the caller asked to leave out of the features
    columns_to_remove = delete_list + extraction_list
    x_ds = x_ds.drop(columns = columns_to_remove)

    # shuffled train/test split with a fixed seed
    split_parts = train_test_split(x_ds, y_ds, test_size=split_value, random_state=10, shuffle= True)
    x_train, x_test, y_train, y_test = split_parts
    print(f'shape of x training: {x_train.shape}')
    print(f'shape of x testing: {x_test.shape}')

    return x_train, x_test, y_train, y_test, x_ds, y_ds

In [100]:
def implement(model, x_train, y_train, x_test, y_test, x_ds, y_ds):
    """Fit ``model`` once, print hold-out metrics, then cross-validate it on the full dataset.

    Parameters
    ----------
    model : estimator exposing ``fit`` / ``predict``; it is fitted in place on
        the training split and then handed to ``cross_validate``.
    x_train, y_train : training features / target used for the single fit.
    x_test, y_test : hold-out features / target used for the printed metrics.
    x_ds, y_ds : full feature frame / target used for the 10-fold
        StratifiedKFold cross-validation.

    Returns
    -------
    The dictionary returned by ``cross_validate`` (keys such as
    ``'test_balanced_accuracy'``, ``'test_accuracy'``,
    ``'test_precision_weighted'`` and ``'test_recall_weighted'``).

    Note: this function has no compile step, so it is not appropriate for
    perceptron-style learning algorithms: it has no training-vs-test accuracy
    comparison, no epochs and no ``.argmax(axis=1)``.
    """
    # single fit on the training split
    model.fit(x_train, y_train)
    # model evaluation on the hold-out split
    y_pred = model.predict(x_test)

    print(f'Confusion matrix: \n{confusion_matrix(y_test, y_pred)}')
    print(f'Classification report: {classification_report(y_test, y_pred)}')
    print(f"Accuracy score is: {accuracy_score(y_test,y_pred):.3f}")
    print(f"Balanced accuracy score is: {balanced_accuracy_score(y_test,y_pred):.3f}")
    print(f"Precision score is: {precision_score(y_test,y_pred, average='weighted'):.3f}")
    print(f"Recall score is: {recall_score(y_test,y_pred, average='weighted'):.3f}")

    # StratifiedKFold evaluations on the full dataset
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=10)
    scoring = ['balanced_accuracy', 'accuracy', 'precision_weighted', 'recall_weighted']
    cv_results = cross_validate(model, x_ds, y_ds, cv=skf, scoring=scoring)

    # One summary line per metric: mean over folds and two standard deviations.
    # The backslash of the LaTeX macro is escaped ("\\scriptsize"): a bare "\s"
    # is an invalid escape sequence and triggers a warning on newer Python
    # versions. The printed text is unchanged.
    summary_lines = (
        ("Balanced Accuracy", 'test_balanced_accuracy'),
        ("Accuracy", 'test_accuracy'),
        ("Precision", 'test_precision_weighted'),
        ("Recall", 'test_recall_weighted'),
    )
    for label, key in summary_lines:
        fold_scores = cv_results[key]
        print(f"StratifiedKFold CV {label}: {fold_scores.mean():.3f}\\scriptsize(±{fold_scores.std() * 2:.2f})")

    return cv_results
        

  print(f"StratifiedKFold CV Balanced Accuracy: {cv_results['test_balanced_accuracy'].mean():.3f}\scriptsize(±{cv_results['test_balanced_accuracy'].std() * 2:.2f})")
  print(f"StratifiedKFold CV Accuracy: {cv_results['test_accuracy'].mean():.3f}\scriptsize(±{cv_results['test_accuracy'].std() * 2:.2f})")
  print(f"StratifiedKFold CV Precision: {cv_results['test_precision_weighted'].mean():.3f}\scriptsize(±{cv_results['test_precision_weighted'].std() * 2:.2f})")
  print(f"StratifiedKFold CV Recall: {cv_results['test_recall_weighted'].mean():.3f}\scriptsize(±{cv_results['test_recall_weighted'].std() * 2:.2f})")


Select the dataset!

Runs the preprocess function for the different datasets.

In [101]:
# Beans dataset: target is the column at index 16, no feature columns dropped.
x_train1, x_test1, y_train1, y_test1, x_ds1, y_ds1 = preprocess(
    file_path="beans_kmeans.csv",
    split_value=0.2,
    target_column_index=16,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
shape of x training: (10888, 16)
shape of x testing: (2723, 16)


In [102]:
# Beans dataset again, this time with the columns in extraction_list dropped from the features.
x_train2, x_test2, y_train2, y_test2, x_ds2, y_ds2 = preprocess(
    file_path="beans_kmeans.csv",
    split_value=0.2,
    target_column_index=16,
    has_header=True,
    delete_list=[],
    extraction_list=[
        "ShapeFactor2",
        "Compactness",
        "ShapeFactor3",
        "roundness",
        "MajorAxisLength",
        "Eccentricity",
        "AspectRation",
    ],
)

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
shape of x training: (10888, 9)
shape of x testing: (2723, 9)


x_train, x_test, y_train, y_test, x_ds, y_ds = preprocess("diabetes_kmeans.csv", 0.2, 8, True, [], [])

x_train, x_test, y_train, y_test, x_ds, y_ds = preprocess("diabetes_kmeans.csv", 0.2, 8, True, [], [])

In [103]:
# Divorce dataset: target is the column at index 54, no feature columns dropped.
x_train3, x_test3, y_train3, y_test3, x_ds3, y_ds3 = preprocess(
    file_path="divorce.csv",
    split_value=0.2,
    target_column_index=54,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
shape of x training: (136, 54)
shape of x testing: (34, 54)


In [104]:
# Divorce dataset again, this time with the columns in extraction_list dropped from the features.
x_train4, x_test4, y_train4, y_test4, x_ds4, y_ds4 = preprocess(
    file_path="divorce.csv",
    split_value=0.2,
    target_column_index=54,
    has_header=True,
    delete_list=[],
    extraction_list=['Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr52', 'Atr54'],
)

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
shape of x training: (136, 47)
shape of x testing: (34, 47)


In [105]:
# Parkinson's dataset: target is the column at index 17; only the 'name' column is dropped.
x_train5, x_test5, y_train5, y_test5, x_ds5, y_ds5 = preprocess(
    file_path="parkinsons_kmeans.csv",
    split_value=0.2,
    target_column_index=17,
    has_header=True,
    delete_list=['name'],
    extraction_list=[],
)

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
shape of x training: (156, 22)
shape of x testing: (39, 22)


In [106]:
# Parkinson's dataset again: 'name' plus the columns in extraction_list are dropped from the features.
x_train6, x_test6, y_train6, y_test6, x_ds6, y_ds6 = preprocess(
    file_path="parkinsons_kmeans.csv",
    split_value=0.2,
    target_column_index=17,
    has_header=True,
    delete_list=['name'],
    extraction_list=['spread1', 'HNR', 'PPE', 'MDVP:APQ'],
)

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
shape of x training: (156, 18)
shape of x testing: (39, 18)


In [107]:
# Rice dataset: target is the column at index 7, no feature columns dropped.
x_train7, x_test7, y_train7, y_test7, x_ds7, y_ds7 = preprocess(
    file_path="rice_binned_kmeans.csv",
    split_value=0.2,
    target_column_index=7,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
shape of x training: (3048, 7)
shape of x testing: (762, 7)


In [108]:
# Rice dataset again, this time with the columns in extraction_list dropped from the features.
x_train8, x_test8, y_train8, y_test8, x_ds8, y_ds8 = preprocess(
    file_path="rice_binned_kmeans.csv",
    split_value=0.2,
    target_column_index=7,
    has_header=True,
    delete_list=[],
    extraction_list=['Eccentricity', 'Major_Axis_Length', 'Perimeter'],
)

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
shape of x training: (3048, 4)
shape of x testing: (762, 4)


x_train, x_test, y_train, y_test, x_ds, y_ds = preprocess("wdbc_binned_kmeans.csv", 0.2, 1, True, ['ID'], [])

x_train, x_test, y_train, y_test, x_ds, y_ds = preprocess("wdbc_binned_kmeans.csv", 0.2, 1, True, ['ID'], [])

Run the implement function for the different models.

In [109]:
#KNN
from sklearn.neighbors import KNeighborsClassifier

# Evaluate a fresh KNeighborsClassifier on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(KNeighborsClassifier(), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(knn_0_bacc, knn_1_bacc, knn_2_bacc, knn_3_bacc,
 knn_0_precision, knn_1_precision, knn_2_precision, knn_3_precision,
 knn_0_recall, knn_1_recall, knn_2_recall, knn_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]

Confusion matrix: 
[[213   0  14   0   1   1   4]
 [  0 117   1   0   0   0   0]
 [ 11   0 310   0   6   1   2]
 [  0   0   0 645   0   8  53]
 [  0   0   7   5 370   0  14]
 [  4   0   0  12   0 384  11]
 [  1   0   2  62   5   8 451]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.93      0.91      0.92       233
      BOMBAY       1.00      0.99      1.00       118
        CALI       0.93      0.94      0.93       330
    DERMASON       0.89      0.91      0.90       706
       HOROZ       0.97      0.93      0.95       396
       SEKER       0.96      0.93      0.94       411
        SIRA       0.84      0.85      0.85       529

    accuracy                           0.91      2723
   macro avg       0.93      0.93      0.93      2723
weighted avg       0.92      0.91      0.91      2723

Accuracy score is: 0.914
Balanced accuracy score is: 0.926
Precision score is: 0.915
Recall score is: 0.914
StratifiedKFold CV Balanced Accurac

In [110]:
#Ada Boost
from sklearn.ensemble import AdaBoostClassifier

# Evaluate a fresh, seeded AdaBoostClassifier on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(AdaBoostClassifier(n_estimators=100, random_state=10), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(ada_0_bacc, ada_1_bacc, ada_2_bacc, ada_3_bacc,
 ada_0_precision, ada_1_precision, ada_2_precision, ada_3_precision,
 ada_0_recall, ada_1_recall, ada_2_recall, ada_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]


Confusion matrix: 
[[173   0  50   0   5   1   4]
 [  0 118   0   0   0   0   0]
 [  6   0 247   0  75   1   1]
 [  0   0   0 559  10  25 112]
 [  1   0   3   2 380   0  10]
 [  8   0   0  25   0 367  11]
 [  3   0   2  39  20  27 438]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.91      0.74      0.82       233
      BOMBAY       1.00      1.00      1.00       118
        CALI       0.82      0.75      0.78       330
    DERMASON       0.89      0.79      0.84       706
       HOROZ       0.78      0.96      0.86       396
       SEKER       0.87      0.89      0.88       411
        SIRA       0.76      0.83      0.79       529

    accuracy                           0.84      2723
   macro avg       0.86      0.85      0.85      2723
weighted avg       0.84      0.84      0.84      2723

Accuracy score is: 0.838
Balanced accuracy score is: 0.852
Precision score is: 0.844
Recall score is: 0.838


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.772\scriptsize(±0.13)
StratifiedKFold CV Accuracy: 0.833\scriptsize(±0.06)
StratifiedKFold CV Precision: 0.832\scriptsize(±0.07)
StratifiedKFold CV Recall: 0.833\scriptsize(±0.06)
Confusion matrix: 
[[176   0  47   0   0   0  10]
 [  0 109   9   0   0   0   0]
 [ 28   0 293   0   7   0   2]
 [  0   0   0 629   0   5  72]
 [  2   0  22  19 284   0  69]
 [  6   0   1  74   0 302  28]
 [  3   0   4  49  10  15 448]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.82      0.76      0.79       233
      BOMBAY       1.00      0.92      0.96       118
        CALI       0.78      0.89      0.83       330
    DERMASON       0.82      0.89      0.85       706
       HOROZ       0.94      0.72      0.81       396
       SEKER       0.94      0.73      0.82       411
        SIRA       0.71      0.85      0.77       529

    accuracy                           0.82      2723
   macro avg       0.86      0.8

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.753\scriptsize(±0.14)
StratifiedKFold CV Accuracy: 0.804\scriptsize(±0.07)
StratifiedKFold CV Precision: 0.819\scriptsize(±0.07)
StratifiedKFold CV Recall: 0.804\scriptsize(±0.07)
Confusion matrix: 
[[17  0]
 [ 0 17]]
Classification report:               precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        17

    accuracy                           1.00        34
   macro avg       1.00      1.00      1.00        34
weighted avg       1.00      1.00      1.00        34

Accuracy score is: 1.000
Balanced accuracy score is: 1.000
Precision score is: 1.000
Recall score is: 1.000
StratifiedKFold CV Balanced Accuracy: 0.976\scriptsize(±0.08)
StratifiedKFold CV Accuracy: 0.976\scriptsize(±0.08)
StratifiedKFold CV Precision: 0.980\scriptsize(±0.07)
StratifiedKFold CV Recall: 0.976\scriptsize(±0.08)
Confusion matrix: 
[[17  0]
 [ 0 17]]
Classification report:    

In [111]:
#SVM
from sklearn.svm import SVC 

# Evaluate a fresh SVC on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(SVC(max_iter = -1, random_state=10), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(svm_0_bacc, svm_1_bacc, svm_2_bacc, svm_3_bacc,
 svm_0_precision, svm_1_precision, svm_2_precision, svm_3_precision,
 svm_0_recall, svm_1_recall, svm_2_recall, svm_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]

Confusion matrix: 
[[216   0  11   0   1   1   4]
 [  0 118   0   0   0   0   0]
 [ 10   0 313   0   5   1   1]
 [  0   0   0 644   1   9  52]
 [  0   0   8   7 369   0  12]
 [  6   0   0  10   0 383  12]
 [  0   0   0  41  10   8 470]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.93      0.93      0.93       233
      BOMBAY       1.00      1.00      1.00       118
        CALI       0.94      0.95      0.95       330
    DERMASON       0.92      0.91      0.91       706
       HOROZ       0.96      0.93      0.94       396
       SEKER       0.95      0.93      0.94       411
        SIRA       0.85      0.89      0.87       529

    accuracy                           0.92      2723
   macro avg       0.94      0.93      0.94      2723
weighted avg       0.92      0.92      0.92      2723

Accuracy score is: 0.923
Balanced accuracy score is: 0.934
Precision score is: 0.924
Recall score is: 0.923
StratifiedKFold CV Balanced Accurac

In [112]:
#Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB 

# Evaluate a fresh GaussianNB on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(GaussianNB(), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(gnb_0_bacc, gnb_1_bacc, gnb_2_bacc, gnb_3_bacc,
 gnb_0_precision, gnb_1_precision, gnb_2_precision, gnb_3_precision,
 gnb_0_recall, gnb_1_recall, gnb_2_recall, gnb_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]

Confusion matrix: 
[[179   0  42   0   0   1  11]
 [  1 117   0   0   0   0   0]
 [ 27   0 297   0   4   1   1]
 [  0   0   0 604   0  20  82]
 [  0   0  11   6 365   0  14]
 [  5   0   0   5   0 387  14]
 [  6   0   0  27  11  14 471]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.82      0.77      0.79       233
      BOMBAY       1.00      0.99      1.00       118
        CALI       0.85      0.90      0.87       330
    DERMASON       0.94      0.86      0.90       706
       HOROZ       0.96      0.92      0.94       396
       SEKER       0.91      0.94      0.93       411
        SIRA       0.79      0.89      0.84       529

    accuracy                           0.89      2723
   macro avg       0.90      0.90      0.90      2723
weighted avg       0.89      0.89      0.89      2723

Accuracy score is: 0.889
Balanced accuracy score is: 0.896
Precision score is: 0.892
Recall score is: 0.889
StratifiedKFold CV Balanced Accurac

In [113]:
#Random Forests
from sklearn.ensemble import RandomForestClassifier

# Evaluate a fresh RandomForestClassifier on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
# random_state=10 (the seed already used for the splits and for the AdaBoost/SVC
# cells) makes the forest deterministic, so the deltas below are reproducible
# across runs instead of changing with every execution.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(RandomForestClassifier(random_state=10), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(rf_0_bacc, rf_1_bacc, rf_2_bacc, rf_3_bacc,
 rf_0_precision, rf_1_precision, rf_2_precision, rf_3_precision,
 rf_0_recall, rf_1_recall, rf_2_recall, rf_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]


Confusion matrix: 
[[216   0   9   0   1   2   5]
 [  0 118   0   0   0   0   0]
 [ 14   0 308   0   6   1   1]
 [  0   0   0 646   1   8  51]
 [  2   0   7   9 363   0  15]
 [  2   0   0  15   0 382  12]
 [  2   0   1  52   7   7 460]]
Classification report:               precision    recall  f1-score   support

    BARBUNYA       0.92      0.93      0.92       233
      BOMBAY       1.00      1.00      1.00       118
        CALI       0.95      0.93      0.94       330
    DERMASON       0.89      0.92      0.90       706
       HOROZ       0.96      0.92      0.94       396
       SEKER       0.95      0.93      0.94       411
        SIRA       0.85      0.87      0.86       529

    accuracy                           0.92      2723
   macro avg       0.93      0.93      0.93      2723
weighted avg       0.92      0.92      0.92      2723

Accuracy score is: 0.916
Balanced accuracy score is: 0.927
Precision score is: 0.917
Recall score is: 0.916
StratifiedKFold CV Balanced Accurac

In [None]:
#Decision Trees
from sklearn.tree import DecisionTreeClassifier

# Evaluate a fresh DecisionTreeClassifier on each prepared dataset, in order 1..8.
# Odd-numbered datasets were built with an empty extraction_list, even-numbered
# ones are the same file with extra feature columns dropped.
# random_state=10 (the seed already used for the splits and for the AdaBoost/SVC
# cells) makes the tree deterministic, so the deltas below are reproducible
# across runs instead of changing with every execution.
(cv_results1, cv_results2, cv_results3, cv_results4,
 cv_results5, cv_results6, cv_results7, cv_results8) = [
    implement(DecisionTreeClassifier(random_state=10), *dataset_parts)
    for dataset_parts in (
        (x_train1, y_train1, x_test1, y_test1, x_ds1, y_ds1),
        (x_train2, y_train2, x_test2, y_test2, x_ds2, y_ds2),
        (x_train3, y_train3, x_test3, y_test3, x_ds3, y_ds3),
        (x_train4, y_train4, x_test4, y_test4, x_ds4, y_ds4),
        (x_train5, y_train5, x_test5, y_test5, x_ds5, y_ds5),
        (x_train6, y_train6, x_test6, y_test6, x_ds6, y_ds6),
        (x_train7, y_train7, x_test7, y_test7, x_ds7, y_ds7),
        (x_train8, y_train8, x_test8, y_test8, x_ds8, y_ds8),
    )
]

# Mean CV score of the reduced-feature run minus that of the full-feature run,
# rounded to 3 decimals; suffix 0..3 follows the dataset order above.
(dt_0_bacc, dt_1_bacc, dt_2_bacc, dt_3_bacc,
 dt_0_precision, dt_1_precision, dt_2_precision, dt_3_precision,
 dt_0_recall, dt_1_recall, dt_2_recall, dt_3_recall) = [
    round(reduced_cv[metric_key].mean() - full_cv[metric_key].mean(), 3)
    for metric_key in ('test_balanced_accuracy', 'test_precision_weighted', 'test_recall_weighted')
    for full_cv, reduced_cv in (
        (cv_results1, cv_results2),
        (cv_results3, cv_results4),
        (cv_results5, cv_results6),
        (cv_results7, cv_results8),
    )
]

TypeError: implement() missing 6 required positional arguments: 'x_train', 'y_train', 'x_test', 'y_test', 'x_ds', and 'y_ds'

Comparison results, printed in LaTeX table format for further use.

In [None]:
# Emit the body of a LaTeX table: one row per (dataset, metric), with one cell per
# model in the order KNN, AdaBoost, SVM, GaussianNB, Random Forest, Decision Tree.
# Each "\\\\" in the f-string prints the LaTeX row terminator "\\".
# The rows labelled "accuracy" hold the *_bacc values (balanced-accuracy deltas).
# Fix: the Beans recall row was missing its "& recall" label cell, which left that
# row one column short of every other row.
print (f"""
Beans 
& accuracy
& {knn_0_bacc}
& {ada_0_bacc}
& {svm_0_bacc}
& {gnb_0_bacc}
& {rf_0_bacc}
& {dt_0_bacc}
\\\\
Beans 
& precision 
& {knn_0_precision}
& {ada_0_precision}
& {svm_0_precision}
& {gnb_0_precision}
& {rf_0_precision}
& {dt_0_precision}
\\\\
Beans 
& recall 
& {knn_0_recall}
& {ada_0_recall}
& {svm_0_recall}
& {gnb_0_recall}
& {rf_0_recall}
& {dt_0_recall}
\\\\Divorce 
& accuracy
& {knn_1_bacc}
& {ada_1_bacc}
& {svm_1_bacc}
& {gnb_1_bacc}
& {rf_1_bacc}
& {dt_1_bacc}
\\\\
Divorce 
& precision 
& {knn_1_precision}
& {ada_1_precision}
& {svm_1_precision}
& {gnb_1_precision}
& {rf_1_precision}
& {dt_1_precision}
\\\\
Divorce
& recall 
& {knn_1_recall}
& {ada_1_recall}
& {svm_1_recall}
& {gnb_1_recall}
& {rf_1_recall}
& {dt_1_recall}
\\\\Parkinson's 
& accuracy
& {knn_2_bacc}
& {ada_2_bacc}
& {svm_2_bacc}
& {gnb_2_bacc}
& {rf_2_bacc}
& {dt_2_bacc}
\\\\
Parkinson's 
& precision 
& {knn_2_precision}
& {ada_2_precision}
& {svm_2_precision}
& {gnb_2_precision}
& {rf_2_precision}
& {dt_2_precision} 
\\\\
Parkinson's 
& recall 
& {knn_2_recall}
& {ada_2_recall}
& {svm_2_recall}
& {gnb_2_recall}
& {rf_2_recall}
& {dt_2_recall}
\\\\
Rice 
& accuracy
& {knn_3_bacc}
& {ada_3_bacc}
& {svm_3_bacc}
& {gnb_3_bacc}
& {rf_3_bacc}
& {dt_3_bacc}
\\\\
Rice 
& precision 
& {knn_3_precision}
& {ada_3_precision}
& {svm_3_precision}
& {gnb_3_precision}
& {rf_3_precision}
& {dt_3_precision}
\\\\
Rice 
& recall 
& {knn_3_recall}
& {ada_3_recall}
& {svm_3_recall}
& {gnb_3_recall}
& {rf_3_recall}
& {dt_3_recall}
\\\\
""")