In [None]:
from google.colab import drive

# Colab-only step: attach the user's Google Drive to the runtime filesystem so
# the Excel datasets stored there can be read by later cells.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

def _confusion_rates(cm):
    """Return ``(sensitivity, specificity)`` computed from a square confusion matrix.

    Rows are true classes and columns are predicted classes, in the label
    order that was passed to ``confusion_matrix``.

    * Two classes: the second label is treated as the positive class, so
      sensitivity is ``cm[1, 1] / row 1 total`` and specificity is
      ``cm[0, 0] / row 0 total``.
      NOTE(review): this assumes the labels sort with the negative class
      first (e.g. 0/1) -- confirm against the encoding of the 'kelas' column.
    * Any other number of classes: one-vs-rest rates are computed per class
      and macro-averaged, ignoring classes for which a rate is undefined.

    A rate whose denominator is zero (class absent from the test split) is NaN.
    """
    cm = np.asarray(cm, dtype=float)
    true_pos = np.diag(cm)
    false_neg = cm.sum(axis=1) - true_pos
    false_pos = cm.sum(axis=0) - true_pos
    true_neg = cm.sum() - true_pos - false_neg - false_pos

    # 0/0 yields NaN here on purpose; suppress the floating-point warning only.
    with np.errstate(divide='ignore', invalid='ignore'):
        sensitivity = true_pos / (true_pos + false_neg)
        specificity = true_neg / (true_neg + false_pos)

    if cm.shape[0] == 2:
        return float(sensitivity[1]), float(specificity[1])
    return float(np.nanmean(sensitivity)), float(np.nanmean(specificity))


def _evaluate_split(classifier, X_train, X_test, y_train, y_test, labels):
    """Fit a fresh scaler and classifier on one train/test split and score it.

    Returns a tuple ``(sensitivity, specificity, accuracy, f1)`` where accuracy
    and the support-weighted F1 come from ``classification_report``.
    """
    # Fit the scaler on the training part only, so that no statistics of the
    # held-out samples leak into the model.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = svm.SVC() if classifier == 'SVM' else KNeighborsClassifier()
    clf.fit(X_train_scaled, y_train)
    y_pred = clf.predict(X_test_scaled)

    report = classification_report(y_test, y_pred, output_dict=True)
    # Passing the full label set keeps the matrix shape stable even when a
    # class happens to be missing from this particular test split.
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    sensitivity, specificity = _confusion_rates(cm)

    return sensitivity, specificity, report['accuracy'], report['weighted avg']['f1-score']


def classify_and_evaluate(scenario, classifier):
    """Train and evaluate a classifier on every CPA/CPB dataset and print the metrics.

    For each Excel file (CPA1-3 and CPB1-7 under ``drive/My Drive/CP``) the
    'kelas' column is used as the target and all remaining columns as
    features. Sensitivity, specificity, accuracy and weighted F1 are printed
    per dataset, followed by their averages over all datasets.

    Parameters
    ----------
    scenario : str
        '10-fold CV'  -- stratified 10-fold cross-validation, metrics averaged
                         over the folds;
        '75-25'       -- single 75 % train / 25 % test split;
        '50-25-25'    -- 50 % train, 25 % validation, 25 % test. The validation
                         quarter is split off but not used by this function.
    classifier : str
        'SVM' (``svm.SVC`` with default parameters) or 'K-NN'
        (``KNeighborsClassifier`` with default parameters).

    Returns
    -------
    None
        Results are printed only. An unrecognized ``classifier`` or
        ``scenario`` prints a message and returns before any file is read.
    """
    # Validate the arguments up front so a typo does not cost a Drive read
    # (or surface as an unrelated file error).
    if classifier not in ('SVM', 'K-NN'):
        print('Classifier not recognized')
        return
    if scenario not in ('10-fold CV', '75-25', '50-25-25'):
        print('Scenario not recognized')
        return

    datasets = [f'drive/My Drive/CP/CPA{i}.xlsx' for i in range(1, 4)] + [f'drive/My Drive/CP/CPB{i}.xlsx' for i in range(1, 8)]

    per_dataset_scores = []

    for dataset in datasets:
        data = pd.read_excel(dataset)

        X = data.drop('kelas', axis=1)
        y = data['kelas']
        labels = np.unique(y)  # sorted label set, shared by every split

        # Build the list of (X_train, X_test, y_train, y_test) splits to score.
        if scenario == '10-fold CV':
            skf = StratifiedKFold(n_splits=10)
            splits = [
                (X.iloc[train_index], X.iloc[test_index], y.iloc[train_index], y.iloc[test_index])
                for train_index, test_index in skf.split(X, y)
            ]
        elif scenario == '75-25':
            splits = [tuple(train_test_split(X, y, test_size=0.25, random_state=42))]
        else:  # 50-25-25
            X_train, X_val_test, y_train, y_val_test = train_test_split(X, y, test_size=0.5, random_state=42)
            # The validation quarter is discarded: no tuning happens here.
            _X_val, X_test, _y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)
            splits = [(X_train, X_test, y_train, y_test)]

        split_scores = np.array([
            _evaluate_split(classifier, X_tr, X_te, y_tr, y_te, labels)
            for X_tr, X_te, y_tr, y_te in splits
        ])
        # Average over however many splits were produced (10 folds or 1 split).
        sensitivity, specificity, accuracy, f1 = (float(v) for v in split_scores.mean(axis=0))

        print(f'Dataset {dataset}:')
        print(f'Sensitivity: {sensitivity}')
        print(f'Specificity: {specificity}')
        print(f'Accuracy: {accuracy}')
        print(f'F1-Score: {f1}\n')

        per_dataset_scores.append((sensitivity, specificity, accuracy, f1))

    # Divide by the real number of datasets instead of a hard-coded 10.
    total_sensitivity, total_specificity, total_accuracy, total_f1 = (
        float(v) for v in np.mean(per_dataset_scores, axis=0)
    )

    print('Average Performance Parameters:')
    print(f'Sensitivity: {total_sensitivity}')
    print(f'Specificity: {total_specificity}')
    print(f'Accuracy: {total_accuracy}')
    print(f'F1-Score: {total_f1}')


In [None]:
# Supported scenarios:   '10-fold CV', '75-25', '50-25-25'
# Supported classifiers: 'SVM', 'K-NN'
classify_and_evaluate(scenario='50-25-25', classifier='SVM')

Dataset drive/My Drive/CP/CPA1.xlsx:
Sensitivity: 0.8
Specificity: 0.7931034482758621
Accuracy: 0.8
F1-Score: 0.8009740259740261

Dataset drive/My Drive/CP/CPA2.xlsx:
Sensitivity: 0.7
Specificity: 0.6896551724137931
Accuracy: 0.7
F1-Score: 0.7018181818181819

Dataset drive/My Drive/CP/CPA3.xlsx:
Sensitivity: 0.7466666666666667
Specificity: 0.7924528301886793
Accuracy: 0.7466666666666667
F1-Score: 0.751062452661296

Dataset drive/My Drive/CP/CPB1.xlsx:
Sensitivity: 0.8
Specificity: 0.7586206896551724
Accuracy: 0.8
F1-Score: 0.8012882447665056

Dataset drive/My Drive/CP/CPB2.xlsx:
Sensitivity: 0.78
Specificity: 0.7241379310344828
Accuracy: 0.78
F1-Score: 0.7813247691690086

Dataset drive/My Drive/CP/CPB3.xlsx:
Sensitivity: 0.88
Specificity: 0.8275862068965517
Accuracy: 0.88
F1-Score: 0.8807729468599034

Dataset drive/My Drive/CP/CPB4.xlsx:
Sensitivity: 0.88
Specificity: 0.8275862068965517
Accuracy: 0.88
F1-Score: 0.8807729468599034

Dataset drive/My Drive/CP/CPB5.xlsx:
Sensitivity: 0.893