In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# Load datasets from Excel files: CPA1, CPA2, ..., CPB7
datasets = {
    "CPA1": pd.read_excel('CPA1.xlsx'),
    "CPA2": pd.read_excel('CPA2.xlsx'),
    "CPA3": pd.read_excel('CPA3.xlsx'),
    "CPB1": pd.read_excel('CPB1.xlsx'),
    "CPB2": pd.read_excel('CPB2.xlsx'),
    "CPB3": pd.read_excel('CPB3.xlsx'),
    "CPB4": pd.read_excel('CPB4.xlsx'),
    "CPB5": pd.read_excel('CPB5.xlsx'),
    "CPB6": pd.read_excel('CPB6.xlsx'),
    "CPB7": pd.read_excel('CPB7.xlsx')
}

def evaluate_classifier(X, y, classifier, cv=10):
    results = []

    # 10-fold Cross Validation
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    scores = cross_val_score(classifier, X, y, cv=kf, scoring='f1_macro')
    y_pred = classifier.fit(X, y).predict(X)
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    accuracy = accuracy_score(y, y_pred)
    results.append(['10-fold Cross Validation',
                    f'{accuracy:.3f}',
                    f'{scores.mean():.3f} (+/- {scores.std():.3f})',
                    f'{sensitivity:.3f}',
                    f'{specificity:.3f}'])

    # Train-test split: 75% training, 25% testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    results.append(['75% training, 25% testing',
                    f'{accuracy_score(y_test, y_pred):.3f}',
                    f'{f1_score(y_test, y_pred, average="macro"):.3f}',
                    f'{recall_score(y_test, y_pred, average="macro"):.3f}',
                    f'{precision_score(y_test, y_pred, average="macro"):.3f}'])

    # Train-validation-test split: 50% training, 25% validation, 25% testing
    X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.5, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_tmp, y_tmp, test_size=0.5, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    results.append(['50% training, 25% validation, 25% testing',
                    f'{accuracy_score(y_test, y_pred):.3f}',
                    f'{f1_score(y_test, y_pred, average="macro"):.3f}',
                    f'{recall_score(y_test, y_pred, average="macro"):.3f}',
                    f'{precision_score(y_test, y_pred, average="macro"):.3f}'])

    return results

# Perform classification for each dataset and save results to Excel
all_results = []
for dataset_name, dataset in datasets.items():
    print(f"\nClassification for dataset {dataset_name}")
    X = dataset[['powLv1', 'powLv2', 'powLv3', 'powLv4', 'powLv5']]
    y = dataset['kelas']
    classifiers = {
        "SVM": SVC(),
        "K-NN": KNeighborsClassifier()
    }
    for classifier_name, classifier in classifiers.items():
        print(f"\nUsing {classifier_name}:")
        results = evaluate_classifier(X, y, classifier)
        headers = ['Evaluation', 'Accuracy', 'F1-Score', 'Sensitivity', 'Specificity']
        print(tabulate(results, headers=headers, tablefmt='github'))
        all_results.extend([(dataset_name, classifier_name, *result) for result in results])

# Create a DataFrame from all results
df_results = pd.DataFrame(all_results, columns=['Dataset', 'Classifier', 'Evaluation', 'Accuracy', 'F1-Score', 'Sensitivity', 'Specificity'])

# Save results to Excel
df_results.to_excel('classification_results.xlsx', index=False)
print("Results saved to 'classification_results.xlsx'")



Classification for dataset CPA1

Using SVM:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.525 | 0.351 (+/- 0.097) |          0.89 |          0.16 |
| 75% training, 25% testing                 |      0.46  | 0.315             |          0.5  |          0.23 |
| 50% training, 25% validation, 25% testing |      0.42  | 0.296             |          0.5  |          0.21 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.825 | 0.732 (+/- 0.093) |         0.78  |         0.87  |
| 75% training, 25% testing                 |      0.76  | 0.758             |         0.758 |     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |       0.66 | 0.402 (+/- 0.145) |           0.8 |          0.52 |
| 75% training, 25% testing                 |       0.46 | 0.315             |           0.5 |          0.23 |
| 50% training, 25% validation, 25% testing |       0.42 | 0.296             |           0.5 |          0.21 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.795 | 0.728 (+/- 0.141) |         0.72  |         0.87  |
| 75% training, 25% testing                 |      0.82  | 0.819             |         0.82  |     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.81  | 0.688 (+/- 0.119) |         0.62  |         0.905 |
| 75% training, 25% testing                 |      0.773 | 0.723             |         0.71  |         0.756 |
| 50% training, 25% validation, 25% testing |      0.773 | 0.715             |         0.707 |         0.727 |

Classification for dataset CPB1

Using SVM:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.635 | 0.400 (+/- 0.159) |          0.99 |          0.28 |
| 75% training, 25% testing                 |      0.46  | 0.315   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |       0.89 | 0.879 (+/- 0.051) |         0.89  |         0.89  |
| 75% training, 25% testing                 |       0.88 | 0.879             |         0.879 |         0.879 |
| 50% training, 25% validation, 25% testing |       0.8  | 0.799             |         0.808 |         0.8   |

Classification for dataset CPB2

Using SVM:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |       0.64 | 0.399 (+/- 0.157) |           1   |          0.28 |
| 75% training, 25% testing                 |       0.46 | 0.315   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |       0.9  | 0.860 (+/- 0.058) |         0.93  |         0.87  |
| 75% training, 25% testing                 |       0.88 | 0.880             |         0.882 |         0.88  |
| 50% training, 25% validation, 25% testing |       0.8  | 0.800             |         0.821 |         0.821 |

Classification for dataset CPB3

Using SVM:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.805 | 0.446 (+/- 0.217) |          0.99 |          0.62 |
| 75% training, 25% testing                 |      0.46  | 0.315             |          0.5  |          0.23 |
| 50% training, 25% validation, 25% testing |      0.42  | 0.296             |          0.5  |          0.21 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.905 | 0.862 (+/- 0.082) |         0.94  |         0.87  |
| 75% training, 25% testing                 |      0.88  | 0.879             |         0.879 |     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.915 | 0.886 (+/- 0.077) |         0.96  |         0.87  |
| 75% training, 25% testing                 |      0.88  | 0.879             |         0.879 |         0.879 |
| 50% training, 25% validation, 25% testing |      0.88  | 0.879             |         0.89  |         0.88  |

Classification for dataset CPB5

Using SVM:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.667 | 0.398 (+/- 0.037) |           0   |         1     |
| 75% training, 25% testing                 |      0.667 | 0.400             |           0.5 |         0.333 |
| 50% training, 25% validation, 25% testing |      0.707 | 0.414             |           0.5 |         0.353 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.883 | 0.849 (+/- 0.058) |         0.84  |         0.905 |
| 75% training, 25% testing                 |      0.88  | 0.869             |         0.88  |     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.667 | 0.398 (+/- 0.037) |           0   |         1     |
| 75% training, 25% testing                 |      0.667 | 0.400             |           0.5 |         0.333 |
| 50% training, 25% validation, 25% testing |      0.707 | 0.414             |           0.5 |         0.353 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.9   | 0.830 (+/- 0.070) |         0.9   |         0.9   |
| 75% training, 25% testing                 |      0.853 | 0.842             |         0.86  |     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.723 | 0.436 (+/- 0.150) |         0.995 |          0.45 |
| 75% training, 25% testing                 |      0.48  | 0.324             |         0.5   |          0.24 |
| 50% training, 25% validation, 25% testing |      0.54  | 0.351             |         0.5   |          0.27 |

Using K-NN:
| Evaluation                                |   Accuracy | F1-Score          |   Sensitivity |   Specificity |
|-------------------------------------------|------------|-------------------|---------------|---------------|
| 10-fold Cross Validation                  |      0.895 | 0.851 (+/- 0.051) |         0.915 |         0.875 |
| 75% training, 25% testing                 |      0.86  | 0.860             |         0.862 |     