In [None]:
import pandas as pd
import numpy as np
import math
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the clustered dataset from CSV and preview its first rows.
DATA_PATH = "/content/k-means_clustered_dataset.csv"
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Year,Access to electricity (% of population),Agricultural land (% of land area),"Annual freshwater withdrawals, total (% of internal resources)",Arable land (% of land area),Forest area (% of land area),Electric power consumption (kWh per capita),Energy use (kg of oil equivalent per capita),Renewable electricity output (% of total electricity output),Renewable energy consumption (% of total final energy consumption),Population growth (annual %),GDP per capita (current US$),CO2 emissions (metric tons per capita),Cluster
0,2000,4.446891,57.945817,43.015907,11.779587,1.852782,1586.59112,985.730004,74.989094,44.99,1.443803,182.174037,0.055167,1
1,2001,9.294527,57.94735,43.015907,11.779587,1.852782,1587.375364,1011.679617,72.81146,45.6,0.742517,182.174037,0.055293,1
2,2002,14.133616,57.939684,43.015907,11.771921,1.852782,1649.718098,1034.410867,79.063971,37.83,6.449321,182.174037,0.06681,1
3,2003,18.971165,58.083805,43.015907,11.916042,1.852782,1738.666619,1010.524231,70.249729,36.66,7.541019,199.643228,0.073005,1
4,2004,23.814182,58.151266,43.015907,11.983503,1.852782,1841.168267,1121.869767,70.890841,44.24,3.933178,221.830531,0.054867,1


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features: every column except the target. 'Unnamed: 0' looks like the row
# index that was saved with the CSV (0, 1, 2, ... in the preview); it is a row
# identifier rather than a measurement, so it is excluded from the features.
# errors='ignore' keeps the cell working if the file is re-exported without it.
X = df.drop(columns=['Cluster']).drop(columns=['Unnamed: 0'], errors='ignore')

# Target: the cluster id stored in the 'Cluster' column.
y = df['Cluster']

# 80/20 hold-out split; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (4468, 13)
X_test shape: (1118, 13)
y_train shape: (4468,)
y_test shape: (1118,)


# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold

In [None]:
from sklearn.metrics import confusion_matrix

# Baseline k-nearest-neighbours model with library defaults: train on the
# training split, predict the test split.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Per-class precision / recall / F1 and overall accuracy as a nested dict
# (class entries are keyed by the label converted to a string).
report = classification_report(y_test, y_pred, output_dict=True)

# Class labels in order of first appearance in the 'Cluster' column; the same
# order fixes the row/column order of the confusion matrix.
labels = y.unique()
cm = confusion_matrix(y_test, y_pred, labels=labels)

# (caption printed, key stored in `scores`) for each reported metric.
report_lines = (
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
)

# One-vs-rest metrics for every class.
scores = {}
for pos, cls in enumerate(labels):
    hits = cm[pos, pos]                                   # true positives
    missed = cm[pos, :].sum() - hits                      # false negatives
    false_alarms = cm[:, pos].sum() - hits                # false positives
    rejected = cm.sum() - (hits + missed + false_alarms)  # true negatives

    actual_negatives = rejected + false_alarms
    actual_positives = hits + missed
    cls_report = report[str(cls)]

    scores[cls] = {
        # Guard against an empty denominator instead of dividing by zero.
        'specificity': rejected / actual_negatives if actual_negatives > 0 else 0,
        'sensitivity': hits / actual_positives if actual_positives > 0 else 0,
        'precision': cls_report['precision'],
        'recall': cls_report['recall'],
        'f1_score': cls_report['f1-score'],
        # Accuracy is a single overall figure, repeated under each class.
        'accuracy': report['accuracy'],
    }

# Show the metrics class by class.
for cls in labels:
    print(f"\nLabel '{cls}' Scores:")
    for caption, key in report_lines:
        print(caption, scores[cls][key])

# Overall summary.
print("\nOverall accuracy:", report['accuracy'])


Label '1' Scores:
Specificity: 0.9915254237288136
Sensitivity: 0.9977324263038548
Precision: 0.9977324263038548
Recall: 0.9977324263038548
F1 Score: 0.9977324263038548
Accuracy: 0.9955277280858676

Label '2' Scores:
Specificity: 0.9977876106194691
Sensitivity: 0.985981308411215
Precision: 0.9906103286384976
Recall: 0.985981308411215
F1 Score: 0.9882903981264637
Accuracy: 0.9955277280858676

Label '0' Scores:
Specificity: 0.9990875912408759
Sensitivity: 1.0
Precision: 0.9565217391304348
Recall: 1.0
F1 Score: 0.9777777777777777
Accuracy: 0.9955277280858676

Overall accuracy: 0.9955277280858676


# KNN with k-fold

In [None]:
# Initialize KNN classifier
knn = KNeighborsClassifier()

# Initialize KFold cross-validation (shuffled with a fixed seed, so the folds
# are the same on every run)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# (caption printed, key used in avg_scores) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# avg_scores[label][metric] collects one value per fold
avg_scores = {label: {key: [] for _, key in metric_fields} for label in labels}

# Perform KFold cross-validation
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would silently replace the hold-out split that other cells evaluate on.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on this fold's training part and predict its held-out part
    knn.fit(X_fold_train, y_fold_train)
    y_fold_pred = knn.predict(X_fold_test)

    # Per-class report and confusion matrix for this fold
    fold_report = classification_report(y_fold_test, y_fold_pred, output_dict=True)
    fold_cm = confusion_matrix(y_fold_test, y_fold_pred, labels=labels)

    # Extract one-vs-rest metrics for each label
    for idx, label in enumerate(labels):
        tp = fold_cm[idx, idx]  # True Positive
        fn = fold_cm[idx, :].sum() - tp  # False Negative
        fp = fold_cm[:, idx].sum() - tp  # False Positive
        tn = fold_cm.sum() - (tp + fn + fp)  # True Negative

        fold_values = {
            # Empty denominators yield 0 instead of a division error
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'precision': fold_report[str(label)]['precision'],
            'recall': fold_report[str(label)]['recall'],
            'f1_score': fold_report[str(label)]['f1-score'],
            # Overall fold accuracy, repeated under each label
            'accuracy': fold_report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Print evaluation metrics for each label after each iteration
    print(f"\nFold {i+1}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_fields:
            print(caption, avg_scores[label][key][-1])

# Summarize results across all folds (plain mean over the folds)
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_fields:
        per_fold = avg_scores[label][key]
        print(caption, sum(per_fold) / len(per_fold))



Fold 1:

Label '1' Scores:
Specificity: 0.9915254237288136
Sensitivity: 0.9977324263038548
Precision: 0.9977324263038548
Recall: 0.9977324263038548
F1 Score: 0.9977324263038548
Accuracy: 0.9955277280858676

Label '2' Scores:
Specificity: 0.9977876106194691
Sensitivity: 0.985981308411215
Precision: 0.9906103286384976
Recall: 0.985981308411215
F1 Score: 0.9882903981264637
Accuracy: 0.9955277280858676

Label '0' Scores:
Specificity: 0.9990875912408759
Sensitivity: 1.0
Precision: 0.9565217391304348
Recall: 1.0
F1 Score: 0.9777777777777777
Accuracy: 0.9955277280858676

Fold 2:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9988558352402745
Precision: 1.0
Recall: 0.9988558352402745
F1 Score: 0.9994275901545506
Accuracy: 0.9964189794091316

Label '2' Scores:
Specificity: 0.9966996699669967
Sensitivity: 0.9951923076923077
Precision: 0.9857142857142858
Recall: 0.9951923076923077
F1 Score: 0.9904306220095694
Accuracy: 0.9964189794091316

Label '0' Scores:
Specificity: 0.9990757855822551
Sen

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Re-create the 80/20 hold-out split (same test_size and seed as the notebook's
# initial split) so this cell does not depend on whatever
# X_train/X_test/y_train/y_test were last bound to by previously executed cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Decision Tree classifier; the fixed seed makes the fitted tree
# (and therefore the reported scores) reproducible across runs
dt = DecisionTreeClassifier(random_state=42)

# Fit the classifier on the training data
dt.fit(X_train, y_train)

# Predict on the test data
y_pred = dt.predict(X_test)

# Generate classification report (nested dict; class entries keyed by str(label))
report = classification_report(y_test, y_pred, output_dict=True)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# Generate confusion matrix with rows/columns in the order of `labels`
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Extract and print one-vs-rest evaluation metrics for each label
for idx, label in enumerate(labels):
    print(f"\nLabel '{label}' Scores:")
    tp = cm[idx, idx]  # True Positive
    fn = cm[idx, :].sum() - tp  # False Negative
    fp = cm[:, idx].sum() - tp  # False Positive
    tn = cm.sum() - (tp + fn + fp)  # True Negative

    # Empty denominators yield 0 instead of a division error
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = report[str(label)]['precision']
    recall = report[str(label)]['recall']
    f1_score = report[str(label)]['f1-score']
    accuracy = report['accuracy']  # overall accuracy, repeated under each label

    print("Specificity:", specificity)
    print("Sensitivity:", sensitivity)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1_score)
    print("Accuracy:", accuracy)

# Summarize results
print("\nOverall accuracy:", report['accuracy'])


Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9943820224719101
Precision: 1.0
Recall: 0.9943820224719101
F1 Score: 0.9971830985915493
Accuracy: 0.9955237242614146

Label '2' Scores:
Specificity: 0.9946524064171123
Sensitivity: 1.0
Precision: 0.9732620320855615
Recall: 1.0
F1 Score: 0.986449864498645
Accuracy: 0.9955237242614146

Label '0' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 0.9955237242614146

Overall accuracy: 0.9955237242614146


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Initialize Decision Tree classifier; the fixed seed makes the fitted trees
# (and therefore the reported scores) reproducible across runs
dt = DecisionTreeClassifier(random_state=42)

# Initialize KFold cross-validation (shuffled with a fixed seed, so the folds
# are the same on every run)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# (caption printed, key used in avg_scores) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# avg_scores[label][metric] collects one value per fold
avg_scores = {label: {key: [] for _, key in metric_fields} for label in labels}

# Perform KFold cross-validation
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would silently replace the hold-out split that other cells evaluate on.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on this fold's training part and predict its held-out part
    dt.fit(X_fold_train, y_fold_train)
    y_fold_pred = dt.predict(X_fold_test)

    # Per-class report and confusion matrix for this fold
    fold_report = classification_report(y_fold_test, y_fold_pred, output_dict=True)
    fold_cm = confusion_matrix(y_fold_test, y_fold_pred, labels=labels)

    # Extract one-vs-rest metrics for each label
    for idx, label in enumerate(labels):
        tp = fold_cm[idx, idx]  # True Positive
        fn = fold_cm[idx, :].sum() - tp  # False Negative
        fp = fold_cm[:, idx].sum() - tp  # False Positive
        tn = fold_cm.sum() - (tp + fn + fp)  # True Negative

        fold_values = {
            # Empty denominators yield 0 instead of a division error
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'precision': fold_report[str(label)]['precision'],
            'recall': fold_report[str(label)]['recall'],
            'f1_score': fold_report[str(label)]['f1-score'],
            # Overall fold accuracy, repeated under each label
            'accuracy': fold_report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Print evaluation metrics for each label after each iteration
    print(f"Fold {i+1}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_fields:
            print(caption, avg_scores[label][key][-1])

# Summarize results across all folds (plain mean over the folds)
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_fields:
        per_fold = avg_scores[label][key]
        print(caption, sum(per_fold) / len(per_fold))

Fold 1:

Label '1' Scores:
Specificity: 0.9872881355932204
Sensitivity: 0.9988662131519275
Precision: 0.996606334841629
Recall: 0.9988662131519275
F1 Score: 0.9977349943374858
Accuracy: 0.9946332737030411

Label '2' Scores:
Specificity: 0.9988938053097345
Sensitivity: 0.9766355140186916
Precision: 0.9952380952380953
Recall: 0.9766355140186916
F1 Score: 0.9858490566037736
Accuracy: 0.9946332737030411

Label '0' Scores:
Specificity: 0.9981751824817519
Sensitivity: 1.0
Precision: 0.9166666666666666
Recall: 1.0
F1 Score: 0.9565217391304348
Accuracy: 0.9946332737030411
Fold 2:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9977116704805492
Precision: 1.0
Recall: 0.9977116704805492
F1 Score: 0.9988545246277205
Accuracy: 0.9973142345568488

Label '2' Scores:
Specificity: 0.9966996699669967
Sensitivity: 1.0
Precision: 0.985781990521327
Recall: 1.0
F1 Score: 0.9928400954653938
Accuracy: 0.9973142345568488

Label '0' Scores:
Specificity: 1.0
Sensitivity: 0.9714285714285714
Precision: 1.0
Rec

# Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Re-create the 80/20 hold-out split (same test_size and seed as the notebook's
# initial split) so this cell does not depend on whatever
# X_train/X_test/y_train/y_test were last bound to by previously executed cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Neural Network classifier. max_iter=1000 matches the
# cross-validated MLP cell; warnings are globally silenced in this notebook,
# so a run that stops at the library's default iteration cap would otherwise
# go unnoticed.
nn = MLPClassifier(random_state=42, max_iter=1000)

# Fit the classifier on the training data
nn.fit(X_train, y_train)

# Predict on the test data
y_pred = nn.predict(X_test)

# Generate classification report (nested dict; class entries keyed by str(label))
report = classification_report(y_test, y_pred, output_dict=True)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# Generate confusion matrix with rows/columns in the order of `labels`
cm = confusion_matrix(y_test, y_pred, labels=labels)

# (caption printed, key stored in `scores`) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# Extract one-vs-rest evaluation metrics for each label
scores = {}
for idx, label in enumerate(labels):
    tp = cm[idx, idx]  # True Positive
    fn = cm[idx, :].sum() - tp  # False Negative
    fp = cm[:, idx].sum() - tp  # False Positive
    tn = cm.sum() - (tp + fn + fp)  # True Negative

    scores[label] = {
        # Empty denominators yield 0 instead of a division error
        'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
        'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
        'precision': report[str(label)]['precision'],
        'recall': report[str(label)]['recall'],
        'f1_score': report[str(label)]['f1-score'],
        # Overall accuracy, repeated under each label
        'accuracy': report['accuracy'],
    }

# Print evaluation metrics for each label
for label in labels:
    print(f"\nLabel '{label}' Scores:")
    for caption, key in metric_fields:
        print(caption, scores[label][key])

# Summarize results
print("\nOverall accuracy:", report['accuracy'])



Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9741573033707865
Precision: 1.0
Recall: 0.9741573033707865
F1 Score: 0.9869095048377916
Accuracy: 0.9776186213070726

Label '2' Scores:
Specificity: 0.9732620320855615
Sensitivity: 1.0
Precision: 0.8792270531400966
Recall: 1.0
F1 Score: 0.9357326478149101
Accuracy: 0.9776186213070726

Label '0' Scores:
Specificity: 1.0
Sensitivity: 0.9555555555555556
Precision: 1.0
Recall: 0.9555555555555556
F1 Score: 0.9772727272727273
Accuracy: 0.9776186213070726

Overall accuracy: 0.9776186213070726


In [None]:
# Initialize Neural Network classifier
nn = MLPClassifier(random_state=42, max_iter=1000)

# Initialize KFold cross-validation (shuffled with a fixed seed, so the folds
# are the same on every run)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# (caption printed, key used in avg_scores) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# avg_scores[label][metric] collects one value per fold
avg_scores = {label: {key: [] for _, key in metric_fields} for label in labels}

# Perform KFold cross-validation
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would silently replace the hold-out split that other cells evaluate on.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on this fold's training part and predict its held-out part
    nn.fit(X_fold_train, y_fold_train)
    y_fold_pred = nn.predict(X_fold_test)

    # Per-class report and confusion matrix for this fold
    fold_report = classification_report(y_fold_test, y_fold_pred, output_dict=True)
    fold_cm = confusion_matrix(y_fold_test, y_fold_pred, labels=labels)

    # Extract one-vs-rest metrics for each label
    for idx, label in enumerate(labels):
        tp = fold_cm[idx, idx]  # True Positive
        fn = fold_cm[idx, :].sum() - tp  # False Negative
        fp = fold_cm[:, idx].sum() - tp  # False Positive
        tn = fold_cm.sum() - (tp + fn + fp)  # True Negative

        fold_values = {
            # Empty denominators yield 0 instead of a division error
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'precision': fold_report[str(label)]['precision'],
            'recall': fold_report[str(label)]['recall'],
            'f1_score': fold_report[str(label)]['f1-score'],
            # Overall fold accuracy, repeated under each label
            'accuracy': fold_report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Print evaluation metrics for each label after each iteration
    print(f"Fold {i+1}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_fields:
            print(caption, avg_scores[label][key][-1])

# Summarize results across all folds (plain mean over the folds)
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_fields:
        per_fold = avg_scores[label][key]
        print(caption, sum(per_fold) / len(per_fold))

Fold 1:

Label '1' Scores:
Specificity: 0.9957627118644068
Sensitivity: 0.9807256235827665
Precision: 0.9988452655889145
Recall: 0.9807256235827665
F1 Score: 0.9897025171624715
Accuracy: 0.9821109123434705

Label '2' Scores:
Specificity: 0.9789823008849557
Sensitivity: 0.9953271028037384
Precision: 0.9181034482758621
Recall: 0.9953271028037384
F1 Score: 0.9551569506726457
Accuracy: 0.9821109123434705

Label '0' Scores:
Specificity: 1.0
Sensitivity: 0.9090909090909091
Precision: 1.0
Recall: 0.9090909090909091
F1 Score: 0.9523809523809523
Accuracy: 0.9821109123434705
Fold 2:

Label '1' Scores:
Specificity: 0.9917695473251029
Sensitivity: 0.9965675057208238
Precision: 0.997709049255441
Recall: 0.9965675057208238
F1 Score: 0.9971379507727534
Accuracy: 0.97224709042077

Label '2' Scores:
Specificity: 0.9966996699669967
Sensitivity: 0.8653846153846154
Precision: 0.9836065573770492
Recall: 0.8653846153846154
F1 Score: 0.9207161125319693
Accuracy: 0.97224709042077

Label '0' Scores:
Specificit

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Re-create the 80/20 hold-out split (same test_size and seed as the notebook's
# initial split) so this cell does not depend on whatever
# X_train/X_test/y_train/y_test were last bound to by previously executed cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Naive Bayes classifier (GaussianNB for continuous features)
nb = GaussianNB()

# Fit the classifier on the training data
nb.fit(X_train, y_train)

# Predict on the test data
y_pred = nb.predict(X_test)

# Generate classification report (nested dict; class entries keyed by str(label))
report = classification_report(y_test, y_pred, output_dict=True)

# Generate confusion matrix with rows/columns in the order of the classes the
# fitted model knows about
cm = confusion_matrix(y_test, y_pred, labels=nb.classes_)

# Extract and print one-vs-rest evaluation metrics for each label
for idx, label in enumerate(nb.classes_):
    print(f"\nLabel '{label}' Scores:")
    tp = cm[idx, idx]  # True Positive
    fn = cm[idx, :].sum() - tp  # False Negative
    fp = cm[:, idx].sum() - tp  # False Positive
    tn = cm.sum() - (tp + fn + fp)  # True Negative

    # Empty denominators yield 0 instead of a division error
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = report[str(label)]['precision']
    recall = report[str(label)]['recall']
    f1_score = report[str(label)]['f1-score']
    accuracy = report['accuracy']  # overall accuracy, repeated under each label

    print("Specificity:", specificity)
    print("Sensitivity:", sensitivity)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1_score)
    print("Accuracy:", accuracy)

# Summarize results
print("\nOverall accuracy:", report['accuracy'])


Label '0' Scores:
Specificity: 0.9832089552238806
Sensitivity: 0.9555555555555556
Precision: 0.7049180327868853
Recall: 0.9555555555555556
F1 Score: 0.8113207547169811
Accuracy: 0.9158460161145927

Label '1' Scores:
Specificity: 0.9911894273127754
Sensitivity: 0.9191011235955057
Precision: 0.9975609756097561
Recall: 0.9191011235955057
F1 Score: 0.9567251461988303
Accuracy: 0.9158460161145927

Label '2' Scores:
Specificity: 0.9208556149732621
Sensitivity: 0.8901098901098901
Precision: 0.6864406779661016
Recall: 0.8901098901098901
F1 Score: 0.7751196172248803
Accuracy: 0.9158460161145927

Overall accuracy: 0.9158460161145927


In [None]:
# Initialize Naive Bayes classifier
nb = GaussianNB()

# Initialize KFold cross-validation (shuffled with a fixed seed, so the folds
# are the same on every run)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# (caption printed, key used in avg_scores) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# avg_scores[label][metric] collects one value per fold
avg_scores = {label: {key: [] for _, key in metric_fields} for label in labels}

# Perform KFold cross-validation
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would silently replace the hold-out split that other cells evaluate on.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on this fold's training part and predict its held-out part
    nb.fit(X_fold_train, y_fold_train)
    y_fold_pred = nb.predict(X_fold_test)

    # Per-class report and confusion matrix for this fold
    fold_report = classification_report(y_fold_test, y_fold_pred, output_dict=True)
    fold_cm = confusion_matrix(y_fold_test, y_fold_pred, labels=labels)

    # Extract one-vs-rest metrics for each label
    for idx, label in enumerate(labels):
        tp = fold_cm[idx, idx]  # True Positive
        fn = fold_cm[idx, :].sum() - tp  # False Negative
        fp = fold_cm[:, idx].sum() - tp  # False Positive
        tn = fold_cm.sum() - (tp + fn + fp)  # True Negative

        fold_values = {
            # Empty denominators yield 0 instead of a division error
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'precision': fold_report[str(label)]['precision'],
            'recall': fold_report[str(label)]['recall'],
            'f1_score': fold_report[str(label)]['f1-score'],
            # Overall fold accuracy, repeated under each label
            'accuracy': fold_report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Print evaluation metrics for each label after each iteration
    print(f"Fold {i+1}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_fields:
            print(caption, avg_scores[label][key][-1])

# Summarize results across all folds (plain mean over the folds)
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_fields:
        per_fold = avg_scores[label][key]
        print(caption, sum(per_fold) / len(per_fold))


Fold 1:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9036281179138322
Precision: 1.0
Recall: 0.9036281179138322
F1 Score: 0.9493746277546158
Accuracy: 0.9087656529516994

Label '2' Scores:
Specificity: 0.9059734513274337
Sensitivity: 0.9205607476635514
Precision: 0.6985815602836879
Recall: 0.9205607476635514
F1 Score: 0.7943548387096774
Accuracy: 0.9087656529516994

Label '0' Scores:
Specificity: 0.9844890510948905
Sensitivity: 1.0
Precision: 0.5641025641025641
Recall: 1.0
F1 Score: 0.7213114754098361
Accuracy: 0.9087656529516994
Fold 2:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.8947368421052632
Precision: 1.0
Recall: 0.8947368421052632
F1 Score: 0.9444444444444444
Accuracy: 0.88272157564906

Label '2' Scores:
Specificity: 0.8965896589658966
Sensitivity: 0.8221153846153846
Precision: 0.6452830188679245
Recall: 0.8221153846153846
F1 Score: 0.7230443974630021
Accuracy: 0.88272157564906

Label '0' Scores:
Specificity: 0.9658040665434381
Sensitivity: 0.9428571428571428
Preci

# SVM Linear

In [None]:
from sklearn.svm import SVC

In [None]:
# Re-create the 80/20 hold-out split (same test_size and seed as the notebook's
# initial split) so this cell does not depend on whatever
# X_train/X_test/y_train/y_test were last bound to by previously executed cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize SVM linear classifier
svm = SVC(kernel='linear')

# Fit the classifier on the training data
svm.fit(X_train, y_train)

# Predict on the test data
y_pred = svm.predict(X_test)

# Generate classification report (nested dict; class entries keyed by str(label))
report = classification_report(y_test, y_pred, output_dict=True)

# Get unique labels from the 'Cluster' column and build the confusion matrix
# with rows/columns in that order
labels = y.unique()
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Extract and print one-vs-rest evaluation metrics for each label
for idx, label in enumerate(labels):
    print(f"\nLabel '{label}' Scores:")
    tp = cm[idx, idx]  # True Positive
    fn = cm[idx, :].sum() - tp  # False Negative
    fp = cm[:, idx].sum() - tp  # False Positive
    tn = cm.sum() - (tp + fn + fp)  # True Negative

    # Empty denominators yield 0 instead of a division error
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = report[str(label)]['precision']
    recall = report[str(label)]['recall']
    f1_score = report[str(label)]['f1-score']
    accuracy = report['accuracy']  # overall accuracy, repeated under each label

    print("Specificity:", specificity)
    print("Sensitivity:", sensitivity)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1_score)
    print("Accuracy:", accuracy)

# Summarize results
print("\nOverall accuracy:", report['accuracy'])


Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9977528089887641
Precision: 1.0
Recall: 0.9977528089887641
F1 Score: 0.998875140607424
Accuracy: 0.9973142345568488

Label '2' Scores:
Specificity: 0.9967914438502674
Sensitivity: 1.0
Precision: 0.9837837837837838
Recall: 1.0
F1 Score: 0.9918256130790191
Accuracy: 0.9973142345568488

Label '0' Scores:
Specificity: 1.0
Sensitivity: 0.9777777777777777
Precision: 1.0
Recall: 0.9777777777777777
F1 Score: 0.9887640449438202
Accuracy: 0.9973142345568488

Overall accuracy: 0.9973142345568488


In [None]:
# Initialize SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Initialize KFold cross-validation (shuffled with a fixed seed, so the folds
# are the same on every run)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Get unique labels from the 'Cluster' column
labels = y.unique()

# (caption printed, key used in avg_scores) for every reported metric
metric_fields = [
    ("Specificity:", 'specificity'),
    ("Sensitivity:", 'sensitivity'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
    ("Accuracy:", 'accuracy'),
]

# avg_scores[label][metric] collects one value per fold
avg_scores = {label: {key: [] for _, key in metric_fields} for label in labels}

# Perform KFold cross-validation
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would silently replace the hold-out split that other cells evaluate on.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Fit on this fold's training part and predict its held-out part
    svm.fit(X_fold_train, y_fold_train)
    y_fold_pred = svm.predict(X_fold_test)

    # Per-class report and confusion matrix for this fold
    fold_report = classification_report(y_fold_test, y_fold_pred, output_dict=True)
    fold_cm = confusion_matrix(y_fold_test, y_fold_pred, labels=labels)

    # Extract one-vs-rest metrics for each label
    for idx, label in enumerate(labels):
        tp = fold_cm[idx, idx]  # True Positive
        fn = fold_cm[idx, :].sum() - tp  # False Negative
        fp = fold_cm[:, idx].sum() - tp  # False Positive
        tn = fold_cm.sum() - (tp + fn + fp)  # True Negative

        fold_values = {
            # Empty denominators yield 0 instead of a division error
            'specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
            'sensitivity': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'precision': fold_report[str(label)]['precision'],
            'recall': fold_report[str(label)]['recall'],
            'f1_score': fold_report[str(label)]['f1-score'],
            # Overall fold accuracy, repeated under each label
            'accuracy': fold_report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Print evaluation metrics for each label after each iteration
    print(f"Fold {i+1}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_fields:
            print(caption, avg_scores[label][key][-1])

# Summarize results across all folds (plain mean over the folds)
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_fields:
        per_fold = avg_scores[label][key]
        print(caption, sum(per_fold) / len(per_fold))


Fold 1:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 0.9977324263038548
Precision: 1.0
Recall: 0.9977324263038548
F1 Score: 0.9988649262202043
Accuracy: 0.9973166368515206

Label '2' Scores:
Specificity: 0.9977876106194691
Sensitivity: 0.9953271028037384
Precision: 0.9906976744186047
Recall: 0.9953271028037384
F1 Score: 0.9930069930069931
Accuracy: 0.9973166368515206

Label '0' Scores:
Specificity: 0.9990875912408759
Sensitivity: 1.0
Precision: 0.9565217391304348
Recall: 1.0
F1 Score: 0.9777777777777777
Accuracy: 0.9973166368515206
Fold 2:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0

Label '2' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0

Label '0' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
Fold 3:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 0.999104744852

#SVM Non-Linear

In [None]:
# Initialize SVM classifier with a non-linear (RBF) kernel
svm = SVC(kernel='rbf')

# Fit the classifier on the entire dataset
svm.fit(X, y)

# Predict on the same dataset the model was fitted on.
# NOTE: these are training-set (resubstitution) scores, so they are an
# optimistic estimate; the cross-validated cell gives held-out numbers.
y_pred = svm.predict(X)

# Classification report as a dict: per-label precision/recall/f1-score plus
# the overall 'accuracy' entry
report = classification_report(y, y_pred, output_dict=True)

# Unique class labels present in the target y
labels = y.unique()

# Dictionary to store evaluation metrics, one entry per label
avg_scores = {label: {'specificity': 0, 'sensitivity': 0, 'precision': 0, 'recall': 0, 'f1_score': 0, 'accuracy': 0} for label in labels}

# Confusion matrix whose rows/columns follow the order of `labels`, so that
# position idx in the matrix corresponds to labels[idx]
cm = confusion_matrix(y, y_pred, labels=labels)

# Extract evaluation metrics for each label (one-vs-rest view of the matrix)
for idx, label in enumerate(labels):
    tp = cm[idx, idx]  # True Positive
    fn = cm[idx, :].sum() - tp  # False Negative
    fp = cm[:, idx].sum() - tp  # False Positive
    tn = cm.sum() - (tp + fn + fp)  # True Negative

    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = report[str(label)]['precision']
    recall = report[str(label)]['recall']
    f1_score = report[str(label)]['f1-score']
    # Overall accuracy of the model; it is the same value for every label
    accuracy = report['accuracy']

    # Store the values under their own label. Previously they were left in
    # loop-local scalars only, so the print loop below showed the metrics of
    # the LAST label for every label.
    avg_scores[label]['specificity'] = specificity
    avg_scores[label]['sensitivity'] = sensitivity
    avg_scores[label]['precision'] = precision
    avg_scores[label]['recall'] = recall
    avg_scores[label]['f1_score'] = f1_score
    avg_scores[label]['accuracy'] = accuracy


# Print evaluation metrics for each label, read back from the per-label store
print("Evaluation Metrics for SVM Non-Linear Classification:")
for label in labels:
    label_scores = avg_scores[label]
    print(f"\nLabel '{label}' Scores:")
    print("Specificity:", label_scores['specificity'])
    print("Sensitivity:", label_scores['sensitivity'])
    print("Precision:", label_scores['precision'])
    print("Recall:", label_scores['recall'])
    print("F1 Score:", label_scores['f1_score'])
    print("Accuracy:", label_scores['accuracy'])

# Summarize results
print("\nOverall accuracy:", report['accuracy'])


Evaluation Metrics for SVM Non-Linear Classification:

Label '1' Scores:
Specificity: 0.9998154981549815
Sensitivity: 0.9879518072289156
Precision: 0.9939393939393939
Recall: 0.9879518072289156
F1 Score: 0.9909365558912386
Accuracy: 0.9978517722878625

Label '2' Scores:
Specificity: 0.9998154981549815
Sensitivity: 0.9879518072289156
Precision: 0.9939393939393939
Recall: 0.9879518072289156
F1 Score: 0.9909365558912386
Accuracy: 0.9978517722878625

Label '0' Scores:
Specificity: 0.9998154981549815
Sensitivity: 0.9879518072289156
Precision: 0.9939393939393939
Recall: 0.9879518072289156
F1 Score: 0.9909365558912386
Accuracy: 0.9978517722878625

Overall accuracy: 0.9978517722878625


In [None]:
# Non-linear SVM (RBF kernel) with probability estimates switched on
svm = SVC(kernel='rbf', probability=True)

# 5-fold splitter; rows are shuffled with a fixed seed so folds are repeatable
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Distinct class labels found in the target y
labels = y.unique()

# (printed caption, storage key) for every metric, in display order
metric_rows = (
    ("Specificity", 'specificity'),
    ("Sensitivity", 'sensitivity'),
    ("Precision", 'precision'),
    ("Recall", 'recall'),
    ("F1 Score", 'f1_score'),
    ("Accuracy", 'accuracy'),
)

# avg_scores[label][metric] is a list that receives one value per fold
avg_scores = {}
for label in labels:
    avg_scores[label] = {key: [] for _, key in metric_rows}

# Train and score the classifier once per fold
for fold_no, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training part of the fold, predict the held-out part
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)

    # Per-label precision/recall/f1 and overall accuracy for this fold
    report = classification_report(y_test, y_pred, output_dict=True)

    # Confusion matrix laid out in the order of `labels`, so index `pos`
    # below refers to labels[pos]
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    for pos, label in enumerate(labels):
        # One-vs-rest counts for this label
        tp = cm[pos, pos]
        fn = cm[pos].sum() - tp
        fp = cm[:, pos].sum() - tp
        tn = cm.sum() - (tp + fn + fp)

        actual_negatives = tn + fp
        actual_positives = tp + fn
        specificity = tn / actual_negatives if actual_negatives > 0 else 0
        sensitivity = tp / actual_positives if actual_positives > 0 else 0

        label_report = report[str(label)]
        fold_values = {
            'specificity': specificity,
            'sensitivity': sensitivity,
            'precision': label_report['precision'],
            'recall': label_report['recall'],
            'f1_score': label_report['f1-score'],
            # overall fold accuracy, recorded under every label
            'accuracy': report['accuracy'],
        }
        for key, value in fold_values.items():
            avg_scores[label][key].append(value)

    # Show the values just recorded for this fold (last element of each list)
    print(f"Fold {fold_no}:")
    for label in labels:
        print(f"\nLabel '{label}' Scores:")
        for caption, key in metric_rows:
            print(f"{caption}:", avg_scores[label][key][-1])

# Mean of every metric over the folds, per label
print("\nAverage scores across all folds:")
for label in labels:
    print(f"\nLabel '{label}' Average Scores:")
    for caption, key in metric_rows:
        per_fold = avg_scores[label][key]
        print(f"{caption}:", sum(per_fold) / len(per_fold))


Fold 1:

Label '1' Scores:
Specificity: 0.9915254237288136
Sensitivity: 0.9977324263038548
Precision: 0.9977324263038548
Recall: 0.9977324263038548
F1 Score: 0.9977324263038548
Accuracy: 0.9955277280858676

Label '2' Scores:
Specificity: 0.9977876106194691
Sensitivity: 0.985981308411215
Precision: 0.9906103286384976
Recall: 0.985981308411215
F1 Score: 0.9882903981264637
Accuracy: 0.9955277280858676

Label '0' Scores:
Specificity: 0.9990875912408759
Sensitivity: 1.0
Precision: 0.9565217391304348
Recall: 1.0
F1 Score: 0.9777777777777777
Accuracy: 0.9955277280858676
Fold 2:

Label '1' Scores:
Specificity: 1.0
Sensitivity: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 0.999104744852283

Label '2' Scores:
Specificity: 0.9988998899889989
Sensitivity: 1.0
Precision: 0.9952153110047847
Recall: 1.0
F1 Score: 0.9976019184652278
Accuracy: 0.999104744852283

Label '0' Scores:
Specificity: 1.0
Sensitivity: 0.9714285714285714
Precision: 1.0
Recall: 0.9714285714285714
F1 Score: 0.98550724637