In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [2]:
def to_real(v):
    """Return the magnitude (modulus) of a complex scalar or array.

    Parameters
    ----------
    v : complex, float, or array-like
        Value(s) to convert. Real inputs are treated as having a zero
        imaginary part, so the result is their absolute value.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``sqrt(real(v)**2 + imag(v)**2)``, element-wise for arrays.
    """
    # Squares (``**2``), not doubling (``*2``): the modulus is sqrt(re^2 + im^2).
    return np.sqrt(np.real(v) ** 2 + np.imag(v) ** 2)
def calculate_metrics(cm):
    num_classes = cm.shape[0]
    metrics = {}

    for class_idx in range(num_classes):
        true_positives = cm[class_idx, class_idx]
        false_positives = np.sum(cm[:, class_idx]) - true_positives
        false_negatives = np.sum(cm[class_idx, :]) - true_positives
        total_samples = np.sum(cm)
        true_negatives = total_samples-sum([true_positives,false_negatives,false_positives])

        precision = true_positives / (true_positives + false_positives)
        accuracy = true_positives / np.sum(cm[class_idx, :])
        specificity = np.sum(true_positives / (true_positives + false_negatives)) / num_classes
        sensitivity = np.sum(true_negatives / (true_negatives + false_positives)) / num_classes
        balanced_accuracy = (specificity + sensitivity) / 2
        f_score = 2 * (precision * sensitivity) / (precision + sensitivity)

        metrics[class_idx] = {
            'Precision': precision,
            'Accuracy': accuracy,
            'Specificity': specificity,
            'Sensitivity': sensitivity,
            'Balanced Accuracy': balanced_accuracy,
            'F-Score': f_score
        }

    for class_idx, metrics in metrics.items():
        print(f"Metrics for Class {class_idx}:")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")    
        print()


In [3]:
# Directory holding the CSV files; change this one value to run the notebook
# against a copy of the data stored elsewhere.
DATA_DIR = "C:/Users/HP/Desktop/Brandnew_CBP/Dataset"

df = pd.read_csv(f"{DATA_DIR}/c2.csv")  # train
tdf = pd.read_csv(f"{DATA_DIR}/testf.csv")  # test

In [4]:

# Every column except the last is used as a feature; the last column is the target.
X_train = df.iloc[:, :-1]
y_train = df.iloc[:, -1]
X_test = tdf.iloc[:, :-1]
y_test = tdf.iloc[:, -1]


In [5]:
# Stratified k-fold cross-validation with Gaussian Naive Bayes
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB




# Classifier evaluated on every fold
clf = GaussianNB()
n_splits = 5

# Start below any achievable accuracy so the first fold is always recorded,
# even if it scores 0%; otherwise best_train/best_test could stay None.
best_acc = float("-inf")
best_train, best_test = None, None

# Initialize the Stratified K-Fold cross-validator
skf = StratifiedKFold(n_splits=n_splits)
for fold, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
    print(f"Fold {fold + 1}")

    # Split the training data into this fold's fitting and validation subsets
    X_train_fold, X_test_fold = X_train.iloc[train_index, :], X_train.iloc[test_index, :]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]

    clf.fit(X_train_fold, y_train_fold)
    y_pred = clf.predict(X_test_fold)

    # accuracy_score takes (y_true, y_pred); scaled to a percentage
    acc = accuracy_score(y_test_fold, y_pred) * 100

    # Remember the row positions of the best-scoring fold for later cells
    if acc > best_acc:
        best_train = train_index
        best_test = test_index
        best_acc = acc
    print(acc)
    
   

Fold 1
89.24485125858124
Fold 2
89.7025171624714
Fold 3
92.66055045871559
Fold 4
91.74311926605505
Fold 5
78.89908256880734


In [6]:
# Rebuild the fitting / validation partitions of the best-scoring fold.
X_train_best = X_train.iloc[best_train, :]
X_test_best = X_train.iloc[best_test, :]
y_train_best = y_train.iloc[best_train]
y_test_best = y_train.iloc[best_test]

# Refit on that fold's fitting rows and score its held-out rows.
# NOTE(review): this fold was selected because it scored highest, so the
# figures printed here are likely optimistic — confirm this is intended.
clf.fit(X_train_best, y_train_best)
y_pred_best = clf.predict(X_test_best)
cm = confusion_matrix(y_test_best, y_pred_best)
print("accuracy score", accuracy_score(y_test_best, y_pred_best))
calculate_metrics(cm)


accuracy score 0.926605504587156
Metrics for Class 0:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 1:
Precision: 1.0
Accuracy: 0.95
Specificity: 0.04318181818181818
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.04431818181818182
F-Score: 0.08695652173913045

Metrics for Class 2:
Precision: 1.0
Accuracy: 0.95
Specificity: 0.04318181818181818
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.04431818181818182
F-Score: 0.08695652173913045

Metrics for Class 3:
Precision: 0.6060606060606061
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.04403409090909091
Balanced Accuracy: 0.044744318181818184
F-Score: 0.08210289346487452

Metrics for Class 4:
Precision: 0.875
Accuracy: 0.875
Specificity: 0.03977272727272727
Sensitivity: 0.04523809523809524
Balanced Accuracy: 0.042505411255411255
F-Score: 0.08602846054333764

Metrics for C

In [7]:
# Score the already-fitted classifier on the separate test file.
# NOTE(review): the recorded run printed ~0.08 here versus ~0.93 on the best
# fold — check that the test CSV has the same column order and label encoding
# as the training CSV.
y_pred_test = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred_test)
print("accuracy score", accuracy_score(y_test, y_pred_test))
calculate_metrics(cm)

accuracy score 0.08040201005025126
Metrics for Class 0:
Precision: 1.0
Accuracy: 0.7
Specificity: 0.03333333333333333
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.04047619047619047
F-Score: 0.0909090909090909

Metrics for Class 1:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics for Class 2:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics for Class 3:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.0400604686318972
Balanced Accuracy: 0.0200302343159486
F-Score: 0.0

Metrics for Class 4:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics for Class 5:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics f

  precision = true_positives / (true_positives + false_positives)
