In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.metrics import accuracy_score


In [2]:
def to_real(v):
    return (np.sqrt(np.real(v)*2+np.imag(v)*2))
def calculate_metrics(cm):
    num_classes = cm.shape[0]
    metrics = {}

    for class_idx in range(num_classes):
        true_positives = cm[class_idx, class_idx]
        false_positives = np.sum(cm[:, class_idx]) - true_positives
        false_negatives = np.sum(cm[class_idx, :]) - true_positives
        total_samples = np.sum(cm)
        true_negatives = total_samples-sum([true_positives,false_negatives,false_positives])

        precision = true_positives / (true_positives + false_positives)
        accuracy = true_positives / np.sum(cm[class_idx, :])
        specificity = np.sum(true_positives / (true_positives + false_negatives)) / num_classes
        sensitivity = np.sum(true_negatives / (true_negatives + false_positives)) / num_classes
        balanced_accuracy = (specificity + sensitivity) / 2
        f_score = 2 * (precision * sensitivity) / (precision + sensitivity)

        metrics[class_idx] = {
            'Precision': precision,
            'Accuracy': accuracy,
            'Specificity': specificity,
            'Sensitivity': sensitivity,
            'Balanced Accuracy': balanced_accuracy,
            'F-Score': f_score
        }

    for class_idx, metrics in metrics.items():
        print(f"Metrics for Class {class_idx}:")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")    
        print()
# calculate_metrics(cm)

In [3]:
df=pd.read_csv("C:/Users/HP/Desktop/Brandnew_CBP/Dataset/c2.csv")#train
tdf=pd.read_csv("C:/Users/HP/Desktop/Brandnew_CBP/Dataset/testf.csv")#test

In [4]:

X_train, X_test, y_train, y_test = df.iloc[:,:-1],tdf.iloc[:,:-1],df.iloc[:,-1],tdf.iloc[:,-1]


In [5]:
#Stratified k-fold with Decisiontree
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
n_splits = 5
best_acc=0
# Classifier
clf = DecisionTreeClassifier()
# Initialize the Stratified K-Fold cross-validator
skf = StratifiedKFold(n_splits=n_splits)
best_train,best_test=None,None
for fold, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
    print(f"Fold {fold + 1}")

    X_train_fold, X_test_fold = X_train.iloc[train_index,:], X_train.iloc[test_index,:]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]
    # Spliting the data into training and testing sets for this fold
    clf.fit(X_train_fold, y_train_fold)
    
    # predictioning on the testing data
    y_pred = clf.predict(X_test_fold)
    
    # confusion matrix
    cm = confusion_matrix(y_test_fold, y_pred)
    
    acc=accuracy_score(y_pred,y_test_fold)*100
    if acc>best_acc:
        best_train=train_index
        best_test=test_index
        best_acc=acc
    print(acc)
    


Fold 1
92.44851258581235
Fold 2
94.50800915331807
Fold 3
95.64220183486239
Fold 4
97.24770642201835
Fold 5
82.11009174311926


In [6]:
X_train_best, X_test_best = X_train.iloc[best_train,:], X_train.iloc[best_test,:]
y_train_best, y_test_best = y_train.iloc[best_train], y_train.iloc[best_test]
clf.fit(X_train_best,y_train_best)
y_pred_best=clf.predict(X_test_best)
cm=confusion_matrix(y_test_best,y_pred_best)
print("accuracy score",accuracy_score(y_test_best,y_pred_best))
calculate_metrics(cm)


accuracy score 0.9655963302752294
Metrics for Class 0:
Precision: 1.0
Accuracy: 0.95
Specificity: 0.04318181818181818
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.04431818181818182
F-Score: 0.08695652173913045

Metrics for Class 1:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 2:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 3:
Precision: 1.0
Accuracy: 0.9
Specificity: 0.04090909090909091
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.04318181818181818
F-Score: 0.08695652173913045

Metrics for Class 4:
Precision: 0.9411764705882353
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.04534632034632035
Balanced Accuracy: 0.0454004329004329
F-Score: 0.08652387989391017

Metrics for Class

In [7]:
y_pred_test=clf.predict(X_test)
print("accuracy score",accuracy_score(y_test,y_pred_test))
cm=confusion_matrix(y_test,y_pred_test)
calculate_metrics(cm)

accuracy score 0.135678391959799
Metrics for Class 0:
Precision: 0.4117647058823529
Accuracy: 0.7
Specificity: 0.031818181818181815
Sensitivity: 0.04304954304954305
Balanced Accuracy: 0.03743386243386243
F-Score: 0.07794954741982645

Metrics for Class 1:
Precision: 0.6666666666666666
Accuracy: 0.2
Specificity: 0.009090909090909092
Sensitivity: 0.04521404521404521
Balanced Accuracy: 0.027152477152477154
F-Score: 0.08468468468468468

Metrics for Class 2:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.04473304473304473
Balanced Accuracy: 0.022366522366522364
F-Score: 0.0

Metrics for Class 3:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.042809042809042804
Balanced Accuracy: 0.021404521404521402
F-Score: 0.0

Metrics for Class 4:
Precision: 0.1
Accuracy: 0.1
Specificity: 0.004545454545454546
Sensitivity: 0.04329004329004329
Balanced Accuracy: 0.023917748917748918
F-Score: 0.060422960725075525

Metrics for Class 5:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0


  precision = true_positives / (true_positives + false_positives)
  accuracy = true_positives / np.sum(cm[class_idx, :])
  specificity = np.sum(true_positives / (true_positives + false_negatives)) / num_classes
