In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import svm

In [2]:
def to_real(v):
    return (np.sqrt(np.real(v)*2+np.imag(v)*2))
def calculate_metrics(cm):
    num_classes = cm.shape[0]
    metrics = {}

    for class_idx in range(num_classes):
        true_positives = cm[class_idx, class_idx]
        false_positives = np.sum(cm[:, class_idx]) - true_positives
        false_negatives = np.sum(cm[class_idx, :]) - true_positives
        total_samples = np.sum(cm)
        true_negatives = total_samples-sum([true_positives,false_negatives,false_positives])

        precision = true_positives / (true_positives + false_positives)
        accuracy = true_positives / np.sum(cm[class_idx, :])
        specificity = np.sum(true_positives / (true_positives + false_negatives)) / num_classes
        sensitivity = np.sum(true_negatives / (true_negatives + false_positives)) / num_classes
        balanced_accuracy = (specificity + sensitivity) / 2
        f_score = 2 * (precision * sensitivity) / (precision + sensitivity)

        metrics[class_idx] = {
            'Precision': precision,
            'Accuracy': accuracy,
            'Specificity': specificity,
            'Sensitivity': sensitivity,
            'Balanced Accuracy': balanced_accuracy,
            'F-Score': f_score
        }

    for class_idx, metrics in metrics.items():
        print(f"Metrics for Class {class_idx}:")
        for metric, value in metrics.items():
            print(f"{metric}: {value}")    
        print()
# calculate_metrics(cm)

In [3]:
df=pd.read_csv("C:/Users/HP/Desktop/Brandnew_CBP/Dataset/c2.csv")
tdf=pd.read_csv("C:/Users/HP/Desktop/Brandnew_CBP/Dataset/testf.csv")#test

In [4]:
#Using mid value approach
mid_col=625//2
for i in range(len(df.columns)-1):
    if i==str(mid_col):
        continue
    df[str(i)]=abs(df[str(i)]-df[str(mid_col)])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,616,617,618,619,620,621,622,623,624,625
0,1,0,102,94,81,86,93,113,133,134,...,103,105,102,92,82,83,95,107,117,1
1,0,1,89,92,85,94,102,112,135,136,...,99,105,102,88,77,81,94,105,111,1
2,1,100,68,91,84,83,111,0,0,2,...,112,109,100,96,94,85,96,0,0,1
3,0,0,129,86,84,86,87,105,113,119,...,92,98,100,92,80,77,86,96,120,1
4,2,2,93,82,80,90,85,109,120,125,...,90,98,101,92,78,74,83,94,114,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177,73,34,5,18,19,7,19,35,58,78,...,81,77,87,0,0,0,0,0,0,60
2178,75,37,8,8,15,18,17,33,42,66,...,84,77,88,0,0,0,0,0,0,60
2179,76,53,13,0,16,16,11,25,38,65,...,75,87,91,0,0,0,0,0,0,60
2180,79,67,11,4,20,12,21,24,38,62,...,77,84,90,101,0,0,0,0,0,60


In [5]:
#Using mid value approach
# mid_col=625//2
for i in range(len(tdf.columns)-1):
    if i==str(mid_col):
        continue
    tdf[str(i)]=abs(tdf[str(i)]-tdf[str(mid_col)])
tdf

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,616,617,618,619,620,621,622,623,624,625
0,90,89,90,33,49,57,52,40,37,27,...,53,48,48,21,53,75,77,73,84,1
1,0,2,91,77,76,85,82,112,1,0,...,106,100,97,96,89,79,80,89,101,1
2,0,0,86,72,87,87,82,109,0,1,...,92,95,95,87,77,74,82,92,103,1
3,0,0,88,64,93,86,88,110,112,119,...,92,96,96,87,75,72,82,94,106,1
4,1,1,91,72,82,86,84,106,117,117,...,98,98,96,89,77,72,81,94,105,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,111,111,109,111,111,110,111,111,108,111,...,89,118,0,0,0,0,0,93,76,60
195,118,119,120,119,120,119,120,120,120,120,...,122,0,0,0,0,0,1,0,73,60
196,118,119,120,119,120,119,120,120,120,120,...,122,0,0,0,0,0,1,0,73,60
197,118,119,120,119,120,119,120,120,120,120,...,122,0,0,0,0,0,1,0,73,60


In [6]:

X_train, X_test, y_train, y_test = df.iloc[:,:-1],tdf.iloc[:,:-1],df.iloc[:,-1],tdf.iloc[:,-1]


In [7]:
#stratified k fold using SVC
from sklearn.metrics import confusion_matrix
from sklearn import svm

clf = svm.SVC()
n_splits = 5
best_acc=0
skf = StratifiedKFold(n_splits=n_splits)

    
best_train,best_test=None,None
for fold, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
    print(f"Fold {fold + 1}")
    X_train_fold, X_test_fold = X_train.iloc[train_index,:], X_train.iloc[test_index,:]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]

    clf.fit(X_train_fold, y_train_fold)
    
    # predictioning on the testing data
    y_pred = clf.predict(X_test_fold)
    

    cm = confusion_matrix(y_test_fold, y_pred)
    
    acc=accuracy_score(y_pred,y_test_fold)*100
    if acc>best_acc:
        best_train=train_index
        best_test=test_index
        best_acc=acc
    print(acc)
   


# for i, accuracy in enumerate(fold_accuracy):


Fold 1
99.08466819221968
Fold 2
99.08466819221968
Fold 3
99.54128440366972
Fold 4
98.62385321100918
Fold 5
89.90825688073394


In [8]:
X_train_best, X_test_best = X_train.iloc[best_train,:], X_train.iloc[best_test,:]
y_train_best, y_test_best = y_train.iloc[best_train], y_train.iloc[best_test]
clf.fit(X_train_best,y_train_best)
y_pred_best=clf.predict(X_test_best)
cm=confusion_matrix(y_test_best,y_pred_best)
print("accuracy score",accuracy_score(y_test_best,y_pred_best))
calculate_metrics(cm)


accuracy score 0.9954128440366973
Metrics for Class 0:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 1:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 2:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 3:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 4:
Precision: 1.0
Accuracy: 1.0
Specificity: 0.045454545454545456
Sensitivity: 0.045454545454545456
Balanced Accuracy: 0.045454545454545456
F-Score: 0.08695652173913045

Metrics for Class 5:
Preci

In [9]:
y_pred_test=clf.predict(X_test)
print("accuracy score",accuracy_score(y_test,y_pred_test))
cm=confusion_matrix(y_test,y_pred_test)
calculate_metrics(cm)

accuracy score 0.06532663316582915
Metrics for Class 0:
Precision: 1.0
Accuracy: 0.7
Specificity: 0.03333333333333333
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.04047619047619047
F-Score: 0.0909090909090909

Metrics for Class 1:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics for Class 2:
Precision: nan
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047619047619047616
Balanced Accuracy: 0.023809523809523808
F-Score: nan

Metrics for Class 3:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.023683547493071302
Balanced Accuracy: 0.011841773746535651
F-Score: 0.0

Metrics for Class 4:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.047367094986142604
Balanced Accuracy: 0.023683547493071302
F-Score: 0.0

Metrics for Class 5:
Precision: 0.0
Accuracy: 0.0
Specificity: 0.0
Sensitivity: 0.042328042328042326
Balanced Accuracy: 0.021164021164021163
F-Score: 0.0

Metri

  precision = true_positives / (true_positives + false_positives)
