In [1]:
cd /content/drive/MyDrive/parttime/allmodels

/content/drive/MyDrive/parttime/allmodels


In [2]:
# from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score, accuracy_score
import numpy as np
def report_metrics(y_pred_prob, y_test, model_name):
    y_pred = y_pred_prob.argmax(axis=1)
    # Calculating AUC
    auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr', average=None)
    print(f"AUC: {auc}")
    # Calculating confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Calculating sensitivity and specificity for each class
    num_classes = 8
    sensitivity = np.zeros(num_classes)
    specificity = np.zeros(num_classes)
    for i in range(num_classes):
        true_positive = conf_matrix[i, i]
        false_negative = np.sum(conf_matrix[i, :]) - true_positive
        false_positive = np.sum(conf_matrix[:, i]) - true_positive
        true_negative = np.sum(conf_matrix) - true_positive - false_positive - false_negative
        sensitivity[i] = true_positive / (true_positive + false_negative)
        specificity[i] = true_negative / (true_negative + false_positive)

    # Calculating precision, recall, and F1 score
    precision = precision_score(y_test, y_pred, average=None)
    recall = recall_score(y_test, y_pred, average=None)
    f1 = f1_score(y_test, y_pred, average=None)

    accuracy = accuracy_score(y_test, y_pred)
    # Reporting the evaluation metrics
    for i in range(num_classes):
        print(f"Class {i+1} - Sensitivity: {sensitivity[i]}, Specificity: {specificity[i]}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Accuracy: {accuracy}")
    print(f"F1 Score: {f1}")
    print(f"Averaged sensitivity: {np.mean(sensitivity)}")
    print(f"Averaged specificity: {np.mean(specificity)}")
    newd = {'Models': model_name,
            'AUC': auc,
            'Avg. Sensitivity': np.mean(sensitivity),
            'Avg. Specificity': np.mean(specificity),
            'PPV (Recall)': np.mean(recall),
            'NPV (Precision)': np.mean(precision),
            'Accuracy': np.mean(accuracy),
            'F1 Score': f1
            }
    return newd


In [8]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import KFold, StratifiedKFold

In [5]:
# Define custom dataset
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [9]:
# Define the ANN model
class ANN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ANN, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return self.softmax(x)

In [12]:

# Step 1: Split data into train and test set
data = pd.read_csv('/content/drive/MyDrive/parttime/peak_table.csv')

from util import get_labels
y = get_labels(data)
print(y.shape)

X = data.drop(columns=['group', 'label'], axis=1)
kf = StratifiedKFold(n_splits=5)
for i, (train_index, test_index) in enumerate(kf.split(X, y)):
    x_fold_train, x_fold_val, y_fold_train, y_fold_val = X.iloc[train_index], X.iloc[test_index], y[train_index], y[test_index]

    print(f'**********fold--{i}**********')
    print()

    y_train_encoded = y_fold_train

    # Convert data to PyTorch tensors
    X_train_tensor = torch.Tensor(x_fold_train.values)
    y_train_tensor = torch.Tensor(y_train_encoded).long()

    # Create dataloader for training
    train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)


    model = ANN(X_train_tensor.shape[1], 8)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    num_epochs = 50
    for epoch in range(num_epochs):
        for inputs, labels in train_dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Step 3: Predict and calculate metrics
    # Convert test data to PyTorch tensor
    X_test_tensor = torch.Tensor(x_fold_val.values)

    # Make predictions
    with torch.no_grad():
        model.eval()
        y_pred_proba = model(X_test_tensor)
        _, y_pred = torch.max(y_pred_proba, 1)

    report_metrics(y_pred_proba, y_fold_val, f'fold_{i}_ann')


[0 0 0 0 0]
(1072,)
**********fold--0**********

AUC: [0.65585586 0.86911383 0.68720721 0.33923077 0.72689076 0.84018018
 0.83192329 0.76414414]
Class 1 - Sensitivity: 0.03333333333333333, Specificity: 0.9405405405405406
Class 2 - Sensitivity: 0.0, Specificity: 1.0
Class 3 - Sensitivity: 0.2, Specificity: 0.7783783783783784
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.0, Specificity: 1.0
Class 6 - Sensitivity: 0.4666666666666667, Specificity: 0.8972972972972973
Class 7 - Sensitivity: 0.4444444444444444, Specificity: 0.8781725888324873
Class 8 - Sensitivity: 0.7333333333333333, Specificity: 0.6270270270270271
Precision: [0.08333333 0.         0.12765957 0.         0.         0.42424242
 0.25       0.24175824]
Recall: [0.03333333 0.         0.2        0.         0.         0.46666667
 0.44444444 0.73333333]
Accuracy: 0.2372093023255814
F1 Score: [0.04761905 0.         0.15584416 0.         0.         0.44444444
 0.32       0.36363636]
Averaged sensitivity: 0.2347

  _warn_prf(average, modifier, msg_start, len(result))


AUC: [0.69333333 0.59362254 0.8390991  0.90384615 0.92456073 0.84414414
 0.19317541 0.86306306]
Class 1 - Sensitivity: 0.16666666666666666, Specificity: 0.918918918918919
Class 2 - Sensitivity: 0.034482758620689655, Specificity: 0.9193548387096774
Class 3 - Sensitivity: 0.6666666666666666, Specificity: 0.7621621621621621
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.6428571428571429, Specificity: 0.9144385026737968
Class 6 - Sensitivity: 0.36666666666666664, Specificity: 0.8540540540540541
Class 7 - Sensitivity: 0.0, Specificity: 1.0
Class 8 - Sensitivity: 0.6, Specificity: 0.8648648648648649
Precision: [0.25       0.0625     0.3125     0.         0.52941176 0.28947368
 0.         0.41860465]
Recall: [0.16666667 0.03448276 0.66666667 0.         0.64285714 0.36666667
 0.         0.6       ]
Accuracy: 0.3395348837209302
F1 Score: [0.2        0.04444444 0.42553191 0.         0.58064516 0.32352941
 0.         0.49315068]
Averaged sensitivity: 0.30966748768472907
Ave

  _warn_prf(average, modifier, msg_start, len(result))


AUC: [0.63967391 0.77483691 0.825      0.88453608 0.90610599 0.7110904
 0.1074263  0.72481884]
Class 1 - Sensitivity: 0.0, Specificity: 1.0
Class 2 - Sensitivity: 0.0, Specificity: 1.0
Class 3 - Sensitivity: 0.6333333333333333, Specificity: 0.782608695652174
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.6428571428571429, Specificity: 0.8978494623655914
Class 6 - Sensitivity: 0.3448275862068966, Specificity: 0.7945945945945946
Class 7 - Sensitivity: 0.0, Specificity: 1.0
Class 8 - Sensitivity: 0.5666666666666667, Specificity: 0.7119565217391305
Precision: [0.         0.         0.3220339  0.         0.48648649 0.20833333
 0.         0.24285714]
Recall: [0.         0.         0.63333333 0.         0.64285714 0.34482759
 0.         0.56666667]
Accuracy: 0.29906542056074764
F1 Score: [0.         0.         0.42696629 0.         0.55384615 0.25974026
 0.         0.34      ]
Averaged sensitivity: 0.2734605911330049
Averaged specificity: 0.8983761592939363
**********fo

  _warn_prf(average, modifier, msg_start, len(result))


AUC: [0.76105072 0.51295433 0.84764493 0.76829897 0.80203533 0.8389562
 0.96145125 0.94456522]
Class 1 - Sensitivity: 0.1, Specificity: 0.9728260869565217
Class 2 - Sensitivity: 0.10344827586206896, Specificity: 0.7351351351351352
Class 3 - Sensitivity: 0.9, Specificity: 0.6195652173913043
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.0, Specificity: 1.0
Class 6 - Sensitivity: 0.034482758620689655, Specificity: 0.9513513513513514
Class 7 - Sensitivity: 0.0, Specificity: 1.0
Class 8 - Sensitivity: 0.8666666666666667, Specificity: 0.8858695652173914
Precision: [0.375      0.05769231 0.27835052 0.         0.         0.1
 0.         0.55319149]
Recall: [0.1        0.10344828 0.9        0.         0.         0.03448276
 0.         0.86666667]
Accuracy: 0.2803738317757009
F1 Score: [0.15789474 0.07407407 0.42519685 0.         0.         0.05128205
 0.         0.67532468]
Averaged sensitivity: 0.25057471264367814
Averaged specificity: 0.895593419506463
**********fold--

  _warn_prf(average, modifier, msg_start, len(result))


AUC: [0.46594203 0.57238863 0.72336957 0.84845361 0.73513514 0.78583411
 0.06320862 0.58623188]
Class 1 - Sensitivity: 0.0, Specificity: 1.0
Class 2 - Sensitivity: 0.0, Specificity: 1.0
Class 3 - Sensitivity: 0.6333333333333333, Specificity: 0.7282608695652174
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.1724137931034483, Specificity: 0.8648648648648649
Class 6 - Sensitivity: 0.1724137931034483, Specificity: 0.8864864864864865
Class 7 - Sensitivity: 0.0, Specificity: 1.0
Class 8 - Sensitivity: 0.43333333333333335, Specificity: 0.5869565217391305
Precision: [0.         0.         0.27536232 0.         0.16666667 0.19230769
 0.         0.14606742]
Recall: [0.         0.         0.63333333 0.         0.17241379 0.17241379
 0.         0.43333333]
Accuracy: 0.19626168224299065
F1 Score: [0.         0.         0.38383838 0.         0.16949153 0.18181818
 0.         0.21848739]
Averaged sensitivity: 0.1764367816091954
Averaged specificity: 0.8833210928319624


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# y_pred

tensor([5, 5, 2, 2, 2, 5, 7, 4, 4, 2, 2, 4, 7, 5, 4, 7, 4, 2, 4, 2, 2, 7, 4, 2,
        7, 7, 4, 5, 2, 5, 7, 7, 2, 7, 4, 7, 4, 2, 2, 2, 4, 2, 4, 5, 7, 5, 4, 7,
        2, 5, 5, 2, 2, 2, 5, 4, 4, 7, 7, 5, 7, 2, 4, 2, 5, 2, 7, 2, 4, 2, 5, 7,
        4, 2, 7, 5, 4, 5, 7, 7, 2, 5, 4, 4, 2, 2, 2, 5, 4, 5, 5, 4, 5, 7, 2, 5,
        5, 7, 2, 7, 4, 4, 2, 2, 4, 7, 7, 5, 7, 4, 4, 5, 2, 2, 7, 7, 7, 4, 7, 2,
        4, 5, 2, 4, 2, 5, 5, 2, 5, 2, 7, 2, 2, 2, 2, 4, 2, 2, 2, 5, 7, 4, 4, 2,
        4, 4, 7, 4, 7, 2, 7, 4, 7, 2, 4, 4, 2, 2, 2, 4, 4, 5, 7, 7, 2, 2, 2, 2,
        5, 7, 2, 7, 2, 4, 5, 4, 2, 4, 5, 7, 2, 7, 2, 2, 4, 7, 7, 2, 2, 4, 7, 2,
        5, 5, 2, 5, 4, 4, 2, 4, 4, 2, 5, 4, 2, 2, 2, 2, 5, 5, 7, 2, 2, 7, 7])

In [None]:
# X_test_tensor.shape

torch.Size([215, 716])

In [None]:
# print(auc)

0.747740886253269


In [None]:
# report_metrics(cm)

In [None]:
# # from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, classification_report, confusion_matrix
# from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

# def report_metrics(conf_matrix, auc, model_name):
#     # Calculating sensitivity and specificity for each class
#     num_classes = 8
#     sensitivity = np.zeros(num_classes)
#     specificity = np.zeros(num_classes)
#     for i in range(num_classes):
#         true_positive = conf_matrix[i, i]
#         false_negative = np.sum(conf_matrix[i, :]) - true_positive
#         false_positive = np.sum(conf_matrix[:, i]) - true_positive
#         true_negative = np.sum(conf_matrix) - true_positive - false_positive - false_negative
#         sensitivity[i] = true_positive / (true_positive + false_negative)
#         specificity[i] = true_negative / (true_negative + false_positive)

#     # Calculating precision, recall, and F1 score
#     precision = precision_score(y_test, y_pred, average='macro')
#     recall = recall_score(y_test, y_pred, average='macro')
#     f1 = f1_score(y_test, y_pred, average='macro')

#     accuracy = accuracy_score(y_test, y_pred)
#     # Reporting the evaluation metrics
#     for i in range(num_classes):
#         print(f"Class {i+1} - Sensitivity: {sensitivity[i]}, Specificity: {specificity[i]}")
#     print(f"Precision: {precision}")
#     print(f"Recall: {recall}")
#     print(f"Accuracy: {accuracy}")
#     print(f"F1 Score: {f1}")
#     print(f"Averaged sensitivity: {np.mean(sensitivity)}")
#     print(f"Averaged specificity: {np.mean(specificity)}")
#     newd = {'Models': model_name,
#             'AUC': auc,
#             'Avg. Sensitivity': np.mean(sensitivity),
#             'Avg. Specificity': np.mean(specificity),
#             'PPV (Recall)': recall,
#             'NPV (Precision)': precision,
#             'Accuracy': accuracy,
#             'F1 Score': f1
#             }
#     return newd


In [None]:
# results = pd.read_csv('allmodels.csv')
# results = results.drop(columns=['Unnamed: 0'])


In [None]:
# results = results.append(report_metrics(cm, auc, 'ANN'), ignore_index=True)

Class 1 - Sensitivity: 0.0, Specificity: 1.0
Class 2 - Sensitivity: 0.0, Specificity: 1.0
Class 3 - Sensitivity: 0.6666666666666666, Specificity: 0.7087912087912088
Class 4 - Sensitivity: 0.0, Specificity: 1.0
Class 5 - Sensitivity: 0.782608695652174, Specificity: 0.8229166666666666
Class 6 - Sensitivity: 0.37037037037037035, Specificity: 0.8404255319148937
Class 7 - Sensitivity: 0.0, Specificity: 1.0
Class 8 - Sensitivity: 0.25925925925925924, Specificity: 0.7819148936170213
Precision: 0.1294150641025641
Recall: 0.25986312399355876
Accuracy: 0.2651162790697674
F1 Score: 0.17157269209508014
Averaged sensitivity: 0.25986312399355876
Averaged specificity: 0.8942560376237239


  _warn_prf(average, modifier, msg_start, len(result))
  results = results.append(report_metrics(cm, auc, 'ANN'), ignore_index=True)


In [None]:
# results

Unnamed: 0,Models,AUC,Avg. Sensitivity,Avg. Specificity,PPV (Recall),NPV (Precision),Accuracy,F1 Score
0,RF,0.999595,0.970025,0.996044,0.970025,0.970539,0.972093,0.969855
1,GBM,0.99985,0.986255,0.998024,0.986255,0.984536,0.986047,0.98526
2,XGBoost,0.999672,0.989046,0.998011,0.989046,0.985609,0.986047,0.987038
3,AdaBoost,0.794536,0.430621,0.914413,0.430621,0.374069,0.404651,0.36098
4,DT,0.944938,0.904502,0.985375,0.904502,0.897027,0.897674,0.897673
5,ET,0.999639,0.986255,0.998046,0.986255,0.983748,0.986047,0.984853
6,SVC,0.997607,0.94747,0.992543,0.94747,0.953685,0.948837,0.949909
7,LR,0.857569,0.125,0.875,0.125,0.015698,0.125581,0.027893
8,KNN,0.998678,0.945376,0.991262,0.945376,0.946817,0.939535,0.942957
9,LGB,0.999876,0.987399,0.99806,0.987399,0.983631,0.986047,0.985064


In [None]:
# results.to_csv('allmodels.csv')