In [2]:
import wfdb
import pandas as pd
import numpy as np
import os
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.dists_kernels.dtw import DtwDtaidistMultiv
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score

In [15]:
data_abs_path = '/Users/victor/physionet.org/files/mimic-iv-ecg/1.0/files'

split_csv = pd.read_csv('data/MIMIC-IV-ECG-Ext-Electrolytes/few_shot_splits/128shots/split1/Calcium50893.csv')

split_csv = split_csv[split_csv['subject_id'] <= 12000000] # i haven't downloaded the full dataset yet

classifiers = pd.read_csv('data/MIMIC-IV-ECG-Ext-Electrolytes/mimiciv_ECGv1.1_hospV2.2_Calcium50893.csv')

train = []
test = []
val = []

for index, row in split_csv.iterrows():
    path = data_abs_path + '/p' + f"{row['subject_id']}"[:4] + '/p' + f"{row['subject_id']}"
    samples = os.listdir(path)
    
    for sample in samples: # for each sample (subject) in the split, take all their studies
        sample_path = os.path.join(path, sample)
        if os.path.isdir(sample_path):
            if int(sample[1:]) not in classifiers['study_id'].values:
                continue
            signal, fields = wfdb.rdsamp(sample_path + '/' + sample[1:])

            if row['split'] == 'train':
                train.append(signal)
            elif row['split'] == 'test':
                test.append(signal)
            elif row['split'] == 'val':
                val.append(signal)

In [16]:
reshaped_train = np.empty((len(train), 12), dtype=object)

for i in range(len(train)):
    for j in range(12):
        reshaped_train[i, j] = pd.Series(train[i][:][j]) # reshaping from (# of subjects, 5000, 12) to (# of subjects, 12) where each entry is a pd.series of length 5000

X_train = pd.DataFrame(reshaped_train)
X_train.shape

(39, 12)

In [17]:
subject_ids = split_csv['subject_id']
filtered_classifiers = classifiers[classifiers['subject_id'].isin(split_csv[split_csv['split'] == 'train']['subject_id'])] # only take the classifier rows that are relevant by subject_id
y_train = filtered_classifiers['flag']
y_train = y_train.replace({'abnormal': 1, np.nan: 0}) # abnormal = 1, normal = 0
y_train.shape

  y_train = y_train.replace({'abnormal': 1, np.nan: 0}) # abnormal = 1, normal = 0


(39,)

In [18]:
reshaped_test = np.empty((len(test), 12), dtype=object)
for i in range(len(test)):
    for j in range(12):
        reshaped_test[i, j] = pd.Series(test[i][:][j])

X_test = pd.DataFrame(reshaped_test)
X_test = X_test.map(lambda x: x.fillna(0) if isinstance(x, pd.Series) else x) # there were NaNs in the data, which is odd, so I'm filling them with 0
X_test.shape

(3262, 12)

In [19]:
subject_ids = split_csv['subject_id']
filtered_classifiers = classifiers[classifiers['subject_id'].isin(split_csv[split_csv['split'] == 'test']['subject_id'])]
filtered_classifiers = filtered_classifiers.drop_duplicates(subset=['study_id']) # there are duplicates in the calcium data for the same study_id, which is odd
y_test = filtered_classifiers['flag']
y_test = y_test.replace({'abnormal': 1, np.nan: 0})
y_test.shape

  y_test = y_test.replace({'abnormal': 1, np.nan: 0})


(3262,)

In [23]:
knn_classifier = KNeighborsTimeSeriesClassifier(n_neighbors=8, distance=DtwDtaidistMultiv())
knn_classifier.fit(X_train, y_train)

In [24]:
y_pred = knn_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

auroc = roc_auc_score(y_test, y_pred)

f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"AUROC: {auroc}")
print(f"F1 Score: {f1}")

Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0


In [25]:
for i in range(1, 17):
    knn_classifier = KNeighborsTimeSeriesClassifier(n_neighbors=i, distance=DtwDtaidistMultiv())
    knn_classifier.fit(X_train, y_train)

    y_pred = knn_classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    auroc = roc_auc_score(y_test, y_pred)

    f1 = f1_score(y_test, y_pred)

    print(f"# of Neighbors: {i}")
    print(f"Accuracy: {accuracy}")
    print(f"AUROC: {auroc}")
    print(f"F1 Score: {f1}")

# of Neighbors: 1
Accuracy: 0.6778050275904353
AUROC: 0.4994797763038107
F1 Score: 0.19954303122619954
# of Neighbors: 2
Accuracy: 0.7798896382587369
AUROC: 0.49941474834178695
F1 Score: 0.01643835616438356
# of Neighbors: 3
Accuracy: 0.7645616186388718
AUROC: 0.4985043568734556
F1 Score: 0.056511056511056514
# of Neighbors: 4
Accuracy: 0.7841814837522992
AUROC: 0.5005852516582131
F1 Score: 0.008450704225352112
# of Neighbors: 5
Accuracy: 0.7798896382587369
AUROC: 0.49837430094940827
F1 Score: 0.011019283746556474
# of Neighbors: 6
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 7
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 8
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 9
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 10
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 11
Accuracy: 0.7857142857142857
AUROC: 0.5
F1 Score: 0.0
# of Neighbors: 12
Accuracy: 0.785714285714285