In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

from helper_code import *
import numpy as np, os, sys, joblib
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier

# from team_code import get_features
sys.path.append(os.path.abspath("../"))
from resnet import ResNet, BasicBlock, Bottleneck
from typing import Type, Any, Callable, Union, List, Optional
sys.path.append(os.path.abspath("../datasets"))
from physionet2021 import PhysioNet2021Dataset

In [17]:
def get_features(header, recording, leads):
    # Extract age.
    age = get_age(header)
    if age is None:
        age = float('nan')

    # Extract sex. Encode as 0 for female, 1 for male, and NaN for other.
    sex = get_sex(header)
    if sex in ('Female', 'female', 'F', 'f'):
        sex = 0
    elif sex in ('Male', 'male', 'M', 'm'):
        sex = 1
    else:
        sex = float('nan')

    print (age, sex)
    # Reorder/reselect leads in recordings.
    available_leads = get_leads(header)
    indices = list()
    for lead in leads:
        i = available_leads.index(lead)
        indices.append(i)
    recording = recording[indices, :]

    # Pre-process recordings.
    adc_gains = get_adcgains(header, leads)
    baselines = get_baselines(header, leads)
    num_leads = len(leads)
    for i in range(num_leads):
        recording[i, :] = (recording[i, :] - baselines[i]) / adc_gains[i]

    # Compute the root mean square of each ECG lead signal.
    rms = np.zeros(num_leads, dtype=np.float32)
    for i in range(num_leads):
        x = recording[i, :]
        rms[i] = np.sqrt(np.sum(x**2) / np.size(x))

    return age, sex, rms

In [51]:
recording = load_recording('../WFDB_CPSC2018_2/Q0002.mat')


In [18]:
params = {'batch_size': 64}

data_directory = '../data/'
training_set = PhysioNet2021Dataset(data_directory)
train_dataloader = torch.utils.data.DataLoader(training_set, **params)

device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"
# print("Using {} device".format(device))

model = ResNet(
        block =BasicBlock,
        layers = [2,3,5,7],
        in_channel = 12,
        out_channel = 9
        ).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

KeyError: 'E00001'

Sign loss
sign_p = y - 2py + p^2    |y-p| < 0.5
       = 1                |y-p| >= 0.5

loss_fn = sign_p x nn.BCELoss()

In [None]:
# pos_weight = torch.ones([9])  # All weights are equal to 1
loss_fn = nn.BCEWithLogitsLoss(size_average = True) # need delete sigmoid
loss_fn = nn.BCELoss(size_average = True)
# loss_fn = nn.CrossEntropyLoss()


In [114]:
def evaluation_methods(labels, pred, threshold = 0.5):
    
# threshold = 0.5
    probs_class = pred
    probs_class[probs_class > threshold] = 0.99
    probs_class[probs_class <= threshold] = 0.01
    scalar_outputs = probs_class
    print('- AUROC and AUPRC...')
    auroc, auprc, auroc_classes, auprc_classes = compute_auc(labels, scalar_outputs)
    print ('auroc:', auroc, 'auprc:', auprc)
    
    probs_class = pred
    probs_class[probs_class > threshold] = 1
    probs_class[probs_class <= threshold] = 0
    binary_outputs = probs_class
    print (binary_outputs)
    print('- Accuracy...')
    accuracy = compute_accuracy(labels, binary_outputs)
    print ('accuracy:', accuracy)
    print('- F-measure...')
    f_measure, f_measure_classes = compute_f_measure(labels, binary_outputs)
    print('f_measure:', f_measure)
#     print('- Challenge metric...')
#     challenge_metric = compute_challenge_metric(weights, labels, binary_outputs, classes, normal_class)

In [19]:
sys.path.append(os.path.abspath("../evaluation-2021"))
from evaluate_model import *

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    labels_list = []
    pred_list = []
    with torch.no_grad():
        for features in dataloader:
            
            X = features['sig']
            age = features['age']
            sex = features['sex']
            labels = features['labels']
            rn = features['record_name']
            # Compute prediction error
            age_numpy = age.numpy()
            m = np.nanmean(age_numpy)
            age_numpy = np.nan_to_num(age_numpy, nan=m)
            age = torch.from_numpy(age_numpy)

            age -= age.min()
            age /= age.max()
            demo_features = torch.stack([age, sex.double()], dim= 1)
            demo_features = demo_features.float()
            X, demo_features =  X.to(device), demo_features.to(device)
            pred = model(X, demo_features)
            labels = labels.int().numpy()
            pred = pred.numpy()
            
            labels_list.append(labels)
            pred_list.append(pred)
            print (rn)
            break
#             return labels, pred
    labels = np.concatenate(labels_list)
    pred = np.concatenate(pred_list)
    
    evaluation_methods(labels, pred)
#     return labels, pred  
#             test_loss += loss_fn(pred, labels.float()).item()
#             correct += (pred.argmax(1) == labels).type(torch.float).sum().item()
#     test_loss /= size
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [24]:
def train(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    print (size)
    for batch, features in enumerate(dataloader):
        X = features['sig']
        age = features['age']
        sex = features['sex']
        labels = features['labels']

        age_numpy = age.numpy()
        m = np.nanmean(age_numpy)
        age_numpy = np.nan_to_num(age_numpy, nan=m)
        age = torch.from_numpy(age_numpy)
#         age
        age -= age.min()
        age /= age.max()
        demo_features = torch.stack([age, sex.double()], dim= 1)
        demo_features = demo_features.float()
        X, demo_features, labels =  X.to(device), demo_features.to(device), labels.to(device)

        try:
            pred = model(X, demo_features)
            loss = loss_fn(pred, labels.float())
        except:
            print (age_numpy, age)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
twelve_leads = ('I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6')
six_leads = ('I', 'II', 'III', 'aVR', 'aVL', 'aVF')
three_leads = ('I', 'II', 'V2')
two_leads = ('II', 'V5')

In [8]:
sig.shape

(2, 5000)

In [7]:
a = np.random.rand(12,5000)
sig = a[[twelve_leads.index(lead) for lead in two_leads], :]

In [2]:
data_directory = '../WFDB_CPSC2018/'

train_records, val_records = PhysioNet2021Dataset.split_names_cv(
    data_directory, 5, 0
    
)
ds = PhysioNet2021Dataset(
    data_directory, max_seq_len=4096, records=val_records, ensure_equal_len=True, proc=0,leads = ('II', 'V5')
)
val_dl = DataLoader(
    ds,
    batch_size=64,
    num_workers=0
#     collate_fn=PhysioNet2020Dataset.collate_fn,
)
# ds = PhysioNet2021Dataset(
#     data_directory, max_seq_len=4096, records=train_records, ensure_equal_len=True, proc=0
# )
# train_dl = DataLoader(
#     ds,
#     batch_size=64,
#     num_workers=0
# #     collate_fn=PhysioNet2020Dataset.collate_fn,
# )


In [3]:
for batch, features in enumerate(val_dl):
        X = features['sig']
        age = features['age']
        sex = features['sex']
        labels = features['labels']
        break

In [4]:
X.shape

torch.Size([64, 2, 4096])

In [77]:
# data_directory = '../WFDB_CPSC2018/'
# testing_set = PhysioNet2021Dataset(data_directory)
# test_dataloader = torch.utils.data.DataLoader(training_set, **params)

In [13]:
!pwd

/home/weijiesun/physinet2021/python-classifier-2021


In [14]:
# import numpy as np, os, sys
from team_code import load_twelve_lead_model, load_six_lead_model, load_three_lead_model, load_two_lead_model
model_directory = "./model"
twelve_lead_model = load_twelve_lead_model(model_directory)


In [16]:
params = {'batch_size': 64}

data_directory = '../data/'
training_set = PhysioNet2021Dataset(data_directory)
test_dataloader = torch.utils.data.DataLoader(training_set, **params)

In [4]:
ds = PhysioNet2021Dataset(
    data_directory, max_seq_len=4096, records=train_records, ensure_equal_len=True, proc=0
)
dataloader = DataLoader(
    ds,
    batch_size=64,
    num_workers=0
#     collate_fn=PhysioNet2020Dataset.collate_fn,
)

In [6]:
output_directory = '../WFDB_CPSC2018_2'

In [8]:
head, tail = os.path.split('Q0002.mat')
root, extension = os.path.splitext(tail)
output_file = os.path.join(output_directory, root + '.csv')
# save_outputs(output_file, classes, labels, probabilities)

In [9]:
output_file

'../WFDB_CPSC2018_2/Q0002.csv'

In [8]:
for batch, features in enumerate(dataloader):
    X = features['sig']
    age = features['age']
    sex = features['sex']
    labels = features['labels']
    record_names = features['record_name']
    break

NameError: name 'dataloader' is not defined

In [7]:
X.shape

torch.Size([64, 12, 4096])

In [25]:
loss_fn = nn.BCELoss(size_average=True)


epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
#     train(train_dl, model, loss_fn, optimizer)
    test(val_dl, twelve_lead_model, loss_fn)
    break

Epoch 1
-------------------------------


TypeError: test() takes 2 positional arguments but 3 were given