# Author: Nabil Ibtehaz (https://github.com/nibtehaz)


In [1]:
import os
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import  tqdm
import pickle

In [None]:
SNOMED_CODES = {
    '164865005' : 'EKG: myocardial infarction',
    '164889003' : 'ECG: atrial fibrillation',
    '164890007': 'EKG: atrial flutter',
    '164895002' : 'EKG: ventricular tachycardia',
    '164896001' : 'EKG: ventricular fibrillation',
    '426783006' : 'ECG: sinus rhythm',
    '6374002' : 'Bundle branch block',
    '53741008' : 'Coronary arteriosclerosis'
}

In [None]:
set(dx_dict.keys()).intersection(set(cls_lbl.keys()))

In [None]:
[(dx,len(dx_dict[dx])) for dx in dx_dict]

In [None]:

sorted(list(dx_dict.keys()))

## Data Analysis

In [None]:
fp = open('/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/st_petersburg_incart/g1/I0013.hea')
print(fp.read())
fp.close()

In [None]:
S0521

In [11]:
files = [fl.split('.')[0] for fl in next(os.walk('/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1'))[2] if '.mat' in fl]

ecg_data = ['/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/'+fl for fl in files]



In [None]:
files[:10]

In [None]:
fp = open('/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/S0173.hea','r')
dt = fp.read().split('\n')[:-1]
fp.close()

dt

In [12]:
dx_dict = {}
myo_label = {0:[],1:[]}

for fl in files:
    fp = open(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{fl}.hea','r')
    dt = fp.read().split('\n')[:-1]
    fp.close()

    dxs = dt[15][6:].split(',')
    #print(dxs)
    #print(dt[15])

    for dx in dxs:
        if dx not in dx_dict:
            dx_dict[dx] = []

        dx_dict[dx].append(fl)

    if '164865005' in dt[15]:
        myo_label[1].append(fl)
    else:
        myo_label[0].append(fl)

In [None]:
len(myo_label[0]),len(myo_label[1])

In [None]:
record_files = []
record_labels = []

for c in myo_label:
    for s in myo_label[c]:
        record_files.append(s)
        record_labels.append(c)

record_files = np.array(record_files)
record_labels = np.array(record_labels)

In [None]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

splits = {}

for i, (train_index, test_index) in enumerate(skf.split(record_files, record_labels)):
    splits[i] = {
        'train' : record_files[train_index],
        'test' : record_files[test_index],
    }
    

In [None]:
#pickle.dump((splits,record_files,record_labels), open('./myo_processed/data.p','wb'))

In [2]:
(splits,record_files,record_labels) = pickle.load(open('./myo_processed/data.p','rb'))

In [None]:
[(cls_lbl[dx], len(dx_dict[dx])) for dx in dx_dict]

## Visualize Signals

In [None]:
idx = 0
sig = loadmat(ecg_data[idx]+'.mat')['val'] * 1.0

In [None]:
sig.shape

In [None]:
plt.plot( (sig[0,17500:22500] - np.mean(sig[0,2500:5000]))/(np.std(sig[0,2500:7500])) )

In [None]:
plt.plot(sig[0,2500:5000])

In [None]:
sig2 = downsample2(sig[0,17500:22500])

In [None]:
plt.plot(sig[0,17500:22500])
plt.figure(0)
plt.plot(sig2)

## Load Models

In [3]:
def downsample2(sig):
    return (sig[np.arange(0,5000,2)] + sig[np.arange(1,5000,2)])/2

def downsample2_12(sig):
    return (sig[:,np.arange(0,5000,2)] + sig[:,np.arange(1,5000,2)])/2

def mean_norm(sig):
    return (sig-np.mean(sig))/(np.std(sig)+1e-6)

def mean_norm12(sig):
    return (sig-np.mean(sig,axis=1,keepdims=True))/(np.std(sig,axis=1,keepdims=True)+1e-6)

In [4]:
from MAE1DCorrelated import MaskedAutoencoderViT1DCorrelated
from MAE1D import MaskedAutoencoderViT1D
from MAEBank import MAEBank
import torch


  warn(


In [None]:
mae12 = MaskedAutoencoderViT1D(window_len=100, in_chans=12, embed_dim=768, depth=12, num_heads=8,
                 decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16)
mae12.load_state_dict(torch.load('./experiments/expb_baseline_200/model_best.pth',map_location='cpu')["model"])

mae12.eval();

In [37]:
mae1 = MaskedAutoencoderViT1D(window_len=100, in_chans=1, embed_dim=768, depth=12, num_heads=8,
                 decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16)
mae1.load_state_dict(torch.load('./experiments/expb_baseline_4_200/model_best.pth',map_location='cpu')["model"])
mae1.eval();

Model config
MaskedAutoencoderViT1D(sig_len=2500, window_len=100, in_chans=1,embed_dim=768, depth=12, num_heads=8,decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=512,mlp_ratio=4.0, norm_layer=<class 'torch.nn.modules.normalization.LayerNorm'>, norm_pix_loss=True)


In [5]:
mae_bank = MAEBank(['cpu'])

Model config
MaskedAutoencoderViT1D(sig_len=2500, window_len=100, in_chans=1,embed_dim=768, depth=12, num_heads=8,decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=512,mlp_ratio=4.0, norm_layer=<class 'torch.nn.modules.normalization.LayerNorm'>, norm_pix_loss=True)
Initializing MAE 1
Initializing MAE 2
Initializing MAE 3
Initializing MAE 4
Initializing MAE 5
Initializing MAE 6
Initializing MAE 7
Initializing MAE 8
Initializing MAE 9
Initializing MAE 10
Initializing MAE 11
Initializing MAE 12


In [33]:

mae1c = mae_bank.maes[3]
mae1c.load_state_dict(torch.load('./experiments/expb3/saved_models/mae_channel_4_best.pth',map_location='cpu'))

mae1c.eval();

In [None]:
mae_bank = MAEBank(['cpu'])

In [7]:
devc = 'cuda:3'

In [None]:
mae12.to(devc);

In [38]:
mae1.to(devc);

In [34]:
mae1c.to(devc);

In [None]:
mae2c.to(devc);

In [39]:
msk_ratio = 0.00
n_tries = 1
chnl = 4

sig1_train = {}
lbl1_train = {}

sig1_valid = {}
lbl1_valid = {}


for split in range(5):

    sig1_train[split] = []
    lbl1_train[split] = []

    sig1_valid[split] = {}
    lbl1_valid[split] = {}


    for prsn in tqdm(splits[split]['train']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl-1,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))
            lbl1_train[split].append(lbl)

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            x = x.to(devc)
            latent, mask, ids_restore = mae1.forward_encoder(x, msk_ratio)

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_restore = ids_restore.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        if (len(sig1_train[split]))==0:
            sig1_train[split] = latent[:,0,:].detach().numpy()
        else:
            sig1_train[split] = np.vstack([sig1_train[split],latent[:,0,:].detach().numpy()])


    for prsn in tqdm(splits[split]['test']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl-1,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))        

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            x = x.to(devc)

            latent, mask, ids_restore = mae1.forward_encoder(x, msk_ratio)

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_restore = ids_restore.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        
        sig1_valid[split][prsn] = latent[:,0,:].detach().numpy()
        lbl1_valid[split][prsn] = lbl




100%|██████████| 412/412 [02:28<00:00,  2.78it/s]
100%|██████████| 104/104 [00:09<00:00, 11.54it/s]
100%|██████████| 413/413 [00:22<00:00, 18.39it/s]
100%|██████████| 103/103 [00:05<00:00, 18.41it/s]
100%|██████████| 413/413 [00:24<00:00, 16.94it/s]
100%|██████████| 103/103 [00:05<00:00, 19.37it/s]
100%|██████████| 413/413 [00:30<00:00, 13.55it/s]
100%|██████████| 103/103 [00:05<00:00, 17.31it/s]
100%|██████████| 413/413 [00:30<00:00, 13.33it/s]
100%|██████████| 103/103 [00:05<00:00, 19.61it/s]


In [40]:
pickle.dump((sig1_train,lbl1_train,sig1_valid,lbl1_valid), open('./myo_processed/mae4.p','wb'))

In [None]:
msk_ratio = 0.00
n_tries = 1

sig12_train = {}
lbl12_train = {}

sig12_valid = {}
lbl12_valid = {}

for split in range(5):

    sig12_train[split] = []
    lbl12_train[split] = []

    sig12_valid[split] = {}
    lbl12_valid[split] = {}

    for prsn in tqdm(splits[split]['train']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0    

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig[0])-5000,2500):

            sig_epsd.append(mean_norm12(downsample2_12(sig[:,st_smpl:st_smpl+5000])))
            lbl12_train[split].append(lbl)

        x = torch.Tensor(sig_epsd)
        x = x.repeat(n_tries,1,1)
        
        with torch.no_grad():
            x = x.to(devc)

            latent, mask, ids_restore = mae12.forward_encoder(x, msk_ratio)

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_restore = ids_restore.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        if (len(sig12_train[split]))==0:
            sig12_train[split] = latent[:,0,:].detach().numpy()
        else:
            sig12_train[split] = np.vstack([sig12_train[split],latent[:,0,:].detach().numpy()])


    for prsn in tqdm(splits[split]['test']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0    

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig[0])-5000,2500):

            sig_epsd.append(mean_norm12(downsample2_12(sig[:,st_smpl:st_smpl+5000])))        

        x = torch.Tensor(sig_epsd)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            x = x.to(devc)

            latent, mask, ids_restore = mae12.forward_encoder(x, msk_ratio)

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_restore = ids_restore.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        
        sig12_valid[split][prsn] = latent[:,0,:].detach().numpy()
        lbl12_valid[split][prsn] = lbl




In [None]:

pickle.dump((sig12_train,lbl12_train,sig12_valid,lbl12_valid), open('./myo_processed/mae12.p','wb'))

In [35]:
msk_ratio = 0.00
n_tries = 1
chnl = 4

sig1c_train = {}
lbl1c_train = {}

sig1c_valid = {}
lbl1c_valid = {}


for split in range(5):

    sig1c_train[split] = []
    lbl1c_train[split] = []

    sig1c_valid[split] = {}
    lbl1c_valid[split] = {}

    for prsn in tqdm(splits[split]['train']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl-1,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))
            lbl1c_train[split].append(lbl)

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            
            ids_shuffle, ids_restore, ids_keep = mae_bank.propose_masking(len(x), mae_bank.num_patches, msk_ratio)

            x = x.to(devc)
            ids_shuffle = ids_shuffle.to(devc)
            ids_restore = ids_restore.to(devc)
            ids_keep = ids_keep.to(devc)

            latent, mask = mae1c.forward_encoder(x, msk_ratio, ids_shuffle, ids_restore, ids_keep)        

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_shuffle = ids_shuffle.to('cpu')
            ids_restore = ids_restore.to('cpu')
            ids_keep = ids_keep.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        if (len(sig1c_train[split]))==0:
            sig1c_train[split] = latent[:,0,:].detach().numpy()
        else:
            sig1c_train[split] = np.vstack([sig1c_train[split],latent[:,0,:].detach().numpy()])


    for prsn in tqdm(splits[split]['test']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl-1,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))        

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            ids_shuffle, ids_restore, ids_keep = mae_bank.propose_masking(len(x), mae_bank.num_patches, msk_ratio)

            x = x.to(devc)
            ids_shuffle = ids_shuffle.to(devc)
            ids_restore = ids_restore.to(devc)
            ids_keep = ids_keep.to(devc)

            latent, mask = mae1c.forward_encoder(x, msk_ratio, ids_shuffle, ids_restore, ids_keep)        
            
            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_shuffle = ids_shuffle.to('cpu')
            ids_restore = ids_restore.to('cpu')
            ids_keep = ids_keep.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        
        sig1c_valid[split][prsn] = latent[:,0,:].detach().numpy()
        lbl1c_valid[split][prsn] = lbl




100%|██████████| 412/412 [01:01<00:00,  6.73it/s]
100%|██████████| 104/104 [00:08<00:00, 11.68it/s]
100%|██████████| 413/413 [00:21<00:00, 19.21it/s]
100%|██████████| 103/103 [00:05<00:00, 20.41it/s]
100%|██████████| 413/413 [00:22<00:00, 18.37it/s]
100%|██████████| 103/103 [00:05<00:00, 20.17it/s]
100%|██████████| 413/413 [00:24<00:00, 17.05it/s]
100%|██████████| 103/103 [00:05<00:00, 20.24it/s]
100%|██████████| 413/413 [00:29<00:00, 13.82it/s]
100%|██████████| 103/103 [00:05<00:00, 19.62it/s]


In [36]:
pickle.dump((sig1c_train,lbl1c_train,sig1c_valid,lbl1c_valid), open('./myo_processed/mae4c.p','wb'))

In [28]:
!ls -all myo_processed

total 2677960
drwxrwxr-x  2 nabil nabil      4096 Aug 14 15:22 .
drwxrwxr-x 12 nabil nabil      4096 Aug 11 22:31 ..
-rw-rw-r--  1 nabil nabil     66663 Aug 14 12:25 data.p
-rw-rw-r--  1 nabil nabil 342764024 Aug 11 17:50 mae12_old.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 12:48 mae12.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 15:33 mae1c.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 11 17:58 mae1_old.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 13:02 mae1.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 15:41 mae2c.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 13:17 mae2.p
-rw-rw-r--  1 nabil nabil 342764008 Aug 14 15:22 mae3.p


In [None]:
msk_ratio = 0.00
n_tries = 1

sig1c_train = {}
lbl1c_train = {}

sig1c_valid = {}
lbl1c_valid = {}

chnl_id = 1

for split in range(5):

    sig1c_train[split] = []
    lbl1c_train[split] = []

    sig1c_valid[split] = {}
    lbl1c_valid[split] = {}

    for prsn in tqdm(splits[split]['train']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl_id,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))
            lbl1c_train[split].append(lbl)

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            
            ids_shuffle, ids_restore, ids_keep = mae_bank.propose_masking(len(x), mae_bank.num_patches, msk_ratio)

            x = x.to(devc)
            ids_shuffle = ids_shuffle.to(devc)
            ids_restore = ids_restore.to(devc)
            ids_keep = ids_keep.to(devc)

            latent, mask = mae2c.forward_encoder(x, msk_ratio, ids_shuffle, ids_restore, ids_keep)        

            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_shuffle = ids_shuffle.to('cpu')
            ids_restore = ids_restore.to('cpu')
            ids_keep = ids_keep.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        if (len(sig1c_train[split]))==0:
            sig1c_train[split] = latent[:,0,:].detach().numpy()
        else:
            sig1c_train[split] = np.vstack([sig1c_train[split],latent[:,0,:].detach().numpy()])


    for prsn in tqdm(splits[split]['test']):

        sig = loadmat(f'/data/nabil/ecg_repr/physionet.org/files/challenge-2021/1.0.3/training/ptb/g1/{prsn}.mat')['val'] * 1.0
        sig = sig[chnl_id,:] 

        lbl = 1 if prsn in myo_label[1] else 0
        sig_epsd = []

        for st_smpl in range(0,len(sig)-5000,2500):

            sig_epsd.append(mean_norm(downsample2(sig[st_smpl:st_smpl+5000])))        

        x = torch.unsqueeze(torch.Tensor(sig_epsd), dim=1)
        x = x.repeat(n_tries,1,1)

        with torch.no_grad():
            ids_shuffle, ids_restore, ids_keep = mae_bank.propose_masking(len(x), mae_bank.num_patches, msk_ratio)

            x = x.to(devc)
            ids_shuffle = ids_shuffle.to(devc)
            ids_restore = ids_restore.to(devc)
            ids_keep = ids_keep.to(devc)

            latent, mask = mae2c.forward_encoder(x, msk_ratio, ids_shuffle, ids_restore, ids_keep)        
            
            latent = torch.mean(latent.reshape(n_tries,latent.shape[0]//n_tries,latent.shape[1],latent.shape[2]),dim=0)

            x = x.to('cpu')
            ids_shuffle = ids_shuffle.to('cpu')
            ids_restore = ids_restore.to('cpu')
            ids_keep = ids_keep.to('cpu')
            latent = latent.to('cpu')
            mask = mask.to('cpu')

        
        
        sig1c_valid[split][prsn] = latent[:,0,:].detach().numpy()
        lbl1c_valid[split][prsn] = lbl




In [None]:
pickle.dump((sig1c_train,lbl1c_train,sig1c_valid,lbl1c_valid), open('./myo_processed/mae2c.p','wb'))

In [2]:
(sig1_train,lbl1_train,sig1_valid,lbl1_valid) = pickle.load(open('./myo_processed/mae1.p','rb'))
(sig2_train,lbl2_train,sig2_valid,lbl2_valid) = pickle.load(open('./myo_processed/mae2.p','rb'))
(sig3_train,lbl3_train,sig3_valid,lbl3_valid) = pickle.load(open('./myo_processed/mae3.p','rb'))
(sig4_train,lbl4_train,sig4_valid,lbl4_valid) = pickle.load(open('./myo_processed/mae4.p','rb'))

(sig12_train,lbl12_train,sig12_valid,lbl12_valid) = pickle.load(open('./myo_processed/mae12.p','rb'))

(sig1c_train,lbl1c_train,sig1c_valid,lbl1c_valid) = pickle.load(open('./myo_processed/mae1c.p','rb'))
(sig2c_train,lbl2c_train,sig2c_valid,lbl2c_valid) = pickle.load(open('./myo_processed/mae2c.p','rb'))
(sig3c_train,lbl3c_train,sig3c_valid,lbl3c_valid) = pickle.load(open('./myo_processed/mae3c.p','rb'))
(sig4c_train,lbl4c_train,sig4c_valid,lbl4c_valid) = pickle.load(open('./myo_processed/mae4c.p','rb'))

In [3]:
from sklearn.preprocessing import StandardScaler

for split in range(5):
    sclr1 = StandardScaler()
    sclr2 = StandardScaler()
    sclr3 = StandardScaler()
    sclr4 = StandardScaler()
    
    sclr12 = StandardScaler()
    
    sclr1c = StandardScaler()
    sclr2c = StandardScaler()
    sclr3c = StandardScaler()
    sclr4c = StandardScaler()

    sig1_train[split] = sclr1.fit_transform(sig1_train[split])
    sig2_train[split] = sclr2.fit_transform(sig2_train[split])
    sig3_train[split] = sclr3.fit_transform(sig3_train[split])
    sig4_train[split] = sclr4.fit_transform(sig4_train[split])

    sig1c_train[split] = sclr1c.fit_transform(sig1c_train[split])
    sig2c_train[split] = sclr2c.fit_transform(sig2c_train[split])
    sig3c_train[split] = sclr3c.fit_transform(sig3c_train[split])
    sig4c_train[split] = sclr4c.fit_transform(sig4c_train[split])
    
    sig12_train[split] = sclr12.fit_transform(sig12_train[split])


    for prsn in sig1_valid[split]:
        sig1_valid[split][prsn] = sclr1.transform(sig1_valid[split][prsn])
        sig2_valid[split][prsn] = sclr2.transform(sig2_valid[split][prsn])
        sig3_valid[split][prsn] = sclr3.transform(sig3_valid[split][prsn])
        sig4_valid[split][prsn] = sclr4.transform(sig4_valid[split][prsn])

        sig1c_valid[split][prsn] = sclr1c.transform(sig1c_valid[split][prsn])
        sig2c_valid[split][prsn] = sclr2c.transform(sig2c_valid[split][prsn])
        sig3c_valid[split][prsn] = sclr3c.transform(sig3c_valid[split][prsn])
        sig4c_valid[split][prsn] = sclr4c.transform(sig4c_valid[split][prsn])

        sig12_valid[split][prsn] = sclr12.transform(sig12_valid[split][prsn])

In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier



clf12s = {}
for fld in tqdm(range(5)):
    clf12s[fld] = RandomForestClassifier(random_state=2)
    #clf12s[fld] = LogisticRegression(random_state=2)
    #clf12s[fld] = DecisionTreeClassifier(random_state=2)
    clf12s[fld].fit(sig12_train[fld],lbl12_train[fld])

100%|██████████| 5/5 [04:00<00:00, 48.05s/it]


In [24]:
y_pred12 = {}


for fld in range(5):
    y_pred12[fld] = {}
    for prsn in sig12_valid[fld]:

        y_pred12[fld][prsn] = clf12s[fld].predict(sig12_valid[fld][prsn])



In [25]:
from sklearn.ensemble import RandomForestClassifier


clf1cs = {}
clf2cs = {}
clf3cs = {}
clf4cs = {}

for fld in tqdm(range(5)):
    clf1cs[fld] = RandomForestClassifier(random_state=2)
    clf2cs[fld] = RandomForestClassifier(random_state=2)
    clf3cs[fld] = RandomForestClassifier(random_state=2)
    clf4cs[fld] = RandomForestClassifier(random_state=2)
    
    #clf1cs[fld] = LogisticRegression(random_state=2)
    #clf2cs[fld] = LogisticRegression(random_state=2)
    #clf3cs[fld] = LogisticRegression(random_state=2)
    #clf4cs[fld] = LogisticRegression(random_state=2)

    #clf1cs[fld] = DecisionTreeClassifier(random_state=2)
    
    clf1cs[fld].fit(sig1c_train[fld],lbl1c_train[fld])
    clf2cs[fld].fit(sig2c_train[fld],lbl2c_train[fld])
    clf3cs[fld].fit(sig3c_train[fld],lbl3c_train[fld])
    clf4cs[fld].fit(sig4c_train[fld],lbl4c_train[fld])


100%|██████████| 5/5 [16:17<00:00, 195.55s/it]


In [26]:
y_pred1c = {}
y_pred2c = {}
y_pred3c = {}
y_pred4c = {}

for fld in range(5):
    y_pred1c[fld] = {}
    y_pred2c[fld] = {}
    y_pred3c[fld] = {}
    y_pred4c[fld] = {}

    for prsn in sig1c_valid[fld]:
        y_pred1c[fld][prsn] = clf1cs[fld].predict(sig1c_valid[fld][prsn])
        y_pred2c[fld][prsn] = clf2cs[fld].predict(sig2c_valid[fld][prsn])
        y_pred3c[fld][prsn] = clf3cs[fld].predict(sig3c_valid[fld][prsn])
        y_pred4c[fld][prsn] = clf4cs[fld].predict(sig4c_valid[fld][prsn])



In [27]:
from sklearn.ensemble import RandomForestClassifier
clf1s = {}
clf2s = {}
clf3s = {}
clf4s = {}

for fld in tqdm(range(5)):

    clf1s[fld] = RandomForestClassifier(random_state=2)
    clf2s[fld] = RandomForestClassifier(random_state=2)
    clf3s[fld] = RandomForestClassifier(random_state=2)
    clf4s[fld] = RandomForestClassifier(random_state=2)
    
    #clf1s[fld] = LogisticRegression(random_state=2)
    #clf2s[fld] = LogisticRegression(random_state=2)
    #clf3s[fld] = LogisticRegression(random_state=2)
    #clf4s[fld] = LogisticRegression(random_state=2)

    #clf1s[fld] = DecisionTreeClassifier(random_state=2)
    
    clf1s[fld].fit(sig1_train[fld],lbl1_train[fld])
    clf2s[fld].fit(sig2_train[fld],lbl2_train[fld])
    clf3s[fld].fit(sig3_train[fld],lbl3_train[fld])
    clf4s[fld].fit(sig4_train[fld],lbl4_train[fld])

100%|██████████| 5/5 [16:31<00:00, 198.24s/it]


In [28]:
y_pred1 = {}
y_pred2 = {}
y_pred3 = {}
y_pred4 = {}

for fld in range(5):

    y_pred1[fld] = {}
    y_pred2[fld] = {}
    y_pred3[fld] = {}
    y_pred4[fld] = {}
    
    for prsn in sig1_valid[fld] :

        y_pred1[fld][prsn] = clf1s[fld].predict(sig1_valid[fld][prsn])
        y_pred2[fld][prsn] = clf2s[fld].predict(sig2_valid[fld][prsn])
        y_pred3[fld][prsn] = clf3s[fld].predict(sig3_valid[fld][prsn])
        y_pred4[fld][prsn] = clf4s[fld].predict(sig4_valid[fld][prsn])



In [29]:
from sklearn.metrics import classification_report


for fld in range(5):

    print(f'Fold : {fld}')
    YT_1 = []
    YT_2 = []
    YT_3 = []
    YT_4 = []    
    YT_1C = []    
    YT_2C = []    
    YT_3C = []    
    YT_4C = []    
    YT_12 = []

    YP_1 = []
    YP_2 = []
    YP_3 = []
    YP_4 = []
    YP_1C = []
    YP_2C = []
    YP_3C = []
    YP_4C = []
    YP_12 = []

    for prsn in sig1_valid[fld]:
        YT_1.append(lbl1_valid[fld][prsn])
        YT_2.append(lbl2_valid[fld][prsn])
        YT_3.append(lbl3_valid[fld][prsn])
        YT_4.append(lbl4_valid[fld][prsn])
        YT_1C.append(lbl1c_valid[fld][prsn])
        YT_2C.append(lbl2c_valid[fld][prsn])
        YT_3C.append(lbl3c_valid[fld][prsn])
        YT_4C.append(lbl4c_valid[fld][prsn])
        YT_12.append(lbl12_valid[fld][prsn])

        YP_1.append(round(np.mean(y_pred1[fld][prsn])))
        YP_2.append(round(np.mean(y_pred2[fld][prsn])))
        YP_3.append(round(np.mean(y_pred3[fld][prsn])))
        YP_4.append(round(np.mean(y_pred4[fld][prsn])))
        YP_1C.append(round(np.mean(y_pred1c[fld][prsn])))
        YP_2C.append(round(np.mean(y_pred2c[fld][prsn])))
        YP_3C.append(round(np.mean(y_pred3c[fld][prsn])))
        YP_4C.append(round(np.mean(y_pred4c[fld][prsn])))
        YP_12.append(round(np.mean(y_pred12[fld][prsn])))
        
    print('Single Channel')
    print('Channel 1')
    print(classification_report(YT_1, YP_1))
    print('Channel 2')
    print(classification_report(YT_2, YP_2))
    print('Channel 3')
    print(classification_report(YT_3, YP_3))
    print('Channel 4')
    print(classification_report(YT_4, YP_4))

    print('Correlated')
    print(classification_report(YT_1C, YP_1C))
    print('Correlated')
    print(classification_report(YT_2C, YP_2C))
    print('Correlated')
    print(classification_report(YT_3C, YP_3C))
    print('Correlated')
    print(classification_report(YT_4C, YP_4C))

    print('All Channel')
    print(classification_report(YT_12, YP_12))
    

Fold : 0
Single Channel
Channel 1
              precision    recall  f1-score   support

           0       0.76      0.53      0.63        30
           1       0.83      0.93      0.88        74

    accuracy                           0.82       104
   macro avg       0.80      0.73      0.75       104
weighted avg       0.81      0.82      0.81       104

Channel 2
              precision    recall  f1-score   support

           0       1.00      0.60      0.75        30
           1       0.86      1.00      0.92        74

    accuracy                           0.88       104
   macro avg       0.93      0.80      0.84       104
weighted avg       0.90      0.88      0.87       104

Channel 3
              precision    recall  f1-score   support

           0       0.89      0.53      0.67        30
           1       0.84      0.97      0.90        74

    accuracy                           0.85       104
   macro avg       0.86      0.75      0.78       104
weighted avg       0

In [30]:
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

df = []

for fld in range(5):
    YT_1 = []
    YT_2 = []
    YT_3 = []
    YT_4 = []    
    YT_1C = []    
    YT_2C = []    
    YT_3C = []    
    YT_4C = []    
    YT_12 = []

    YP_1 = []
    YP_2 = []
    YP_3 = []
    YP_4 = []
    YP_1C = []
    YP_2C = []
    YP_3C = []
    YP_4C = []
    YP_12 = []

    for prsn in sig1_valid[fld]:
        YT_1.append(lbl1_valid[fld][prsn])
        YT_2.append(lbl2_valid[fld][prsn])
        YT_3.append(lbl3_valid[fld][prsn])
        YT_4.append(lbl4_valid[fld][prsn])
        YT_1C.append(lbl1c_valid[fld][prsn])
        YT_2C.append(lbl2c_valid[fld][prsn])
        YT_3C.append(lbl3c_valid[fld][prsn])
        YT_4C.append(lbl4c_valid[fld][prsn])
        YT_12.append(lbl12_valid[fld][prsn])

        YP_1.append(round(np.mean(y_pred1[fld][prsn])))
        YP_2.append(round(np.mean(y_pred2[fld][prsn])))
        YP_3.append(round(np.mean(y_pred3[fld][prsn])))
        YP_4.append(round(np.mean(y_pred4[fld][prsn])))
        YP_1C.append(round(np.mean(y_pred1c[fld][prsn])))
        YP_2C.append(round(np.mean(y_pred2c[fld][prsn])))
        YP_3C.append(round(np.mean(y_pred3c[fld][prsn])))
        YP_4C.append(round(np.mean(y_pred4c[fld][prsn])))
        YP_12.append(round(np.mean(y_pred12[fld][prsn])))
    
    df.append(['Single Channel 1',fld+1,accuracy_score(YT_1, YP_1),precision_score(YT_1, YP_1,average='macro'),recall_score(YT_1, YP_1,average='macro'),f1_score(YT_1, YP_1,average='macro'),roc_auc_score(YT_1, YP_1,average='macro')])
    df.append(['Single Channel 2',fld+1,accuracy_score(YT_2, YP_2),precision_score(YT_2, YP_2,average='macro'),recall_score(YT_2, YP_2,average='macro'),f1_score(YT_2, YP_2,average='macro'),roc_auc_score(YT_2, YP_2,average='macro')])
    df.append(['Single Channel 3',fld+1,accuracy_score(YT_3, YP_3),precision_score(YT_3, YP_3,average='macro'),recall_score(YT_3, YP_3,average='macro'),f1_score(YT_3, YP_3,average='macro'),roc_auc_score(YT_3, YP_3,average='macro')])
    df.append(['Single Channel 4',fld+1,accuracy_score(YT_4, YP_1),precision_score(YT_4, YP_4,average='macro'),recall_score(YT_4, YP_4,average='macro'),f1_score(YT_4, YP_4,average='macro'),roc_auc_score(YT_4, YP_4,average='macro')])

    df.append(['Correlated Channel 1',fld+1,accuracy_score(YT_1C, YP_1C),precision_score(YT_1C, YP_1C,average='macro'),recall_score(YT_1C, YP_1C,average='macro'),f1_score(YT_1C, YP_1C,average='macro'),roc_auc_score(YT_1C, YP_1C,average='macro')])
    df.append(['Correlated Channel 2',fld+1,accuracy_score(YT_2C, YP_2C),precision_score(YT_2C, YP_2C,average='macro'),recall_score(YT_2C, YP_2C,average='macro'),f1_score(YT_2C, YP_2C,average='macro'),roc_auc_score(YT_2C, YP_2C,average='macro')])
    df.append(['Correlated Channel 3',fld+1,accuracy_score(YT_3C, YP_3C),precision_score(YT_3C, YP_3C,average='macro'),recall_score(YT_3C, YP_3C,average='macro'),f1_score(YT_3C, YP_3C,average='macro'),roc_auc_score(YT_3C, YP_3C,average='macro')])
    df.append(['Correlated Channel 4',fld+1,accuracy_score(YT_4C, YP_4C),precision_score(YT_4C, YP_4C,average='macro'),recall_score(YT_4C, YP_4C,average='macro'),f1_score(YT_4C, YP_4C,average='macro'),roc_auc_score(YT_4C, YP_4C,average='macro')])
    
    df.append(['All 12 Channels',fld+1,accuracy_score(YT_12, YP_12),precision_score(YT_12, YP_12,average='macro'),recall_score(YT_12, YP_12,average='macro'),f1_score(YT_12, YP_12,average='macro'),roc_auc_score(YT_12, YP_12,average='macro')])


df = pd.DataFrame(df, columns=['Model','Fold','Accuracy','Precision','Recall','F1 Score', 'AUC'])
    

In [22]:
#fp = open('./myo_processed/LR.csv','w')
#fp.write(df.to_csv())
#fp.close()

In [15]:
df.set_index('Model', inplace=True)

In [18]:
df.groupby('Model').mean()

Unnamed: 0_level_0,Fold,Accuracy,Precision,Recall,F1 Score,AUC
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All 12 Channels,3.0,0.901214,0.88423,0.873847,0.877521,0.873847
Correlated Channel 1,3.0,0.837248,0.813563,0.77669,0.789627,0.77669
Correlated Channel 2,3.0,0.870108,0.847646,0.830561,0.837622,0.830561
Correlated Channel 3,3.0,0.819698,0.789279,0.75431,0.76758,0.75431
Correlated Channel 4,3.0,0.854668,0.832552,0.801586,0.813884,0.801586
Single Channel 1,3.0,0.821695,0.804769,0.73563,0.75688,0.73563
Single Channel 2,3.0,0.860437,0.842762,0.807355,0.821428,0.807355
Single Channel 3,3.0,0.827446,0.80184,0.761794,0.776763,0.761794
Single Channel 4,3.0,0.821695,0.81076,0.764496,0.781249,0.764496


In [19]:
df.groupby('Model').std()

Unnamed: 0_level_0,Fold,Accuracy,Precision,Recall,F1 Score,AUC
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All 12 Channels,1.581139,0.02177,0.02742,0.032091,0.028205,0.032091
Correlated Channel 1,1.581139,0.021722,0.02981,0.032458,0.028735,0.032458
Correlated Channel 2,1.581139,0.031354,0.041945,0.038865,0.038611,0.038865
Correlated Channel 3,1.581139,0.033015,0.052946,0.033139,0.039467,0.033139
Correlated Channel 4,1.581139,0.021552,0.024858,0.032492,0.029015,0.032492
Single Channel 1,1.581139,0.026304,0.041313,0.034029,0.036781,0.034029
Single Channel 2,1.581139,0.031296,0.043347,0.033323,0.035959,0.033323
Single Channel 3,1.581139,0.03427,0.057157,0.031462,0.039702,0.031462
Single Channel 4,1.581139,0.026304,0.051718,0.047383,0.049107,0.047383
