In [1]:
# import all packages needed
import string, math
import numpy as np
import pandas as pd
from matplotlib import pyplot
from base64 import b64decode as decode
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config


import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split

## Data Processing / Cleaning

In [2]:
# use class base64 to decode waveform data
def to_array(wf):
    barr = bytearray(decode(wf))
    vals = np.array(barr)
    return vals.view(np.int16).astype(np.float32)

# read in data
exam_data = pd.read_csv("data/d_exam.csv").drop(columns = ["site_num", "patient_id_edit"])
waveform_data = pd.read_csv("data/d_waveform.csv")
lead_data = pd.read_csv("data/d_lead_data.csv").drop(columns = ["exam_id"])
diagnosis_data = pd.read_csv("data/d_diagnosis.csv").drop(columns = ["user_input"])

# add decoded data as a column to lead data
waveforms = list(lead_data['waveform_data'])
lead_data['decoded_waveform'] = [to_array(i) for i in waveforms]

# merge waveform data and lead data
waveform_lead = lead_data.merge(waveform_data, how = "left", left_on = "waveform_id", right_on = "waveform_id", suffixes = (None, None))

#  sort by exam id and lead id
waveform_lead.sort_values(by = ["waveform_id", "lead_id"], inplace = True)

waveform_lead.loc[:, ['exam_id', 'lead_id', 'decoded_waveform', 'waveform_type']]
waveform_lead

Unnamed: 0,lead_data_id,waveform_id,WavfmType,lead_id,lead_byte_count_total,lead_time_offset,waveform_data,lead_sample_count_total,lead_amplitude,lead_units,...,exam_id,waveform_type,number_of_leads,Waveform_Start_Time,Sample_Type,Sample_Base,Sample_Exponent,High_Pass_Filter,Low_Pass_Filter,AC_Filter
10,9078054,1095618,,I,5000,0,+P/4//j/+P/4//j/+P/5//r/+//8//z//P/7//r/+f/4/...,2500,4.88,MICROVOLTS,...,549871,Rhythm,8,0,CONTINUOUS_SAMPLES,250,0,5,150,NONE
15,9081703,1095618,,II,5000,0,9v/2//b/8//w//D/8P/x//L/8//0//T/9P/z//L/8f/w/...,2500,4.88,MICROVOLTS,...,549871,Rhythm,8,0,CONTINUOUS_SAMPLES,250,0,5,150,NONE
8,9074278,1095618,,V1,5000,0,/v/+//7//v/+////AAAAAAAAAQACAAIAAgACAAIAAgACA...,2500,4.88,MICROVOLTS,...,549871,Rhythm,8,0,CONTINUOUS_SAMPLES,250,0,5,150,NONE
1,9066887,1095618,,V2,5000,0,9v/1//T/9P/0//T/9P/0//T/9f/2//b/9v/2//b/9v/2/...,2500,4.88,MICROVOLTS,...,549871,Rhythm,8,0,CONTINUOUS_SAMPLES,250,0,5,150,NONE
18,9082771,1095618,,V3,5000,0,7v/u/+7/7f/s/+z/7P/t/+7/7v/u/+7/7v/u/+7/7v/u/...,2500,4.88,MICROVOLTS,...,549871,Rhythm,8,0,CONTINUOUS_SAMPLES,250,0,5,150,NONE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,9187141,1109067,,V4,1200,0,KAApACoAKwAsACwALQAtAC4ALgAuAC4ALgAuAC4ALgAvA...,600,4.88,MICROVOLTS,...,554080,Rhythm,8,0,CONTINUOUS_SAMPLES,500,0,16,150,NONE
152,9190675,1109067,,V5,1200,0,FgAXABkAGQAbABsAGwAbABsAGwAbABwAHQAeAB4AHgAfA...,600,4.88,MICROVOLTS,...,554080,Rhythm,8,0,CONTINUOUS_SAMPLES,500,0,16,150,NONE
155,9177603,1109067,,V5,10000,0,+v/6//r/+v/7//z//f/+//z//P/8//z//v/+//7//v/+/...,5000,4.88,MICROVOLTS,...,554080,Rhythm,8,0,CONTINUOUS_SAMPLES,500,0,16,150,NONE
140,9172851,1109067,,V6,10000,0,7v/u/+7/7v/x//L/8//0//T/9P/0//T/9P/0//T/9P/0/...,5000,4.88,MICROVOLTS,...,554080,Rhythm,8,0,CONTINUOUS_SAMPLES,500,0,16,150,NONE


In [3]:
# concatenate all leads into a single array
waveform_lead_concat = waveform_lead.groupby(["exam_id", "waveform_type"])['decoded_waveform'].apply(lambda x: tuple(x)).reset_index()
waveform_lead_concat
# remove irregular observations, concat tuple into numpy array
waveform_lead_concat = waveform_lead_concat.drop([12,17], axis = 0)
waveform_lead_concat['decoded_waveform'] = waveform_lead_concat['decoded_waveform'].apply(lambda x: np.vstack(x))#.apply(lambda x: np.transpose(x))

waveform_lead_rhythm = waveform_lead_concat[waveform_lead_concat['waveform_type'] == "Rhythm"]
#waveform_lead_median = waveform_lead_concat[waveform_lead_concat['waveform_type'] == "Median"]

for value in waveform_lead_rhythm["decoded_waveform"]:
    value /= 1024
    value += .5
    print(np.max(value))
    print(np.min(value))

0.63183594
0.3154297
0.75097656
0.31347656
0.7832031
0.08496094
0.8984375
0.29296875
0.78222656
0.31152344
0.8417969
0.19335938
0.6904297
0.33691406
0.6972656
0.40234375


In [4]:
# Adding the labels/sentences
exams = diagnosis_data["exam_id"].unique()

# Let's look over this tomorrow
diagnosis_data = diagnosis_data[diagnosis_data['Original_Diag'] == 1].dropna()
searchfor = ['previous', 'unconfirmed', 'compared', 'interpretation', 'significant']
diagnosis_data = diagnosis_data.loc[diagnosis_data['Full_text'].str.contains('|'.join(searchfor)) != 1]
#

diagnosis_data.sort_values(by=["exam_id", "statement_order"], inplace=True)
diagnoses = []
curr_id = 0
curr_string = ""
for i, row in diagnosis_data.iterrows():
    if row["statement_order"] == 1 and curr_string != "":
        curr_string = curr_string.lower().translate(str.maketrans('', '', string.punctuation))
        val = [curr_id, curr_string[1:]]
        diagnoses.append(val)
        curr_string = ""
        curr_id = row["exam_id"]

    if curr_id == 0:
        curr_id = row["exam_id"]
    
    curr_string += " " + row["Full_text"]

diagnosis_df = pd.DataFrame(diagnoses, columns = ['exam_id', 'diagnosis'])
waveform_lead_rhythm_diag = pd.merge(left=waveform_lead_rhythm, right=diagnosis_df, left_on='exam_id', right_on='exam_id')

#waveform_lead_rhythm_diag
for i in waveform_lead_rhythm_diag["diagnosis"]:
    print(i)

normal sinus rhythm low voltage qrs borderline ecg
sinus bradycardia otherwise normal ecg
sinus tachycardia otherwise normal ecg
normal sinus rhythm normal ecg
normal sinus rhythm normal ecg
normal sinus rhythm with sinus arrhythmia minimal voltage criteria for lvh may be normal variant borderline ecg
atrial fibrillation abnormal ecg normal sinus rhythm with sinus arrhythmia normal ecg


In [5]:
unique_words = set()
for num, sentence in diagnoses:
    for word in sentence.split():
        unique_words.add(word)
print(unique_words)
unique_words = list(unique_words)
word_map = dict()
for i, word in enumerate(unique_words):
    word_map[word] = i+1
word_map[""] = 0
print(word_map)

{'ischemia', 'lvh', 'with', 'fibrillation', 'normal', 'abnormal', 'for', 'arrhythmia', 'wave', 'sinus', 'rhythm', 'may', 'minimal', 'ecg', 't', 'voltage', 'otherwise', 'inferior', 'consider', 'be', 'low', 'variant', 'bradycardia', 'criteria', 'qrs', 'abnormality', 'borderline', 'tachycardia', 'atrial'}
{'ischemia': 1, 'lvh': 2, 'with': 3, 'fibrillation': 4, 'normal': 5, 'abnormal': 6, 'for': 7, 'arrhythmia': 8, 'wave': 9, 'sinus': 10, 'rhythm': 11, 'may': 12, 'minimal': 13, 'ecg': 14, 't': 15, 'voltage': 16, 'otherwise': 17, 'inferior': 18, 'consider': 19, 'be': 20, 'low': 21, 'variant': 22, 'bradycardia': 23, 'criteria': 24, 'qrs': 25, 'abnormality': 26, 'borderline': 27, 'tachycardia': 28, 'atrial': 29, '': 0}


In [6]:
# split data into training and testing datasets
# y not included for now
def one_hot(x, dict_words):
    x = x.split(" ")
    array = []
    for i in x:
        array.append([0] + [1 if y == i else 0 for y in dict_words] + [0,0])
    for i in range(17-len(x)):
        array.append([1 if i == 30 else 0 for i in range(32)])
    return array

dict_words = list(unique_words)
#waveform_lead_rhythm_diag['diagnosis'] = waveform_lead_rhythm_diag['diagnosis'].apply(lambda x: one_hot(x, dict_words))

len(waveform_lead_rhythm_diag["diagnosis"][5])
train_x, test_x, train_y, test_y = train_test_split(waveform_lead_rhythm_diag['decoded_waveform'], waveform_lead_rhythm_diag['diagnosis'], test_size = 0.1, random_state = 2021)
train_x = torch.tensor(list(train_x)).float()
train_x.shape
train_x = torch.tensor(list(waveform_lead_rhythm_diag['decoded_waveform'])).float()
train_x.shape

torch.Size([7, 8, 2500])

## Model 1 - Conv1D Encoder w/ LSTM Decoder

In [7]:
# HYPERPARAMETERS
J = 8 # max number of filters per class
LR = 1e-3

# define global max pooling
class global_max_pooling_1d(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        x, _ = torch.max(x, dim = 2)
        return(x)

# 1D grouped encoder model
encoder_conv = nn.Sequential()
encoder_conv.add_module('initial_norm', nn.BatchNorm1d(8))
encoder_conv.add_module('conv_1', nn.Conv1d(in_channels = 8, out_channels = 8, groups = 8, kernel_size = 5, padding = 2))
for i in range(2, (J+2), 2):
    if (i-2) == 0: 
        prev = 8
    else:
        prev = (i-2)*8
    encoder_conv.add_module('conv_{num}'.format(num = int(i / 2 + 1)), nn.Conv1d(in_channels = prev, out_channels = i*8, groups = 8, kernel_size = 5, padding = 2, stride = 3))
    encoder_conv.add_module('activation_{num}'.format(num = int(i / 2 + 1)), nn.ELU())
    encoder_conv.add_module('batch_norm_{num}'.format(num = int(i / 2 + 1)), nn.BatchNorm1d(i*8))
    
#encoder_conv.add_module('final_conv', nn.Conv1d(in_channels = J * 8, out_channels = 8, groups = 8, kernel_size = 5, padding = 2))
#encoder_conv.add_module('max_pool', nn.MaxPool1d(kernel_size = 5, padding = 2, stride = 1))
encoder_conv.add_module('reshape', nn.MaxPool1d(kernel_size = 5, padding = 2, stride = 1))


# summarize model, verify output is of desired shape
print(train_x[0].shape)
print(encoder_conv(torch.unsqueeze(train_x[0], 0)).shape)

torch.Size([8, 2500])
torch.Size([1, 64, 31])


In [8]:
# ResConv

# HYPERPARAMETERS
J = 10 # max number of filters per class
LR = 1e-3
KER_SIZE = 11
PADDING = 5
# define global max pooling
class global_max_pooling_1d(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        x, _ = torch.max(x, dim = 2)
        return(x)

# define resblock for neural nets
class ResBlock1D(nn.Module):
    def __init__(self, num_filters, kernel_size, padding, groups = 1, stride = 1):
        super(ResBlock1D, self).__init__()
        self.act = nn.ReLU()
        self.conv1d_1 = nn.Conv1d(num_filters, num_filters, kernel_size = kernel_size, padding = padding, groups = groups, stride = 1)
        self.conv1d_2 = nn.Conv1d(num_filters, num_filters, kernel_size = kernel_size, padding = padding, groups = groups, stride = 1)
        self.batch_norm_1 = nn.BatchNorm1d(num_filters)
        self.batch_norm_2 = nn.BatchNorm1d(num_filters)

    def forward(self, x):
        res = x
        x = self.batch_norm_1(self.act(self.conv1d_1(x)))
        x = self.batch_norm_2(self.act(self.conv1d_2(x)))
        return x + res

conv_model = nn.Sequential()
init_channels = 8
for i in range(5):
    next_channels = 2 * init_channels
    conv_model.add_module('conv_{num}'.format(num = i), nn.Conv1d(in_channels = init_channels, out_channels = next_channels, kernel_size = KER_SIZE, padding = PADDING, stride = 1))
    conv_model.add_module('act_{num}'.format(num = i), nn.ReLU())
    conv_model.add_module('batch_norm_{num}'.format(num = i), nn.BatchNorm1d(next_channels))
    conv_model.add_module('res_{num}'.format(num = i), ResBlock1D(num_filters = next_channels, kernel_size = KER_SIZE, padding = PADDING))
    conv_model.add_module('act_res_{num}'.format(num = i), nn.ReLU())
    init_channels = next_channels
conv_model.add_module('conv_fin', nn.Conv1d(in_channels = init_channels, out_channels = 768, kernel_size = KER_SIZE, padding = PADDING))
conv_model.add_module('act_fin', nn.ReLU())
conv_model.add_module('batch_fin', nn.BatchNorm1d(768))
print(conv_model)
#print(conv_model(train_x).shape)
conv_embedder = conv_model





Sequential(
  (conv_0): Conv1d(8, 16, kernel_size=(11,), stride=(1,), padding=(5,))
  (act_0): ReLU()
  (batch_norm_0): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (res_0): ResBlock1D(
    (act): ReLU()
    (conv1d_1): Conv1d(16, 16, kernel_size=(11,), stride=(1,), padding=(5,))
    (conv1d_2): Conv1d(16, 16, kernel_size=(11,), stride=(1,), padding=(5,))
    (batch_norm_1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (batch_norm_2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (act_res_0): ReLU()
  (conv_1): Conv1d(16, 32, kernel_size=(11,), stride=(1,), padding=(5,))
  (act_1): ReLU()
  (batch_norm_1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (res_1): ResBlock1D(
    (act): ReLU()
    (conv1d_1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
    (conv1d_2): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(

In [9]:
deconv_model = nn.Sequential()
init_channels = 768
for i in range(5):
    next_channels = init_channels // 2
    deconv_model.add_module('conv_{num}'.format(num = i), nn.Conv1d(in_channels = init_channels, out_channels = next_channels, kernel_size = KER_SIZE, padding = PADDING, stride = 1))
    deconv_model.add_module('act_{num}'.format(num = i), nn.ReLU())
    deconv_model.add_module('batch_norm_{num}'.format(num = i), nn.BatchNorm1d(next_channels))
    deconv_model.add_module('res_{num}'.format(num = i), ResBlock1D(num_filters = next_channels, kernel_size = KER_SIZE, padding = PADDING))
    deconv_model.add_module('act_res_{num}'.format(num = i), nn.ReLU())
    init_channels = next_channels
deconv_model.add_module('conv_fin', nn.Conv1d(in_channels = init_channels, out_channels = 8, kernel_size = KER_SIZE, padding = PADDING))
deconv_model.add_module('act_fin', nn.ReLU())
deconv_model.add_module('batch_fin', nn.BatchNorm1d(8))

print(train_x.shape)
#print(data.shape)
print(conv_model(train_x).shape)
print(deconv_model(conv_model(train_x)).shape)

torch.Size([7, 8, 2500])
torch.Size([7, 768, 2500])
torch.Size([7, 8, 2500])


In [10]:
class ConvAutoEncoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(ConvAutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
    
    def forward(self, x):
        return self.decoder(self.encoder(x))
    
    def make_encoder(self):
        return self.encoder
    
    def make_decoder(self):
        return self.decoder
    
    


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model to set to
auto_model = ConvAutoEncoder(conv_model, deconv_model)
auto_optimizer = torch.optim.Adam(auto_model.parameters(), lr = 1e-3)
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7ff60b449a50>

In [46]:
# Training params
loss_function = nn.MSELoss()

#print(train_x[0])
for i in range(180):
    #print(train_x.shape)
    auto_optimizer.zero_grad()
    outputs = auto_model(train_x)
    #print(outputs.shape)
    losses = loss_function(outputs, train_x)
    losses.backward(retain_graph=True)
    auto_optimizer.step()
    print(losses)
    if losses < .001:
        break

tensor(0.0028, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0027, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0026, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBac

In [12]:
#torch.save(auto_model.state_dict(), 'model/autoencoder.pt')

conv_embedder = auto_model.make_encoder()


#torch.save(conv_embedder.state_dict(), "model/embedder.pt")
conv_embedder.load_state_dict(torch.load("model/embedder.pt"))

<All keys matched successfully>

## Model 2 - LSTM Encoder w/ Huggingface Decoder

In [None]:
# define hyperparameters 
hidden_layers = 512
embedding_dim = 8
num_words = len(unique_words)

class ECG_LSTM(nn.Module):
    def __init__(self, encoder, h_dim, e_dim, word_list_length):
        super(ECG_LSTM, self).__init__()
        self.encoder = encoder
        self.lstm = nn.LSTM(e_dim, h_dim)
        self.linear = nn.Linear(h_dim, word_list_length)
        
    def forward(self, seq):
        seq_embedded = self.encoder(seq)
        final_hidd, _ = self.lstm(seq_embedded)
        dec_seq = self.linear(final_hidd)
        return F.log_softmax(dec_seq)
    
lstm_dec = ECG_LSTM(encoder_conv, hidden_layers, embedding_dim, num_words)
lstm_dec(train_x).shape

## Model 3 - Basic Transformer Architecture with Multi-Head Attention

In [13]:
new_data = conv_embedder(train_x)

new_data = new_data.detach()
print(new_data)

tensor([[[-5.4335e-01, -5.4335e-01, -5.4335e-01,  ..., -5.4335e-01,
          -5.4335e-01, -5.4335e-01],
         [-8.0321e-01, -8.0321e-01, -8.0321e-01,  ..., -4.1189e-01,
           6.4523e-01,  6.8533e-01],
         [-5.0046e-01, -5.0046e-01, -5.0046e-01,  ..., -5.0046e-01,
          -5.0046e-01, -5.0046e-01],
         ...,
         [-3.7941e-01, -3.7941e-01, -3.7941e-01,  ..., -3.7941e-01,
          -3.7941e-01, -1.0683e-01],
         [-4.7724e-02, -4.7724e-02, -4.7724e-02,  ...,  8.6893e+00,
          -4.7724e-02, -4.7724e-02],
         [-9.0351e-01, -9.0351e-01, -9.0351e-01,  ..., -8.6267e-01,
          -4.7783e-01, -8.9724e-01]],

        [[-5.4335e-01, -5.4335e-01, -5.4335e-01,  ..., -5.4335e-01,
          -5.4335e-01, -5.4335e-01],
         [-8.0321e-01, -8.0321e-01, -8.0321e-01,  ..., -8.0321e-01,
          -8.0321e-01, -8.0321e-01],
         [-5.0046e-01, -5.0046e-01, -5.0046e-01,  ..., -5.0046e-01,
          -5.0046e-01, -5.0046e-01],
         ...,
         [-3.7941e-01, -3

# Transformer testing

In [14]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class ECGTransformerEncoder(nn.Module):
    # Takes the ECG discrete signals sequence and maps into a probability distribution of diagnosis
    # For working/verification purposes
    def __init__(self, vector_size, embed_dim, n_heads, hidden_linear_dim, n_layers, dropout):
        super(ECGTransformerEncoder, self).__init__()
        self.model_type = "Transformer"
        self.positional_encoder = PositionalEncoder(embed_dim=embed_dim, dropout=dropout, batch_size = 7)
    
        #Since our data is already discrete numbers, might need some tweaking for this
        self.embedder = conv_embedder
                        #64 31              #39        64
        
        
        self.encoder = TransformerEncoder(
            TransformerEncoderLayer(embed_dim, n_heads, hidden_linear_dim, dropout),
            n_layers)
        
        self.n_inputs = embed_dim
        self.n_layers = n_layers
        
        # Simple linear decoder
        self.decoder = nn.Sequential(
                        nn.Linear(768, 17),
                        Transpose(17, 2500),
                        nn.Linear(2500, 30),
                        nn.LogSoftmax()
                        )
        self.init_weights()
        
    def init_weights(self):
        #self.embedder.weight.data.uniform_(-.1, .1)
        #self.decoder.bias.data.zero_()
        #self.decoder.weight.data.uniform_(-.1, .1)
        pass
        
    def forward(self, x):
        #x = self.embedder(x) # * math.sqrt(self.n_inputs)
        #x = x.squeeze(0)
        #x = x.view(2500, 8)
        #x = x.unsqueeze(1)
        x = self.positional_encoder(x)
        print(x.shape)
        x = self.encoder(x)
        print(x.shape)
        x = x.squeeze(1) 
        #x = self.decoder(x)
        return x

class Transpose(nn.Module):
    def __init__(self, *args):
        super(Transpose, self).__init__()
        self.shape = args

    def forward(self, x):
        # If the number of the last batch sample in the data set is smaller than the defined batch_batch size, mismatch problems will occur. You can modify it yourself, for example, just pass in the shape behind, and then enter it through x.szie(0).
        return x.view(self.shape)

class SignalEmbedder(nn.Module):
    # Necessary to convert the signal into "word" vectors for transformer processing.
    # Currently a simple group and slice method, but will modify later for multi-channel inputs
    
    def __init__(self, num_slices, size_of_slice):
        super(SignalEmbedder, self).__init__()
        self.num_slices = num_slices
        self.size_of_slice = size_of_slice
        
    def forward(self, x):
        x = x[: self.num_slices * self.size_of_slice]
        x = x.reshape((self.num_slices, self.size_of_slice))
        return x
'''
class OneHotConverter(nn.Module):
    # Converts the sigmoid output into one-hots
    
    def __init__(self, size, sentence_length):
        super(OneHotConverter, self).__init__()
        self.arr_length = size
        self.num_words = sentence_length
        
    def forward(self, x):
        output = []
        for num in x:
            num = num.item()
            num *= self.arr_length
            val = np.zeros(self.arr_length)
            val[int(round(num))] = 1
        
            output.append(val)
        output = torch.as_tensor(output)
        output.requires_grad_()
        return output
'''    

class PositionalEncoder(nn.Module):
    # Necessary to store positional data about the input data
    def __init__(self, embed_dim, dropout=0.1, max_len=2500, batch_size = 1):
        super(PositionalEncoder, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        pos_encoding = torch.zeros(max_len, 1, embed_dim)
        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)
        
        divisor = torch.exp(torch.arange(0, embed_dim, 2).float() * (- math.log(10000.0) / embed_dim))
        
        pos_encoding[:, 0, 0::2] = torch.sin(position * divisor)
        pos_encoding[:, 0, 1::2] = torch.cos(position * divisor)
        pos_encoding = pos_encoding.repeat(1, batch_size, 1)
        self.register_buffer("pos_encoding", pos_encoding)

        
    def forward(self, x):
        x = x + self.pos_encoding[:x.size(0), :]
        return self.dropout(x)

In [15]:
# Training pipeline
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model to set to
model2 = ECGTransformerEncoder(vector_size=5, embed_dim=768, n_heads=16, hidden_linear_dim=2048, n_layers=2, dropout=0.3).to(device)

# Training params
loss_function = nn.L1Loss()

optimizer = torch.optim.Adam(model2.parameters(), lr = 1e-3)
torch.autograd.set_detect_anomaly(True)
data = []
for arr in waveform_lead_rhythm_diag["decoded_waveform"]:
    #print(arr)
    arr = arr.transpose()
    data.append(arr)

labels = []
for sentence in waveform_lead_rhythm_diag["diagnosis"]:
    #label = one_hot(sentence, dict_words)
    label = []
    for word in sentence.split():
        label.append(word_map[word])
    
    while len(label) < 17:
        label.append(0)
    labels.append(np.array(label))
data = torch.from_numpy(np.array(data, dtype=np.float64)).type(torch.FloatTensor)
print(labels[1])
labels = torch.from_numpy(np.array(labels))
print(data)
model2.train()

[10 23 17  5 14  0  0  0  0  0  0  0  0  0  0  0  0]
tensor([[[0.5039, 0.5059, 0.4980,  ..., 0.5117, 0.5078, 0.5059],
         [0.5029, 0.5049, 0.4980,  ..., 0.5088, 0.5049, 0.5039],
         [0.5020, 0.5039, 0.4980,  ..., 0.5059, 0.5020, 0.5020],
         ...,
         [0.4932, 0.4941, 0.5020,  ..., 0.4814, 0.4824, 0.4844],
         [0.4902, 0.4922, 0.5059,  ..., 0.4805, 0.4805, 0.4805],
         [0.4902, 0.4922, 0.5059,  ..., 0.4805, 0.4805, 0.4805]],

        [[0.4922, 0.4902, 0.4980,  ..., 0.4746, 0.4746, 0.4844],
         [0.4922, 0.4902, 0.4980,  ..., 0.4756, 0.4756, 0.4854],
         [0.4922, 0.4902, 0.4980,  ..., 0.4766, 0.4766, 0.4863],
         ...,
         [0.5098, 0.5088, 0.4980,  ..., 0.5088, 0.5107, 0.5068],
         [0.5059, 0.5020, 0.5000,  ..., 0.4990, 0.5020, 0.5000],
         [0.5059, 0.5020, 0.5000,  ..., 0.4990, 0.5020, 0.5000]],

        [[0.4785, 0.4961, 0.5078,  ..., 0.5195, 0.5176, 0.5137],
         [0.4805, 0.4961, 0.5088,  ..., 0.5215, 0.5195, 0.5146],
     

ECGTransformerEncoder(
  (positional_encoder): PositionalEncoder(
    (dropout): Dropout(p=0.3, inplace=False)
  )
  (embedder): Sequential(
    (conv_0): Conv1d(8, 16, kernel_size=(11,), stride=(1,), padding=(5,))
    (act_0): ReLU()
    (batch_norm_0): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res_0): ResBlock1D(
      (act): ReLU()
      (conv1d_1): Conv1d(16, 16, kernel_size=(11,), stride=(1,), padding=(5,))
      (conv1d_2): Conv1d(16, 16, kernel_size=(11,), stride=(1,), padding=(5,))
      (batch_norm_1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (batch_norm_2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (act_res_0): ReLU()
    (conv_1): Conv1d(16, 32, kernel_size=(11,), stride=(1,), padding=(5,))
    (act_1): ReLU()
    (batch_norm_1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res_1): ResBlock1D(
      (a

In [None]:
loss_function = nn.MSELoss()

new_data = new_data.detach().transpose(1,2)
data2 = new_data.permute(1,0,2)
for i in range(20):
    losses = 0
    optimizer.zero_grad()
    print("hey", data2.shape)
    outputs = model2(data2)
    loss = loss_function(outputs, data2)
    print("loss: " + str(loss.item()))
    losses += loss
    losses.backward(retain_graph=True)
    optimizer.step()
    print("epoch loss: ", str(losses.item()))
    if losses < .001:
        break

        '''        
for x, y in zip(new_data, labels):
    print(np.argmax(model(x.view(2500,768)).detach().numpy(), axis=1))
    print(y.detach().numpy())
'''

hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 1.354344367980957
epoch loss:  1.354344367980957
hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 1.1237390041351318
epoch loss:  1.1237390041351318
hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 0.958724856376648
epoch loss:  0.958724856376648
hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 0.9055512547492981
epoch loss:  0.9055512547492981
hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 0.8697413802146912
epoch loss:  0.8697413802146912
hey torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
loss: 0.8411292433738708


In [71]:
torch.save(model.state_dict(), 'model/transformer_768.pt')

In [None]:
from transformers import BertTokenizer, BertForNextSentencePrediction
import torch

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')

prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
encoding = tokenizer(prompt, next_sentence, return_tensors='pt')

outputs = model(**encoding, labels=torch.LongTensor([1]))
logits = outputs.logits
#assert logits[0, 0] < logits[0, 1] # next sentence was random
print(logits)

In [22]:
# define tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained('gpt2', config = GPT2Config(add_cross_attention = True))
model.train()
# preprocess training labels and tokenize
train_labels = list(waveform_lead_rhythm_diag['diagnosis'])
inputs = tokenizer(train_labels, padding = True, verbose = False, return_tensors="pt")


Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.7.crossattention.c_proj.weight', 'h.6.crossattention.c_proj.bias', 'h.9.crossattention.masked_bias', 'h.2.ln_cross_attn.weight', 'h.10.crossattention.masked_bias', 'h.3.crossattention.bias', 'h.10.crossattention.c_proj.weight', 'h.4.ln_cross_attn.weight', 'h.3.crossattention.c_proj.weight', 'h.8.ln_cross_attn.weight', 'h.5.crossattention.bias', 'h.5.crossattention.c_proj.weight', 'h.6.crossattention.c_proj.weight', 'h.1.crossattention.bias', 'h.2.crossattention.masked_bias', 'h.0.crossattention.bias', 'h.0.ln_cross_attn.weight', 'h.6.crossattention.masked_bias', 'h.7.crossattention.c_proj.bias', 'h.11.crossattention.c_proj.bias', 'h.9.crossattention.q_attn.weight', 'h.3.crossattention.c_proj.bias', 'h.1.crossattention.c_attn.weight', 'h.7.crossattention.q_attn.weight', 'h.8.crossattention.c_attn.weight', 'h.7.crossattention.c_attn.weight', 'h.11.crossattention.q_attn.we

In [23]:
# pretrain decoder
torch.device("cuda" if torch.cuda.is_available() else "cpu")
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
torch.autograd.set_detect_anomaly(True)

# set number of epochs
epochs = 30

for i in range(epochs):
    optimizer.zero_grad()
    outputs = model(**inputs, labels = inputs["input_ids"])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    
    print(loss)
    
torch.save(model.state_dict(), 'model/gpt2.pt')

tensor(7.8076, grad_fn=<NllLossBackward>)
tensor(4.5785, grad_fn=<NllLossBackward>)
tensor(6.0879, grad_fn=<NllLossBackward>)
tensor(2.2366, grad_fn=<NllLossBackward>)
tensor(2.2169, grad_fn=<NllLossBackward>)
tensor(1.4636, grad_fn=<NllLossBackward>)
tensor(1.2779, grad_fn=<NllLossBackward>)
tensor(1.2848, grad_fn=<NllLossBackward>)
tensor(0.9116, grad_fn=<NllLossBackward>)
tensor(0.5223, grad_fn=<NllLossBackward>)
tensor(0.4559, grad_fn=<NllLossBackward>)
tensor(0.4105, grad_fn=<NllLossBackward>)
tensor(0.3340, grad_fn=<NllLossBackward>)
tensor(0.3590, grad_fn=<NllLossBackward>)
tensor(0.2671, grad_fn=<NllLossBackward>)
tensor(0.1978, grad_fn=<NllLossBackward>)
tensor(0.1615, grad_fn=<NllLossBackward>)
tensor(0.1973, grad_fn=<NllLossBackward>)
tensor(0.1498, grad_fn=<NllLossBackward>)
tensor(0.1566, grad_fn=<NllLossBackward>)
tensor(0.1327, grad_fn=<NllLossBackward>)
tensor(0.1118, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBac

## Model 4 - Cohesive 1 Wrapper Transformer Architecture

In [33]:
# create encoder decoder model with GPT2 
class CustEncoderDecoder(nn.Module):
    def __init__(self, encoder, decoder, embedder):
        super(CustEncoderDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.pos_enb = PositionalEncoder(768)
        self.embedder = embedder
    
    def forward(self, x):
        ecgs, labels = x
        #x = self.embedder(ecgs).permute(2, 0, 1)
        #print(x.shape)
        x = self.encoder(ecgs).permute(1,0,2)
        print(x.shape)
        out = self.decoder(**labels, labels = labels["input_ids"], encoder_hidden_states = x.contiguous())
        return out
    
    def predict(self, ecgs):
        start = tokenizer.convert_tokens_to_ids(tokenizer.bos_token)
        x = self.encoder(ecgs).permute(1,0,2)
        out = self.decoder(start, past_key_values=True, encoder_hidden_states = x.contiguous())
        
    def return_enc(self):
        return self.encoder


#encoder = ECGTransformerEncoder(vector_size=5, embed_dim=768, n_heads=16, hidden_linear_dim=2048, n_layers=2, dropout=0.3).to(device)
#model2.load_state_dict(torch.load('model/transformer_768.pt', map_location=device))

enc_dec_model = CustEncoderDecoder(model2, model, conv_embedder)


In [None]:
enc_dec_model.predict(data2)

torch.Size([2500, 7, 768])


In [25]:
# train encoder decoder model!
optimizer = torch.optim.Adam(enc_dec_model.parameters(), lr = 1e-5)
torch.autograd.set_detect_anomaly(True)

# set number of epochs
epochs = 10

for i in range(epochs):
    optimizer.zero_grad()
    outputs = enc_dec_model((data2, inputs))
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    
    print(loss)

torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([7, 2500, 768])
tensor(0.0654, grad_fn=<NllLossBackward>)
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([7, 2500, 768])
tensor(0.0460, grad_fn=<NllLossBackward>)
torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([7, 2500, 768])
tensor(0.0439, grad_fn=<NllLossBackward>)
torch.Size([2500, 7, 768])


KeyboardInterrupt: 

## Model 5 - FNET/Basic Mixup Architecture 

In [26]:
values = enc_dec_model((data2, inputs))

torch.Size([2500, 7, 768])
torch.Size([2500, 7, 768])
torch.Size([7, 2500, 768])


In [27]:
tokens = np.argmax(values.logits.detach(), axis = 2)
#tokenizer.decode(tokens)
for l in tokens:
    print(tokenizer.decode(l))

 sinus rhythm normal voltage qrs borderline ecg<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>
us tadycardia otherwise normal ecg<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>
us tachycardia otherwise normal ecg<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>
 sinus rhythm normal ecg<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|

In [None]:
enc_dec_model((data2, inputs))