<a href="https://colab.research.google.com/github/cgjeong23/ECG_Anomaly_Detector/blob/master/ECG_anomaly_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the CSV paths read later in
# this notebook live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset

# Prefer the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on CPU-only runtimes instead of failing at `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Hyperparameters

In [None]:
# DataLoader settings (passed to the DataLoader constructors below).
batch_size = 50  # number of heartbeats per mini-batch
num_workers = 8  # DataLoader worker processes; NOTE(review): may exceed the CPU cores of the runtime - confirm

# Model sizes (consumed by the Encoder / Decoder constructors).
outer_hidden_dim = 128  # hidden size of the LSTM layer closest to the data
inner_hidden_dim = 64  # hidden size of the LSTM layer closest to the latent space
latent_dim = 32  # size of the latent mean / log-variance vectors

# Training settings.
lr = 1e-3  # intended optimizer learning rate
num_epochs = 20  # number of passes over the training loader

### Preprocessing the data

In [None]:
# Load the two ECG5000 CSV files; they have no header row, so columns are numbered.
train_df = pd.read_csv('/content/drive/MyDrive/ECG5000_data/ECG5000_TRAIN.csv', header=None)
test_df = pd.read_csv('/content/drive/MyDrive/ECG5000_data/ECG5000_TEST.csv', header=None)

# Merge both files into one frame. `DataFrame.append` was removed in pandas 2.0,
# so use `pd.concat`; `ignore_index=True` avoids duplicated row labels.
complete_df = pd.concat([train_df, test_df], ignore_index=True)

# `sample` returns a new frame, so the result must be assigned back for the
# shuffle to take effect. A fixed seed keeps the row order reproducible.
complete_df = complete_df.sample(frac=1, random_state=42).reset_index(drop=True)
complete_df.shape

(5000, 141)

The last column holds the beat-type label; normal beats are labelled 1:  
1: Normal   
2: R on T  
3: PVC  
4: SP  
5: UB

In [None]:
# Keep every existing column label except the last one, which becomes 'beat_type'.
new_columns = [*complete_df.columns[:-1], 'beat_type']
complete_df.columns = new_columns

In [None]:
# Number of heartbeats per beat-type label.
complete_df['beat_type'].value_counts()

1    2919
2    1767
4     194
3      96
5      24
Name: beat_type, dtype: int64

In [None]:
# Label 1 marks a normal beat; every other label is treated as an anomaly.
is_normal = complete_df['beat_type'] == 1

# Separate the two groups and drop the label column so only the signal values remain.
train_df = complete_df.loc[is_normal].drop(columns='beat_type')
abnormal_df = complete_df.loc[~is_normal].drop(columns='beat_type')
train_df.shape

(2919, 140)

Splitting the normal beats into training (80%), validation (10%) and test (10%) sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the normal beats; the remaining 80% is the training set.
# A fixed seed makes the split reproducible across kernel restarts.
train_df, holdout_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Split the hold-out set evenly into validation and test sets.
valid_df, test_df = train_test_split(holdout_df, test_size=0.5, random_state=42)

Changing the test dataset to include anomalies

In [None]:
# Show the sizes of the normal test split and of the anomaly set side by side.
print(f"{test_df.shape} {abnormal_df.shape}")


(292, 140) (2081, 140)


### Dataset / Dataloader

In [None]:
class ECGDataset(Dataset):
    """Map-style dataset that serves one heartbeat per item.

    Args:
        df: DataFrame whose rows are heartbeats and whose columns are the
            signal samples (label column already removed).

    Each item is a float32 tensor of shape [num_samples, 1], i.e. the row
    values with a trailing feature dimension added.
    """

    def __init__(self, df):
        # Convert once up front: [num_beats, num_samples] -> [num_beats, num_samples, 1].
        values = df.to_numpy(dtype=np.float32)
        self.beats = torch.tensor(values).unsqueeze(dim=-1)

    def __len__(self):
        """Return the number of heartbeats in the dataset."""
        return self.beats.shape[0]

    def __getitem__(self, idx):
        """Return the heartbeat at position ``idx`` as a [num_samples, 1] tensor."""
        return self.beats[idx]

In [None]:
# Wrap each split in an ECGDataset, in the order train / validation / test.
train_data, valid_data, test_data = (
    ECGDataset(split_df) for split_df in (train_df, valid_df, test_df)
)

In [None]:
from torch.utils.data import DataLoader

# Reminder from the author: pass a Dataset instance here rather than a raw list.
# The training loader reshuffles every epoch; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# The validation split gets its own loader so it can be evaluated like the test split.
valid_loader = DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)

  cpuset_checked))


### Model

In [None]:
class Encoder(nn.Module):
    """Stacked-LSTM encoder producing the latent mean and log-variance.

    Args:
        input_dim: feature size of each input step.
        outer_hidden_dim: hidden size of the first LSTM layer.
        inner_hidden_dim: hidden size of the second LSTM layer.
        latent_dim: feature size of the returned mean / log-variance.

    The LSTM layers use PyTorch's default layout, so ``x`` is either
    (seq_len, batch, input_dim) or unbatched (seq_len, input_dim).
    """

    def __init__(self, input_dim, outer_hidden_dim, inner_hidden_dim, latent_dim):
        super(Encoder, self).__init__()
        self.in_layer1 = nn.LSTM(input_dim, outer_hidden_dim)
        self.in_layer2 = nn.LSTM(outer_hidden_dim, inner_hidden_dim)
        self.VAE_mean  = nn.LSTM(inner_hidden_dim, latent_dim)
        self.VAE_var   = nn.LSTM(inner_hidden_dim, latent_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Encode ``x`` and return ``(mean, log_var)``, both with last dim ``latent_dim``."""
        # nn.LSTM returns (output, (h_n, c_n)). Only the per-step output is a
        # tensor that can feed the activation and the next layer, so the
        # recurrent state is discarded at every stage.
        out_1, _ = self.in_layer1(x)
        h_1 = self.LeakyReLU(out_1)

        out_2, _ = self.in_layer2(h_1)
        h_2 = self.LeakyReLU(out_2)

        mean, _ = self.VAE_mean(h_2)
        log_var, _ = self.VAE_var(h_2)

        return mean, log_var

In [None]:
class Decoder(nn.Module):
    """Stacked-LSTM decoder mapping latent vectors back to the input space.

    Args:
        latent_dim: feature size of each latent step.
        outer_hidden_dim: hidden size of the second LSTM layer.
        inner_hidden_dim: hidden size of the first LSTM layer.
        output_dim: feature size of the reconstruction.

    The output passes through a sigmoid, so reconstructions lie in (0, 1).
    NOTE(review): this presumes the targets are scaled to [0, 1] - confirm
    against the preprocessing.
    """

    def __init__(self, latent_dim, outer_hidden_dim, inner_hidden_dim, output_dim):
        super(Decoder, self).__init__()
        self.out_layer1 = nn.LSTM(latent_dim, inner_hidden_dim)
        self.out_layer2 = nn.LSTM(inner_hidden_dim, outer_hidden_dim)
        self.output = nn.LSTM(outer_hidden_dim, output_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Decode latent input ``x`` and return the reconstruction ``x_hat``."""
        # nn.LSTM returns (output, (h_n, c_n)); keep only the per-step output
        # tensor, since the activations and following layers expect a tensor.
        out_1, _ = self.out_layer1(x)
        h_1 = self.LeakyReLU(out_1)

        out_2, _ = self.out_layer2(h_1)
        h_2 = self.LeakyReLU(out_2)

        out_3, _ = self.output(h_2)
        x_hat = torch.sigmoid(out_3)
        return x_hat

In [None]:
class RecurrentVAE(nn.Module):
    """Variational autoencoder assembled from an encoder and a decoder module.

    Args:
        Encoder: module whose forward returns ``(mean, log_var)``.
        Decoder: module whose forward maps a latent sample to a reconstruction.
    """

    def __init__(self, Encoder, Decoder):
        super(RecurrentVAE, self).__init__()
        self.Encoder = Encoder
        self.Decoder = Decoder

    def reparameterization(self, mean, var):
        """Draw ``z = mean + var * epsilon`` with ``epsilon ~ N(0, I)``.

        ``var`` is used as the scale of the noise; ``forward`` passes the
        standard deviation ``exp(0.5 * log_var)`` here.
        """
        # randn_like already allocates on the same device and dtype as `var`,
        # so no global device lookup is needed.
        epsilon = torch.randn_like(var)
        z = mean + var * epsilon  # reparameterization trick
        return z

    def forward(self, x):
        """Return ``(x_hat, mean, log_var)`` for input ``x``."""
        mean, log_var = self.Encoder(x)
        # exp(0.5 * log_var) converts the log-variance into a standard deviation.
        z = self.reparameterization(mean, torch.exp(0.5 * log_var))
        x_hat = self.Decoder(z)

        return x_hat, mean, log_var

In [None]:
# Layer sizes shared by both halves of the autoencoder.
shared_dims = dict(
    outer_hidden_dim=outer_hidden_dim,
    inner_hidden_dim=inner_hidden_dim,
    latent_dim=latent_dim,
)

# Encoder input and decoder output both use a feature size of 140.
encoder = Encoder(input_dim=140, **shared_dims)
decoder = Decoder(output_dim=140, **shared_dims)

# Assemble the VAE and move it to the selected device.
model = RecurrentVAE(Encoder=encoder, Decoder=decoder).to(device)

### Training

In [None]:
from torch.optim import Adam

BCE_loss = nn.BCELoss()


def loss_function(x, x_hat, mean, log_var):
    """Return the summed reconstruction loss plus the KL divergence term.

    Args:
        x: target tensor.
        x_hat: reconstruction, same shape as ``x``.
        mean: latent mean.
        log_var: latent log-variance.

    NOTE(review): binary cross-entropy presumes targets in [0, 1] - confirm the
    inputs are scaled accordingly.
    """
    # Reconstruction term: element-wise BCE summed over all elements.
    reconstruction = F.binary_cross_entropy(x_hat, x, reduction='sum')

    # KL term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var)).
    kl_elements = 1 + log_var - mean.pow(2) - log_var.exp()
    kl_divergence = -0.5 * kl_elements.sum()

    return reconstruction + kl_divergence


In [None]:
def train(model, train_loader, num_epochs, optimizer, loss_fn=None):
    """Train ``model`` on ``train_loader`` and record the loss of every epoch.

    Args:
        model: module whose forward returns ``(x_hat, mean, log_var)``.
        train_loader: iterable yielding batches of shape [batch, ...]; each
            sample is flattened to a vector before it is fed to the model.
        num_epochs: number of passes over ``train_loader``.
        optimizer: optimizer already bound to ``model``'s parameters.
        loss_fn: callable ``(x, x_hat, mean, log_var) -> scalar tensor``.
            Defaults to the notebook-level ``loss_function``.

    Returns:
        dict with keys 'train', 'valid' and 'test'. 'train' holds one float per
        epoch (loss summed over the epoch divided by the number of samples
        seen); 'valid' and 'test' are left empty.
    """
    if loss_fn is None:
        # Resolved at call time so the default follows the notebook namespace.
        loss_fn = loss_function

    history = {'train': [], 'valid': [], 'test': []}

    # Send batches to wherever the model lives rather than relying on a global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        model.train()

        epoch_loss = 0.0
        samples_seen = 0

        for x in train_loader:
            # Use the batch's actual size: the last batch may be smaller than
            # the configured batch size.
            x = x.reshape(x.size(0), -1).to(model_device)
            optimizer.zero_grad()

            x_hat, mean, log_var = model(x)
            loss = loss_fn(x, x_hat, mean, log_var)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            samples_seen += x.size(0)

        # Guard against an empty loader so the average stays defined.
        average_loss = epoch_loss / max(samples_seen, 1)
        history['train'].append(average_loss)

        print("\tEpoch", epoch + 1, "complete", "\tAverage Loss: ", average_loss)

    return history

In [None]:
# Use the learning rate from the hyperparameter cell instead of a hard-coded
# copy, so changing `lr` there actually affects training.
optim = torch.optim.Adam(model.parameters(), lr=lr)
history = train(model, train_loader, num_epochs, optimizer=optim)

  cpuset_checked))


torch.Size([50, 140])


TypeError: ignored