In [None]:
#set up the environment
!pip install -qq arff2pandas
import torch
torch.manual_seed(1)

import torch.nn as nn

import copy
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from glob import glob
import time
import copy
import shutil

from torch import nn, optim

import torch.nn.functional as F
from torchvision import datasets, transforms, models 
from arff2pandas import a2p


%matplotlib inline
%config InlineBackend.figure_format='retina'

# --- Plot styling -----------------------------------------------------------
# Custom high-contrast colour cycle used for every figure in the notebook.
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

# Apply the whitegrid theme first, then replace its 'muted' palette with ours.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default figure size as (width, height).
rcParams['figure.figsize'] = 12, 8

# --- Reproducibility --------------------------------------------------------
# Seed both NumPy and PyTorch with the same value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# --- Compute device ---------------------------------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
#load the train and test datasets
# Read the two ARFF files that make up the ECG5000 dataset.
with open('ECG5000_TRAIN.arff') as f:
  train = a2p.load(f)

with open('ECG5000_TEST.arff') as f:
  test = a2p.load(f)

# Combine both files into one frame. `DataFrame.append` was removed in
# pandas 2.0, so use `pd.concat`, which keeps the original row labels
# exactly as `append` did.
df = pd.concat([train, test])

# Shuffle all rows. No `random_state` is passed, so the order presumably
# comes from NumPy's global RNG seeded in the setup cell -- verify if exact
# reproducibility across re-runs of this cell matters.
df = df.sample(frac=1.0)

# set class names(5 type of heartbeats)
# Label of the "normal" heartbeat class, plus display names for the
# five heartbeat types.
CLASS_NORMAL = 1
class_names = ['Normal','R on T','PVC','SP','UB']

# The last column holds the class label; rename it to 'target'.
df.columns = [*df.columns[:-1], 'target']

# Labels are compared as strings (presumably how the ARFF loader returns
# them -- confirm against the loaded frame).
is_normal = df['target'] == str(CLASS_NORMAL)

# Normal heartbeats, without the label column.
normal_df = df[is_normal].drop(columns='target')

# Every other class is merged into a single "anomaly" set, also unlabeled.
anomaly_df = df[~is_normal].drop(columns='target')

# Split the normal heartbeats into train, validation and test sets.
# Step 1: keep 85% for training and hold out the remaining 15%.
train_df, holdout_df = train_test_split(
  normal_df, test_size=0.15, random_state=RANDOM_SEED
)

# Step 2: divide the held-out rows in half -- one half for validation,
# the other for testing.
val_df, test_df = train_test_split(
  holdout_df, test_size=0.5, random_state=RANDOM_SEED
)

# We need to convert our examples into tensors, so we can use them to train our Autoencoder. Let's write a helper function for that:
def create_dataset(df):
  """Convert a DataFrame of time series into a list of float32 tensors.

  Each row of `df` is treated as one sequence and each column as one time
  step. Every row becomes a tensor of shape (seq_len, 1): the trailing axis
  is added by `unsqueeze(1)`, so `n_features` is always 1.

  Args:
    df: DataFrame whose values can all be cast to float32.

  Returns:
    A tuple `(dataset, seq_len, n_features)` where `dataset` is a list with
    one (seq_len, 1) float32 tensor per row, `seq_len` is the number of
    columns of `df`, and `n_features` is 1.
  """
  values = df.astype(np.float32).to_numpy()

  dataset = [
    torch.tensor(row, dtype=torch.float32).unsqueeze(1)
    for row in values.tolist()
  ]

  # Read the dimensions from the array instead of stacking every tensor:
  # stacking copies the whole dataset just to inspect its shape and raises
  # on an empty frame.
  seq_len = values.shape[1]
  n_features = 1

  return dataset, seq_len, n_features

# Each time series is converted to a 2D tensor of shape (sequence length, number of features).
# Build the tensor datasets. The sequence length and feature count are taken
# from the training set only.
train_dataset, seq_len, n_features = create_dataset(train_df)

# For the other splits only the list of tensors is needed. Index the first
# element of the returned tuple instead of unpacking into `_`: assigning to
# `_` shadows IPython's "last output" variable for the rest of the session.
val_dataset = create_dataset(val_df)[0]
test_normal_dataset = create_dataset(test_df)[0]
test_anomaly_dataset = create_dataset(anomaly_df)[0]

