In [1]:
import sys
import scipy.io
import scipy.signal as sig
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import os

In [2]:
dataset = pd.read_pickle("CinC2017Data/database.pk")

In [3]:
def adaptive_gain_norm(x, w):
    x_mean_sub = np.pad(x - x.mean(), int((w-1)/2), "reflect")
    window = np.ones(w)
    sigma_square = np.convolve(x_mean_sub**2, window, mode="valid")/w
    gain = 1/np.sqrt(sigma_square)

    return x * gain

In [4]:
dataset["length"] = dataset["data"].map(lambda arr: arr.shape[-1])
dataset["data"] = dataset["data"].map(lambda d: d[0])

In [6]:
# select only the 30s length records
dataset = dataset[dataset["length"] == 9000]

In [None]:
# dataset["data"] = dataset["data"].map(lambda d: adaptive_gain_norm(d, 501))

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

In [8]:
import torch.nn as nn
import torch

In [9]:
# Check cuda
print(torch.cuda.is_available())

True


In [42]:
# Now define a model
class CVAE(nn.Module):

    def __init__(self):
        super(CVAE, self).__init__()

        self.conv_section1 = nn.Sequential(
            nn.Conv1d(1, 16, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            nn.Conv1d(16, 16, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(16)
        )

        self.conv_section2 = nn.Sequential(
            nn.Conv1d(16, 16, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(16),
            nn.Conv1d(16, 16, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(16)
        )

        self.conv_section3 = nn.Sequential(
            nn.Conv1d(16, 32, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Conv1d(32, 32, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(32)
        )

        self.conv_section4 = nn.Sequential(
            nn.Conv1d(32, 48, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(48),
            nn.Conv1d(48, 48, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(48)
        )

        self.conv_section5 = nn.Sequential(
            nn.Conv1d(48, 64, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Conv1d(64, 64, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(64)
        )

        self.conv_section6 = nn.Sequential(
            nn.Conv1d(64, 64, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Conv1d(64, 64, 19, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(64)
        )

        self.conv_section7 = nn.Sequential(
            nn.Conv1d(64, 80, 9, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(80),
            nn.Conv1d(80, 80, 9, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(80)
        )

        self.encoder_linear = nn.Linear(5120, 120)
        self.decoder_linear = nn.Linear(60, 5120)
        self.decoder_batchnorm = nn.BatchNorm1d(5120)

        self.transconv_section1 = nn.Sequential(
            nn.ConvTranspose1d(16, 1, 20, padding=9, stride=1),
        )

        self.transconv_section2 = nn.Sequential(
            nn.ConvTranspose1d(16, 16, 20, padding=9, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(16),
        )

        self.transconv_section3 = nn.Sequential(
            nn.ConvTranspose1d(32, 16, 20, padding=9, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(16),
        )

        self.transconv_section4 = nn.Sequential(
            nn.ConvTranspose1d(48, 32, 20, padding=9, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(32),
        )

        self.transconv_section5 = nn.Sequential(
            nn.ConvTranspose1d(64, 48, 20, padding=9, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(48),
        )

        self.transconv_section6 = nn.Sequential(
            nn.ConvTranspose1d(64, 64, 20, padding=9, stride=2),
            nn.ReLU(),
            nn.BatchNorm1d(64)
        )

        self.transconv_section7 = nn.Sequential(
            nn.ConvTranspose1d(80, 64, 10, padding=4, stride=1),
            nn.ReLU(),
            nn.BatchNorm1d(64)
        )

        self.dropout = nn.Dropout()

    def forward(self, x):

        # [1, 2048]
        x = self.conv_section1(x)
        x = nn.functional.max_pool1d(x, 2)

        # [16, 1024]
        x = self.conv_section2(x)
        x = nn.functional.max_pool1d(x, 2)

        # [32, 512]
        x = self.conv_section3(x)
        x = nn.functional.max_pool1d(x, 2)

        # [48, 256]
        x = self.conv_section4(x)
        x = nn.functional.max_pool1d(x, 2)

        # [64, 128]
        x = self.conv_section5(x)
        x = nn.functional.max_pool1d(x, 2)

        # [64, 64]
        x = self.conv_section6(x)

        # [64, 64]
        x = self.conv_section7(x)

        # [80, 64]
        x = torch.flatten(x, -2)

        # [5120]
        x = self.encoder_linear(x)

        # Sample from the latent distribution
        z = torch.normal(x[:, :60], torch.abs(x[:, 60:]))

        # [60]
        z = self.decoder_linear(z)
        z = self.decoder_batchnorm(z)
        z = torch.nn.functional.relu(z)

        # [5120]
        z = torch.reshape(z, (-1, 80, 64))
        # [80, 64]
        z = self.transconv_section7(z)
        print(z.shape)
        # [64, 64]
        z = self.transconv_section6(z)
        print(z.shape)
        # [64, 64]
        z = self.transconv_section5(z)
        print(z.shape)
        # [64, 128]
        z = self.transconv_section4(z)
        print(z.shape)
        # [48, 256]
        z = self.transconv_section3(z)
        print(z.shape)
        # [32, 512]
        z = self.transconv_section2(z)
        print(z.shape)
        # [16, 1024]
        z = self.transconv_section1(z)
        print(z.shape)
        # [1, 2048]

        return z

In [26]:
# Onehot encoding
from torch.utils.data import Dataset, DataLoader

def generate_onehot(c):
    if c == "N":
        return np.array([1, 0, 0, 0])
    if c == "O":
        return np.array([0, 1, 0, 0])
    if c == "A":
        return np.array([0, 0, 1, 0])
    if c == "~":
        return np.array([0, 0, 0, 1])

def generate_index(c):
    if c == "N":
        return 0
    if c == "O":
        return 0
    if c == "A":
        return 0
    if c == "~":
        return 1

# dataset["onehot"] = dataset["class"].map(generate_onehot)
dataset["class_index"] = dataset["class"].map(generate_index)

class Dataset(torch.utils.data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, dataset):
        'Initialization'
        self.dataset = dataset

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.dataset.index)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        row = self.dataset.iloc[index]

        X = row["data"]
        y = row["class_index"]

        return X, y

In [43]:
train_dataset, test_dataset = train_test_split(dataset, test_size=0.15, stratify=dataset["class_index"])

# Normalise the data
train_dataset["data"] = (train_dataset["data"] - train_dataset["data"].map(lambda x: x.mean()))/train_dataset["data"].map(lambda x: x.std())
test_dataset["data"] = (test_dataset["data"] - test_dataset["data"].map(lambda x: x.mean()))/test_dataset["data"].map(lambda x: x.std())

print(train_dataset["data"].map(lambda x: x.mean()))
print(train_dataset["data"].map(lambda x: x.std()))

def split_to_segments(dataset, new_len, orig_len, overlap=0):
    sections = []

    num_sections = orig_len // new_len
    for _, series in dataset.iterrows():
        for i in range(num_sections):
            data = series["data"][i*new_len: (i+1)*new_len]
            label = series["class_index"]
            sections.append({"data": data, "class_index": label})

    return pd.DataFrame(sections)

split_to_segments(test_dataset, 2048, 9000, 0)

torch_dataset_train = Dataset(split_to_segments(train_dataset, 2048, 9000, 0))
torch_dataset_test = Dataset(split_to_segments(test_dataset, 2048, 9000, 0))

train_dataloader = DataLoader(torch_dataset_train, batch_size=32, shuffle=True, pin_memory=True)
test_dataloader = DataLoader(torch_dataset_test, batch_size=32, shuffle=True, pin_memory=True)

A00941    4.736952e-18
A08370   -1.184238e-17
A05125   -7.500173e-18
A03387   -1.894781e-17
A06901    1.026340e-17
              ...     
A07345    3.157968e-18
A06524   -3.157968e-17
A03281    6.315935e-18
A00856   -1.736882e-17
A07043    3.947460e-18
Name: data, Length: 5080, dtype: float64
A00941    1.0
A08370    1.0
A05125    1.0
A03387    1.0
A06901    1.0
         ... 
A07345    1.0
A06524    1.0
A03281    1.0
A00856    1.0
A07043    1.0
Name: data, Length: 5080, dtype: float64


In [44]:
num_epochs = 10

if torch.cuda.is_available():
    print("Using Cuda")
    device = torch.device("cuda")
else:
    print("Using CPU")
    device = torch.device("cpu")

model = CVAE().to(device)

# Use weightings to avoid

# class_counts = torch.tensor(dataset["class_index"].value_counts().values.astype(np.float32))
# class_weights = torch.nn.functional.normalize(1.0/class_counts, dim=0)
loss_func = torch.nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.8)
num_batches = len(train_dataloader)
num_test_batches = len(test_dataloader)

Using Cuda


In [45]:
model = model.to(device)

for epoch in range(num_epochs):
    total_loss = 0
    print(f"starting epoch {epoch} ...")
    # Train
    model.train()
    for i, (signals, _) in enumerate(train_dataloader):
        signals = torch.unsqueeze(signals.to(device), 1).float()
        # fft = torch.abs(torch.fft.fft(signals))
        # signals = torch.cat([signals, fft], dim=1)
        # labels = labels.type(torch.LongTensor)

        optimizer.zero_grad()
        output = model(signals)
        loss = loss_func(output, signals)
        loss.backward()
        optimizer.step()
        total_loss += float(loss)

    print(f"Epoch {epoch} finished with average loss {total_loss/num_batches}")
    print("Testing ...")
    # Test
    test_loss = 0
    with torch.no_grad():
        model.eval()
        for i, (signals, _) in enumerate(test_dataloader):
            signals = torch.unsqueeze(signals.to(device), 1).float()
            # fft = torch.abs(torch.fft.fft(signals))
            # signals = torch.cat([signals, fft], dim=1)
            # labels = labels.type(torch.LongTensor)

            output = model(signals)
            loss = loss_func(output, signals)
            test_loss += float(loss)

    print(f"Average test loss: {test_loss/num_test_batches}")

starting epoch 0 ...
torch.Size([32, 64, 65])
torch.Size([32, 64, 130])
torch.Size([32, 48, 260])
torch.Size([32, 32, 520])
torch.Size([32, 16, 1040])
torch.Size([32, 16, 2080])
torch.Size([32, 1, 2081])


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (2081) must match the size of tensor b (2048) at non-singleton dimension 2