In [None]:
!git clone https://github.com/clementgr/detect-sleep-apnea.git

Cloning into 'detect-sleep-apnea'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 9 (delta 0), reused 6 (delta 0), pack-reused 0[K
Unpacking objects: 100% (9/9), done.


In [None]:
import os
# Step into the cloned repository. The guard keeps this cell safe to re-run:
# a second unconditional chdir would look for a nested 'detect-sleep-apnea'
# directory and raise FileNotFoundError.
if os.path.basename(os.getcwd()) != 'detect-sleep-apnea':
  os.chdir('detect-sleep-apnea')

In [None]:
!sh scripts/data_download.sh

Downloading...
From: https://drive.google.com/uc?id=1wK0S9dmFqwNZV_Uq3ToD9zsWzltcsi8x
To: /content/detect-sleep-apnea/data.zip
3.94GB [01:08, 57.8MB/s]


In [None]:
%%capture
!unzip data.zip
!rm data.zip
!rm -r __MACOSX
!rm -r /content/sample_data

In [None]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

%matplotlib inline

In [None]:
# Locations of the raw signals (HDF5) and of the per-window labels (CSV),
# given relative to the current working directory.
path_to_train_data = 'data/X_train.h5'
path_to_label_data = 'data/y_train_tX9Br0C.csv'
# The HDF5 file is opened read-only and the handle is kept open so that the
# datasets inside it can be indexed by later cells.
x_train = h5py.File(path_to_train_data, mode='r')
y_train = pd.read_csv(path_to_label_data)

In [None]:
x_train['data'].shape

(4400, 72002)

In [None]:
y_train.head()

Unnamed: 0,ID,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12,y_13,y_14,y_15,y_16,y_17,y_18,y_19,y_20,y_21,y_22,y_23,y_24,y_25,y_26,y_27,y_28,y_29,y_30,y_31,y_32,y_33,y_34,y_35,y_36,y_37,y_38,...,y_50,y_51,y_52,y_53,y_54,y_55,y_56,y_57,y_58,y_59,y_60,y_61,y_62,y_63,y_64,y_65,y_66,y_67,y_68,y_69,y_70,y_71,y_72,y_73,y_74,y_75,y_76,y_77,y_78,y_79,y_80,y_81,y_82,y_83,y_84,y_85,y_86,y_87,y_88,y_89
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Handle to the 'data' table of the HDF5 file; len() reports its row count.
dset = x_train['data']
len(dset)

4400

In [None]:
import torch

class SleepApneaDataset(torch.utils.data.Dataset):
  """Multi-channel dataset: each item carries all ``N_signals`` signals of one row.

  A row of the HDF5 ``data`` table is read as
  ``[sample_index, subject_index, signal_0..., signal_1..., ...]``: the signals
  are stored one after another (channel-major), which is the same layout
  ``OneChannelDataset`` relies on when it slices one contiguous signal.
  """

  def __init__(self, data_path, csv_path, N_signals=8, signal_freq=100):
    """
    Args:
      data_path: path to the HDF5 file holding a ``data`` table.
      csv_path: path to the label CSV; must have an ``ID`` column.
      N_signals: number of signals concatenated in each row.
      signal_freq: sampling frequency; stored but not used by this class.
    """
    # The file handle stays open for the lifetime of the dataset so that rows
    # can be read lazily in __getitem__.
    self.dset = h5py.File(data_path, mode='r')['data']
    self.targets = pd.read_csv(csv_path)
    self.N = N_signals
    self.freq = signal_freq

  def __len__(self):
    """Number of rows in the signal table."""
    return len(self.dset)

  def __getitem__(self, idx):
    """Return ``(x, y)`` for row ``idx``.

    ``x`` has shape ``(samples_per_signal, N_signals)`` and column ``k`` is
    signal ``k``. ``y`` is the label row whose ``ID`` equals the row's sample
    index, with the leading ``ID`` value dropped.

    Raises:
      IndexError: if no label row matches the sample index.
    """
    # Read the row once instead of issuing one HDF5 read per field.
    row = self.dset[idx]
    sample_index = row[0]
    # row[1] is the subject index; it is not needed to build (x, y).

    # Signals are contiguous, so split into (N, samples) first and only then
    # transpose; reshape(-1, N) would interleave values of different signals.
    # copy() makes the transposed view C-contiguous for downstream batching.
    x = row[2:].reshape(self.N, -1).T.copy()
    y = self.targets[self.targets['ID'] == sample_index].values[0][1:]

    return x, y

In [None]:
# Multi-channel variant of the training set.
# NOTE(review): `train_dset` is re-bound to a OneChannelDataset further down,
# so this instance is not the one the DataLoader ends up consuming.
train_dset = SleepApneaDataset('data/X_train.h5', 'data/y_train_tX9Br0C.csv')

In [None]:
import torch

class OneChannelDataset(torch.utils.data.Dataset):
  """Single-signal dataset: each item is one signal of one row, cut into frames.

  A row of the HDF5 ``data`` table is read as
  ``[sample_index, subject_index, signal_0..., signal_1..., ...]`` where every
  signal occupies ``signal_len`` consecutive values.
  """

  def __init__(self, data_path, csv_path, signal_id=0, signal_freq=100,
               signal_len=9000):
    """
    Args:
      data_path: path to the HDF5 file holding a ``data`` table.
      csv_path: path to the label CSV; must have an ``ID`` column.
      signal_id: zero-based position of the signal to extract from each row.
      signal_freq: frame width; the signal is reshaped to ``(-1, signal_freq)``.
      signal_len: number of values per signal in a row (9000 by default,
        i.e. 90 frames of 100 values with the default ``signal_freq``).
    """
    # The file handle stays open for the lifetime of the dataset so that rows
    # can be read lazily in __getitem__.
    self.dset = h5py.File(data_path, mode='r')['data']
    self.targets = pd.read_csv(csv_path)
    self.signal_id = signal_id
    self.freq = signal_freq
    self.signal_len = signal_len

  def __len__(self):
    """Number of rows in the signal table."""
    return len(self.dset)

  def __getitem__(self, idx):
    """Return ``(x, y)`` for row ``idx``.

    ``x`` is the selected signal reshaped to ``(signal_len // freq, freq)``.
    ``y`` is the label row whose ``ID`` equals the row's sample index, with
    the leading ``ID`` value dropped.

    Raises:
      IndexError: if no label row matches the sample index.
    """
    sample_index = self.dset[idx, 0]

    # Skip the two metadata columns, then jump to the requested signal.
    start = 2 + self.signal_len * self.signal_id
    stop = start + self.signal_len
    x = self.dset[idx, start:stop].reshape(-1, self.freq)
    y = self.targets[self.targets['ID'] == sample_index].values[0][1:]

    return x, y

In [None]:
# Single-signal training set (signal_id defaults to 0); this re-binds
# `train_dset`, replacing the multi-channel dataset created earlier.
train_dset = OneChannelDataset('data/X_train.h5', 'data/y_train_tX9Br0C.csv')

In [None]:
# Batches of 16 items served in dataset order.
# NOTE(review): shuffle=False on a training loader — confirm this is intended.
train_loader = torch.utils.data.DataLoader(train_dset, batch_size=16, shuffle=False)
# Peek at the shape of the first signal batch.
next(iter(train_loader))[0].shape

torch.Size([16, 90, 100])

In [None]:
class LSTM(nn.Module):
  """(Bi)LSTM encoder followed by a linear layer with sigmoid outputs.

  The last layer's final hidden state (both directions concatenated when
  bidirectional) is passed through dropout and a linear layer, and the result
  is squashed to [0, 1] with a sigmoid.
  """

  def __init__(self, seq_length, input_size, hidden_dim, output_dim, n_layers,
               bidirectional, dropout_p):
    """
    Args:
      seq_length: accepted for interface compatibility; not used by the model.
      input_size: number of features per time step.
      hidden_dim: size of the LSTM hidden state (per direction).
      output_dim: number of sigmoid outputs.
      n_layers: number of stacked LSTM layers.
      bidirectional: whether the LSTM runs in both directions.
      dropout_p: dropout probability, applied between LSTM layers and on the
        final hidden state.
    """
    super().__init__()

    self.bidirectional = bidirectional

    self.rnn = nn.LSTM(input_size=input_size,
                       # Must follow hidden_dim: the linear layer below is
                       # sized from it.
                       hidden_size=hidden_dim,
                       num_layers=n_layers,
                       bidirectional=bidirectional,
                       # Inter-layer dropout is only defined for stacked
                       # LSTMs; passing it with one layer just warns.
                       dropout=dropout_p if n_layers > 1 else 0.0)

    fc_input_dim = 2 * hidden_dim if self.bidirectional else hidden_dim
    self.fc = nn.Linear(fc_input_dim, output_dim)
    self.dropout = nn.Dropout(dropout_p)

  def forward(self, x):
    """Map ``x`` of shape (seq_len, batch, input_size) to (batch, output_dim).

    The LSTM is built without ``batch_first``, hence the time-major input.
    """
    _, (hidden, _) = self.rnn(x)

    if self.bidirectional:
      # The last two entries are the top layer's forward and backward states.
      hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
    else:
      hidden = self.dropout(hidden[-1, :, :])

    # hidden = [batch size, hid dim * num directions]

    return torch.sigmoid(self.fc(hidden))

In [None]:
# Model hyper-parameters. The loader yields batches of shape [16, 90, 100],
# i.e. 90 steps of 100 values per item.
seq_length = 90    # steps per item
input_size = 100   # values per step
hidden_dim = 256   # LSTM hidden size
output_dim = 90    # one output per label column (y_0 .. y_89)
n_layers = 2
bidir = True
dropout_p = 0.1

model = LSTM(seq_length, input_size, hidden_dim, output_dim, n_layers, bidir, dropout_p)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [None]:
import torch.optim as optim

# Move the model to its target device first and only then build the optimizer,
# so the optimizer is constructed over the parameters it will actually update.
model = model.to(device)
optimizer = optim.Adam(model.parameters())
# Binary cross-entropy on probabilities: the model already applies a sigmoid.
criterion = nn.BCELoss().to(device)

In [None]:
# Sanity check: BCE of an all-zero prediction against one label row.
# nn.BCELoss is called as criterion(input, target), so the prediction `a`
# goes first and the label row `b` second.
tmp_criterion = nn.BCELoss()
a = np.zeros(90)
b = y_train.iloc[1].values[1:].astype('float')
tmp_criterion(torch.from_numpy(a), torch.from_numpy(b))

tensor(0., dtype=torch.float64)

In [None]:
y_train.iloc[1].values[1:].astype('float')

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.])

In [None]:
from metric_dreem import dreem_sleep_apnea_custom_metric

In [None]:
def train(model, train_loader, optimizer, criterion):
  """Run one optimisation pass over ``train_loader``.

  Args:
    model: module called with a float tensor of shape (seq, batch, features).
    train_loader: iterable of ``(signal, target)`` batches, with ``signal``
      laid out as (batch, seq, features).
    optimizer: optimizer built over ``model``'s parameters.
    criterion: loss called as ``criterion(preds, target)``.

  Returns:
    Mean of the per-batch losses.

  Raises:
    ZeroDivisionError: if ``train_loader`` is empty.
  """
  # Work on whatever device the model lives on instead of a notebook global.
  model_device = next(model.parameters()).device

  epoch_loss = 0.0
  model.train()

  for signal, target in train_loader:

    optimizer.zero_grad()

    # Cast to float32 and keep everything on the model's device; the loss is
    # computed there too, avoiding a round-trip through the CPU.
    signal = signal.float().to(model_device)
    target = target.float().to(model_device)

    # (batch, seq, features) -> (seq, batch, features)
    signal = signal.permute(1, 0, 2)
    preds = model(signal).squeeze(1)

    loss = criterion(preds, target)
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()

  return epoch_loss / len(train_loader)

In [None]:
def evaluate(model, val_loader, criterion):
  """Compute the mean per-batch loss over ``val_loader`` without updating weights.

  Batches get the same preparation as in ``train``: float32 cast, move to the
  model's device, and (batch, seq, features) -> (seq, batch, features).

  Args:
    model: module called with a float tensor of shape (seq, batch, features).
    val_loader: iterable of ``(signal, target)`` batches, with ``signal``
      laid out as (batch, seq, features).
    criterion: loss called as ``criterion(preds, target)``.

  Returns:
    Mean of the per-batch losses.

  Raises:
    ZeroDivisionError: if ``val_loader`` is empty.
  """
  # Work on whatever device the model lives on instead of a notebook global.
  model_device = next(model.parameters()).device

  epoch_loss = 0.0
  model.eval()

  with torch.no_grad():

    for signal, target in val_loader:

      # Raw batches arrive as double/long tensors; the model and BCE-style
      # losses need float32 inputs and float targets.
      signal = signal.float().to(model_device)
      target = target.float().to(model_device)

      # (batch, seq, features) -> (seq, batch, features)
      signal = signal.permute(1, 0, 2)
      preds = model(signal).squeeze(1)

      loss = criterion(preds, target)
      epoch_loss += loss.item()

  return epoch_loss / len(val_loader)

In [None]:
def epoch_time(start_time, end_time):
  """Split the span between two timestamps (in seconds) into whole minutes
  and the remaining whole seconds, returned as ``(minutes, seconds)``."""
  span = end_time - start_time
  whole_minutes = int(span / 60)
  leftover_seconds = int(span - 60 * whole_minutes)
  return (whole_minutes, leftover_seconds)

In [None]:
# Training driver: runs `n_epochs` passes over `train_loader`, timing each one
# and printing the mean training loss.
# NOTE(review): validation and checkpointing are currently commented out, so
# `best_valid_loss` is initialised but never updated and no model is saved.
n_epochs = 20
best_valid_loss = float('inf')

for epoch in range(n_epochs):

  start_time = time.time()
  
  # train_loss, train_acc = train(model, train_loader, optimizer, criterion)
  # valid_loss, valid_acc = evaluate(model, val_iter, criterion)

  train_loss = train(model, train_loader, optimizer, criterion)
  # valid_loss = evaluate(model, val_loader, criterion)

  end_time = time.time()

  # Wall-clock duration of this epoch as (minutes, seconds).
  epoch_mins, epoch_secs = epoch_time(start_time, end_time)
  
  # Disabled: keep the weights with the lowest validation loss.
  # if valid_loss < best_valid_loss:
  #   best_valid_loss = valid_loss
  #   torch.save(model.state_dict(), 'best_model.pt')

  print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
  print(f'\tTrain Loss: {train_loss:.3f}')
  # print(f'\t Val. Loss: {valid_loss:.3f}')
  # print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  # print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

In [None]:
# NOTE(review): absolute path outside the repository; no cell visible above
# creates y_random.csv — confirm where it comes from. This also re-binds `a`,
# which an earlier cell used for the all-zero array in the BCE check.
a = pd.read_csv('/content/y_random.csv')
a.head()