In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch  as th
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle
import pandas as pd
import seaborn as sns
import random
import os
import glob
import numpy as np
import gzip as gz
import pickle
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, random_split
from BESTRq_classes.BESTRq import BestRqFramework, RandomProjectionQuantizer
from compute_fft import compute_spectrogram, plot_spectrogram, mask_and_replace
from models.CNN_BiLSTM_Attention import ParallelModel

# GAN implementation : comparison between spectrum and time serie 
https://towardsdatascience.com/synthetic-time-series-data-a-gan-approach-869a984f2239

In [3]:
data_path = '/Users/martinblot/Desktop/sleep-edf-prepared/5-cassette'  ## path towards the 5-cassette file
fp = gz.open(data_path+'/SC4001E0.npz.gz','rb')
data_test = np.load(fp,allow_pickle=True)
fnames = glob.glob(os.path.join(data_path, "*npz.gz"))
devpart = 10
xtrain , xvalid = None , None
ytrain , yvalid = None , None
measurement=data_test['ch_label'][2]

for fn in tqdm(fnames):
    fp = gz.open(fn,'rb')
    data = np.load(fp,allow_pickle=False) # for now, don't care about headers
    x = data['x'][:,:,2] # EEG and EOG
    y = data['y'] # Take the labels
    idx = np.arange(x.shape[0])
    np.random.shuffle(idx)
    devlim = x.shape[0]//devpart
    devpart = 10
    idx = np.arange(x.shape[0])
    np.random.shuffle(idx)
    devlim = x.shape[0]//devpart
    if xtrain is None:
        xtrain = np.zeros((1,x.shape[1]))    ##np.zeros((1,x.shape[1],2)) if we include EOG  
        xvalid = np.zeros((1,x.shape[1]))
        ytrain , yvalid = np.zeros(1) , np.zeros(1)
    xvalid = np.concatenate((xvalid,x[idx[:devlim]]), axis=0)
    yvalid = np.concatenate((yvalid,y[idx[:devlim]]), axis=0)
    xtrain = np.concatenate((xtrain,x[idx[devlim:]]), axis=0)
    ytrain = np.concatenate((ytrain,y[idx[devlim:]]), axis=0)
    del x,y

xtrain , xvalid = xtrain[1:] , xvalid[1:]
ytrain , yvalid = ytrain[1:] , yvalid[1:]
xtrain, xvalid = th.FloatTensor(xtrain), th.FloatTensor(xvalid)
ytrain, yvalid = th.IntTensor(ytrain), th.IntTensor(yvalid)

100%|██████████| 153/153 [00:59<00:00,  2.57it/s]


In [7]:
outf="/Users/martinblot/Desktop/sleep-edf-prepared/cassette-th-data-all.pck"
fp = open(outf,"wb")
pickle.dump((xtrain , xvalid , ytrain , yvalid), fp)
filepath = '/Users/martinblot/Desktop/sleep-edf-prepared/cassette-th-data-all.pck'
xtrain,xvalid, ytrain, yvalid = np.load(filepath, allow_pickle = True)
print(xtrain.shape)

torch.Size([175995, 600])


In [8]:
batch=100
xtrain_tensor,ytrain_tensor=th.tensor(xtrain[:1000]),th.tensor(ytrain[:1000])
xvalid_tensor,yvalid_tensor=th.tensor(xvalid[:1000]),th.tensor(yvalid[:1000])

dataset_t = TensorDataset(xtrain_tensor, ytrain_tensor)
train_loader = DataLoader(dataset_t, batch_size= batch, shuffle=True)
dataset_v = TensorDataset(xvalid_tensor, yvalid_tensor)
valid_loader = DataLoader(dataset_v, batch_size= batch, shuffle=True)

  xtrain_tensor,ytrain_tensor=th.tensor(xtrain[:1000]),th.tensor(ytrain[:1000])
  xvalid_tensor,yvalid_tensor=th.tensor(xvalid[:1000]),th.tensor(yvalid[:1000])


In [130]:
class Generator(nn.Module):
    def __init__(self, hidden_dim, data_dim,n_layers):
        super(Generator, self).__init__()
        self.hidden_dim = hidden_dim
        self.data_dim = data_dim
        self.n_layers=n_layers
        # Couches du générateur
        self.GRU=nn.GRU(data_dim,hidden_dim,batch_first=True,num_layers=n_layers)
        self.gen_block=nn.Linear(hidden_dim,data_dim)
        self.activation=nn.Sigmoid()
    def forward(self, x):
        x=self.GRU(x)
        x = x[0]
        x=self.gen_block(x)
        x=self.activation(x)   ##classification binaire utile au GAN
        return x
    

class Discriminator(nn.Module):
    def __init__(self,hidden_dim,data_dim,n_layers):
        super(Discriminator,self).__init__()
        self.hidden_dim = hidden_dim
        self.data_dim = data_dim
        self.n_layers=n_layers
        # Couches du générateur
        self.GRU=nn.GRU(data_dim,hidden_dim,batch_first=True,num_layers=n_layers)
        self.gen_block=nn.Linear(hidden_dim,1)
        self.activation=nn.Sigmoid()
    def forward(self, x):
        x=self.GRU(x)
        x = x[0]
        x=self.gen_block(x)
        x=self.activation(x)
        return x

In [131]:
def generator_loss(fake_output, real_batch_labels, criterion):
  loss = criterion(fake_output, real_batch_labels)
  return loss

def discriminator_loss(real_output, fake_output, real_labels, fake_labels, criterion):
    real_loss = criterion(real_output, real_labels)
    fake_loss = criterion(fake_output, fake_labels)
    return real_loss + fake_loss

https://github.com/ydataai/ydata-synthetic/blob/dev/src/ydata_synthetic/synthesizers/timeseries/timegan/model.py

In [132]:
def train_GAN(discriminator, generator, batch, trainloader, hidden_dim, dis_loss, gen_loss, input_dim=600, lr = 0.0002, nepoch = 10):
  real_batch_labels = th.ones(hidden_dim, 1)
  fake_batch_labels = th.zeros(hidden_dim, 1)
  optim_generator = optim.Adam(generator.parameters(), lr = lr)
  optim_discriminator = optim.Adam(discriminator.parameters(), lr = lr)
  g_losses, d_losses = [], []

  for epoch in tqdm(range(nepoch)):
      running_d_loss = 0
      running_g_loss = 0

      for inputs,_ in trainloader:
          z=th.randn((input_dim,hidden_dim))
          # Train discriminator
          outputs = discriminator(inputs)
          h = generator(z)
          fake_outputs = discriminator(h)
          print(h.shape,fake_outputs.shape)

          optim_discriminator.zero_grad()
          d_loss = discriminator_loss(outputs,fake_outputs,real_batch_labels, fake_batch_labels, dis_loss)
          d_loss.backward()
          running_d_loss += d_loss.item()
          optim_discriminator.step()

          # Train generator
          optim_generator.zero_grad()
          fake_outputs = discriminator(h)
          g_loss = generator_loss(fake_outputs,real_batch_labels,gen_loss)
          g_loss.backward()
          running_g_loss += g_loss.item()
          optim_generator.step()

      g_losses.append(running_g_loss / len(trainloader))
      d_losses.append(running_d_loss / len(trainloader))

      print(f"Epoch {epoch + 1}/{nepoch} -> "
            f"Generator Loss: {running_g_loss :.4f}, "
            f"Discriminator Loss: {running_d_loss:.4f}")


  return g_losses, d_losses

In [133]:
learning_rate=0.0001
hidden_dim = 128
num_epochs = 10
input_dim=600
n_layers=5

# Instanciation des modèles
generator = Generator(hidden_dim,input_dim,n_layers)
discriminator = Discriminator(hidden_dim,input_dim,n_layers)

bceloss=nn.BCELoss()
logloss=nn.NLLLoss()

g_losses, d_losses=train_GAN(discriminator, generator, batch, train_loader, hidden_dim, bceloss, logloss, lr = 0.0002, nepoch = 10)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: input.size(-1) must be equal to input_size. Expected 600, got 128

In [None]:

    # Plot loss curves
plt.figure(figsize=(10, 5))
plt.plot(gen_losses, label='Generator Loss')
plt.plot(dis_losses, label='Discriminator Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Evolution de la perte pendant l\'entraînement')
plt.legend()
plt.show()