In [10]:
import numpy as np
import matplotlib.pyplot as plt
import torch  as th
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle
import pandas as pd
import seaborn as sns
import random
import os
import glob
import numpy as np
import gzip as gz
import pickle
from tqdm import tqdm
from transformers import AutoModel, AutoConfig
from torch.utils.data import TensorDataset, DataLoader, random_split
from BESTRq_classes.BESTRq import BestRqFramework, RandomProjectionQuantizer
from compute_fft import compute_spectrogram, plot_spectrogram, mask_and_replace
from models.CNN_BiLSTM_Attention import ParallelModel

# GAN implementation : comparison between spectrum and time serie 
https://towardsdatascience.com/synthetic-time-series-data-a-gan-approach-869a984f2239

In [11]:
def make_net(model, n_layers, hidden_units, output_units):
    for i in range(n_layers):
        model.add_module(f'GRU_{i + 1}', nn.GRU(hidden_units, hidden_units, batch_first=True))
        model.add_module('OUT', nn.Sequential(nn.Linear(hidden_units, output_units), nn.Sigmoid()))
    return model

In [12]:
class Supervisor(nn.Module):
    def __init__(self, hidden_dim):
        super(Supervisor, self).__init__()
        self.hidden_dim = hidden_dim
        self.model = self.build()

    def build(self):
        model = nn.Sequential()
        return make_net(model,
                        n_layers=2,
                        hidden_units=self.hidden_dim,
                        output_units=self.hidden_dim)


In [54]:
class Generator(nn.Module):
    def __init__(self,hidden_dim,data_dim):
        super(Generator,self).__init__()
        self.hidden_dim=hidden_dim
        self.data_dim=data_dim
        self.gen_block=nn.Sequential(
    nn.Linear(hidden_dim, 32),
    nn.ReLU(),
    nn.Linear(32, 32),
    nn.ReLU(),
    nn.Linear(32, data_dim)
    )
    
    def forward(self,x):
        return self.gen_block(x)
    
class Discriminator(nn.Module):
    def __init__(self,data_dim):
        super(Generator,self).__init__()
        self.data_dim=data_dim
        self.dim_block=nn.Sequential(
    nn.Linear(data_dim, 32),
    nn.ReLU(),
    nn.Linear(32, 32),
    nn.ReLU(),
    nn.Linear(32, 1),
    nn.Sigmoid()
)   
    def forward(self,x):
        return self.dim_block(x)


class Recovery(nn.Module):
    def __init__(self, hidden_dim, n_seq):
        super(Recovery, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_seq = n_seq
        self.model = self.build()

    def build(self):
        layers = []
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim, self.n_seq))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

class Embedder(nn.Module):
    def __init__(self, hidden_dim):
        super(Embedder, self).__init__()
        self.hidden_dim = hidden_dim
        self.model = self.build()

    def build(self):
        layers = []
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

class Supervisor(nn.Module):
    def __init__(self, hidden_dim):
        super(Supervisor, self).__init__()
        self.hidden_dim = hidden_dim
        self.model = self.build()

    def build(self):
        layers = []
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [41]:
data_path = '/Users/martinblot/Desktop/sleep-edf-prepared/5-cassette'  ## path towards the 5-cassette file
fp = gz.open(data_path+'/SC4001E0.npz.gz','rb')
data_test = np.load(fp,allow_pickle=True)
fnames = glob.glob(os.path.join(data_path, "*npz.gz"))
devpart = 10
xtrain , xvalid = None , None
ytrain , yvalid = None , None
measurement=data_test['ch_label'][2]

for fn in tqdm(fnames):
    fp = gz.open(fn,'rb')
    data = np.load(fp,allow_pickle=False) # for now, don't care about headers
    x = data['x'][:,:,2] # EEG and EOG
    y = data['y'] # Take the labels
    idx = np.arange(x.shape[0])
    np.random.shuffle(idx)
    devlim = x.shape[0]//devpart
    devpart = 10
    idx = np.arange(x.shape[0])
    np.random.shuffle(idx)
    devlim = x.shape[0]//devpart
    if xtrain is None:
        xtrain = np.zeros((1,x.shape[1]))    ##np.zeros((1,x.shape[1],2)) if we include EOG  
        xvalid = np.zeros((1,x.shape[1]))
        ytrain , yvalid = np.zeros(1) , np.zeros(1)
    xvalid = np.concatenate((xvalid,x[idx[:devlim]]), axis=0)
    yvalid = np.concatenate((yvalid,y[idx[:devlim]]), axis=0)
    xtrain = np.concatenate((xtrain,x[idx[devlim:]]), axis=0)
    ytrain = np.concatenate((ytrain,y[idx[devlim:]]), axis=0)
    del x,y

xtrain , xvalid = xtrain[1:] , xvalid[1:]
ytrain , yvalid = ytrain[1:] , yvalid[1:]
xtrain, xvalid = th.FloatTensor(xtrain), th.FloatTensor(xvalid)
ytrain, yvalid = th.IntTensor(ytrain), th.IntTensor(yvalid)

100%|██████████| 153/153 [01:02<00:00,  2.44it/s]


In [48]:
outf="/Users/martinblot/Desktop/sleep-edf-prepared/cassette-th-data-all.pck"
fp = open(outf,"wb")
pickle.dump((xtrain , xvalid , ytrain , yvalid), fp)
filepath = '/Users/martinblot/Desktop/sleep-edf-prepared/cassette-th-data-all.pck'
xtrain,xvalid, ytrain, yvalid = np.load(filepath, allow_pickle = True)
print(xtrain.shape)

torch.Size([175995, 600])


In [49]:
batch=100
xtrain_tensor,ytrain_tensor=th.tensor(xtrain[:1000]),th.tensor(ytrain[:1000])
xvalid_tensor,yvalid_tensor=th.tensor(xvalid[:1000]),th.tensor(yvalid[:1000])

dataset_t = TensorDataset(xtrain_tensor, ytrain_tensor)
train_loader = DataLoader(dataset_t, batch_size= batch, shuffle=True)
dataset_v = TensorDataset(xvalid_tensor, yvalid_tensor)
valid_loader = DataLoader(dataset_v, batch_size= batch, shuffle=True)

  xtrain_tensor,ytrain_tensor=th.tensor(xtrain[:1000]),th.tensor(ytrain[:1000])
  xvalid_tensor,yvalid_tensor=th.tensor(xvalid[:1000]),th.tensor(yvalid[:1000])


In [50]:
# Initialisation du GAN
generator = Generator(hidden_dim=128)
discriminator = Discriminator(hidden_dim=128)
supervisor = Supervisor(hidden_dim=128)


In [63]:
learning_rate=0.0002
hidden_dim = 64
latent_dim = 100
num_epochs = 10

# Instanciation des modèles
generator = Generator(hidden_dim)
discriminator = Discriminator(hidden_dim)
recovery = Recovery(hidden_dim, n_seq=100)
embedder = Embedder(hidden_dim)
supervisor = Supervisor(hidden_dim)

# Définition des optimiseurs
gen_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)
dis_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)
supervisor_optimizer = optim.Adam(supervisor.parameters(), lr=learning_rate)

# Définition des fonctions de perte
criterion = nn.BCELoss()
supervisor_criterion = nn.MSELoss()


 # Boucle d'apprentissage
for epoch in range(num_epochs):
    # Entraînement
    generator.train()
    discriminator.train()
    supervisor.train()
    train_loss = 0.0  # Initialisation de la perte d'entraînement
    correct_predictions = 0
    total_samples = 0
    for real_data, labels in train_loader:
        print(real_data.shape)
        # Entraînement du discriminateur
        discriminator.zero_grad()
        real_decision = discriminator(real_data)
        real_loss = criterion(real_decision, th.ones_like(real_decision))
        
        latent_vector = th.randn(batch, latent_dim)
        fake_data = generator(latent_vector)
        fake_decision = discriminator(fake_data.detach())
        fake_loss = criterion(fake_decision, th.zeros_like(fake_decision))
        
        d_loss = real_loss + fake_loss
        d_loss.backward()
        dis_optimizer.step()

        # Entraînement du superviseur
        supervisor.zero_grad()
        supervised_fake_data = supervisor(fake_data)
        supervised_real_data = supervisor(real_data)
        supervisor_loss = supervisor_criterion(supervised_fake_data, supervised_real_data.detach())
        supervisor_loss.backward()
        supervisor_optimizer.step()

        # Entraînement du générateur
        generator.zero_grad()
        latent_vector = th.randn(batch, latent_dim)
        fake_data = generator(latent_vector)
        fake_decision = discriminator(fake_data)
        g_loss = criterion(fake_decision, th.ones_like(fake_decision))
        g_loss.backward()
        gen_optimizer.step()

        # Calcul de la perte d'entraînement et des prédictions correctes
        train_loss += d_loss.item() + g_loss.item() + supervisor_loss.item()
        _, predicted_labels = th.max(fake_decision, 1)
        correct_predictions += (predicted_labels == labels).sum().item()
        total_samples += labels.size(0)

    # Normalisation de la perte d'entraînement par le nombre de lots
    train_loss /= len(train_loader.dataset)
    accuracy = correct_predictions / total_samples

    # Validation, calcul de la perte moyenne et de l'exactitude
    generator.eval()
    discriminator.eval()
    supervisor.eval()
    valid_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    with th.no_grad():
        for real_data, labels in valid_loader:
            real_decision = discriminator(real_data)
            real_loss = criterion(real_decision, th.ones_like(real_decision))
            
            latent_vector = th.randn(batch, latent_dim)
            fake_data = generator(latent_vector)
            fake_decision = discriminator(fake_data.detach())
            fake_loss = criterion(fake_decision, th.zeros_like(fake_decision))
            
            valid_loss += real_loss.item() + fake_loss.item()

            # Calcul des prédictions correctes
            _, predicted_labels = th.max(fake_decision, 1)
            correct_predictions += (predicted_labels == labels).sum().item()
            total_samples += labels.size(0)

    valid_loss /= len(valid_loader.dataset)
    accuracy = correct_predictions / total_samples

    # Affichage de la progression, enregistrement des modèles, etc.
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss}, Train Accuracy: {accuracy}, Valid Loss: {valid_loss}, Valid Accuracy: {accuracy}')



torch.Size([100, 600])


RuntimeError: input.size(-1) must be equal to input_size. Expected 64, got 600