In [2]:
import os
import warnings
warnings.filterwarnings(action='ignore')

from __future__ import print_function
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from IPython.display import display, HTML

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn import preprocessing

# PyTorch Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, Dataset, DataLoader


In [3]:

import numpy as np
import pandas as pd
import h5py
import torch
from torch.utils.data import Dataset
import math

def load_dataset(filename):
    with h5py.File(filename, 'r') as f:
        if 'data' in f:
            data = f['data']
            if isinstance(data, h5py.Dataset):
                print("Type du dataset 'data':", type(data))
                print("Forme du dataset 'data':", data.shape)
                data_matrix = data[:]  
                print("Matrice de données:", data_matrix)
                return data_matrix
            elif isinstance(data, h5py.Group):
                data_list = []
                trace_names = []
                for key in data.keys():
                    dataset = data[key]
                    if isinstance(dataset, h5py.Dataset):
                        # Vérifier la forme des données extraites
                        if dataset.ndim == 2:
                            data_array = np.zeros((1, 3, dataset.shape[1]), dtype=np.float32)  # Créer une matrice vide de quatre dimensions
                            for i in range(3):  # Boucle à travers les canaux
                                data_array[:, i, :] = dataset[i, :].reshape(1,  dataset.shape[1])  # Réorganiser les données dans le bon ordre
                            data_list.append(data_array)
                            #trace_names.append(key)
                            # Concaténer les noms de clés avec un tableau de zéros
                            trace_names = np.concatenate([np.array([key] ).reshape(-1, 1) for key in data.keys()])
                            # Remodeler en un vecteur colonne
                            trace_names=trace_names.reshape(-1, 1)                          
                            #trace_names['trace_names']=trace_names
                        
                        else:
                            print(f"Les données pour la clé '{key}' ne sont pas sous forme de tableau 2D.")
                if data_list:
                    stacked_data_matrix = np.vstack(data_list)  # Empiler les matrices de données dans une seule matrice
                    return stacked_data_matrix, trace_names
                    
# Utilisation de la foncti, our charger et convertir le dataset à partir du fichier HDF5
filename1 = 'Instance_noise_1k.hdf5'
noise_matrix, trace_names_noise = load_dataset(filename1)

filename2 = 'Instance_events_counts_10k.hdf5'
events_matrix, trace_names_events = load_dataset(filename2)

print (events_matrix.shape)

(10000, 3, 12000)


In [4]:
# Création d'une colonne 'source_type' avec des valeurs constantes de 1
source_type_n = np.zeros((len(trace_names_noise), 1), dtype=int)

# Création d'une colonne 'source_type' avec des valeurs constantes de 1
source_type_e = np.ones((len(trace_names_events), 1), dtype=int)

# Concaténation de la colonne 'source_type' avec trace_names_noise
trace_names_n = np.concatenate((trace_names_noise, source_type_n), axis=1)
trace_names_e = np.concatenate((trace_names_events, source_type_e), axis=1)


trace_names_e = np.concatenate((trace_names_events[:len(trace_names_noise)], source_type_e[:len(trace_names_noise)]), axis=1)
#Prendre autant d'echantillon evant que de noise
events_matrix= events_matrix[:len(noise_matrix),:,:]
print(events_matrix.shape)
print(noise_matrix.shape)

# Vérification de la forme de la matrice résultante
print(trace_names_e)

(1000, 3, 12000)
(1000, 3, 12000)
[['11030611.IV.OFFI..HH' '1']
 ['11030611.IV.PIEI..HH' '1']
 ['11030611.IV.PIEI..HN' '1']
 ...
 ['11052061.IV.TERO..HH' '1']
 ['11052061.IV.VCEL..EH' '1']
 ['11052061.XO.AM05..EH' '1']]


In [5]:
# Préparation des données d'entrainement, validation et de test
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
import torch 
from torch.utils.data import DataLoader


# Diviser les données de bruit en ensemble d'entraînement et ensemble de test
df_noise_train1, df_noise_test = train_test_split(noise_matrix, test_size=0.20, random_state=42)
df_noise_train, df_noise_validation = train_test_split(df_noise_train1, test_size=0.20, random_state=42)

trace_noise1, trace_noise_test =train_test_split(trace_names_n, test_size=0.20, random_state=42)
trace_noise_train, trace_noise_val = train_test_split(trace_noise1, test_size=0.20, random_state=42)

trace_events1, trace_events_test =train_test_split(trace_names_e, test_size=0.20, random_state=42)
trace_events_train, trace_events_val = train_test_split(trace_events1, test_size=0.20, random_state=42)


df_train1, df_test = train_test_split(events_matrix, test_size=0.20, random_state=42)
df_train, df_validation = train_test_split(df_train1, test_size=0.20, random_state=42)


# Concaténer df_noise_train avec df_train
df_train_concat = np.concatenate([df_train, df_noise_train], axis=0)
trace_train = np.concatenate([trace_events_train, trace_noise_train], axis=0)
trace_train = trace_train[:,1].astype(int)

# Concaténer df_noise_test avec df_test
df_test_concat = np.concatenate([df_test, df_noise_test], axis=0)
trace_test= np.concatenate([trace_events_test, trace_noise_test], axis=0)
# Convertir les étiquettes en entiers
trace_test = trace_test[:,1].astype(int)

#print (df_test_concat)
# Concaténer df_noise_validation avec df_validation
df_val_concat = np.concatenate([df_validation, df_noise_validation], axis=0)
trace_val= np.concatenate([trace_events_val, trace_noise_val], axis=0)
trace_val = trace_val[:,1].astype(int)
"""
# Normaliser toute la donnée
mean_t = np.mean(df_train_concat)
std_t = np.std(df_train_concat)
norm_data_t = (df_train_concat - mean_t) / std_t

#print(norm_data_t)

mean_v = np.mean(df_val_concat)
std_v = np.std(df_val_concat)
norm_data_v = (df_val_concat - mean_v) / std_v

mean_ts = np.mean(df_test_concat)
std_ts = np.std(df_test_concat)
norm_data_ts = (df_test_concat - mean_ts) / std_ts
"""
# Normaliser toute la donnée
def normalize_data(data):
    mean = np.mean(data, axis=(1, 2), keepdims=True)  # Calculer la moyenne de chaque échantillon
    std = np.std(data, axis=(1, 2), keepdims=True)    # Calculer l'écart-type de chaque échantillon
    norm_data = (data - mean) / std                  # Normaliser chaque échantillon
    return norm_data

# Appliquer la normalisation sur les données d'entraînement, de validation et de test
norm_data_train = normalize_data(df_train_concat)
norm_data_val = normalize_data(df_val_concat)
norm_data_test = normalize_data(df_test_concat)


"""
# Appliquer l'aplatissement sur les données normalisées
fla_data_train = norm_data_train.view(norm_data_train.size(0), -1)
fla_data_val = norm_data_val.view(norm_data_val.size(0), -1)
fla_data_test = norm_data_test.view(norm_data_test.size(0), -1)
"""
norm_data_t = norm_data_train 
norm_data_v = norm_data_val 
norm_data_ts = norm_data_test


#print(trace_train.shape)

#print(norm_data_t.shape)

# Convert your numpy arrays to PyTorch tensors
x_train_tensor = torch.tensor(norm_data_t, dtype=torch.float32)
y_train_tensor = torch.tensor(trace_train, dtype=torch.long)  # long for CrossEntropyLoss

x_test_tensor = torch.tensor(norm_data_ts, dtype=torch.float32)
y_test_tensor = torch.tensor(trace_test, dtype=torch.long)

x_val_tensor = torch.tensor(norm_data_v, dtype=torch.float32)
y_val_tensor = torch.tensor(trace_val, dtype=torch.long)

"""
####################
# Appliquer l'aplatissement sur les données normalisées
x_train_tensor= x_train_tensor.view(x_train_tensor.size(0), -1)
x_val_tensor = x_val_tensor .view(x_val_tensor .size(0), -1)
x_test_tensor = x_test_tensor.view(x_test_tensor.size(0), -1)
#########################
"""
y_train_tensor = y_train_tensor.unsqueeze(1)
y_val_tensor = y_val_tensor.unsqueeze(1)
y_test_tensor = y_test_tensor.unsqueeze(1)

print(x_val_tensor)
#print( y_val_tensor)
# Create TensorDatasets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)


# Créer les DataLoaders pour l'entraînement, la validation et le test
batch_size =64  # Vous pouvez changer cette valeur selon vos besoins


val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)





tensor([[[ 3.7057e-03,  3.7057e-03,  3.7057e-03,  ...,  3.7057e-03,
           8.6438e-01, -8.5697e-01],
         [ 3.7057e-03,  3.7057e-03,  3.7057e-03,  ...,  3.7057e-03,
           3.7057e-03, -8.5697e-01],
         [ 3.7057e-03,  3.7057e-03,  3.7057e-03,  ...,  3.7057e-03,
           3.7057e-03, -8.5697e-01]],

        [[ 2.2457e-02, -4.1898e-01, -2.8410e-01,  ...,  1.5938e-01,
           4.2894e-02,  1.7778e-01],
         [ 6.1287e-02,  3.8624e-01,  8.5811e-02,  ..., -2.6979e-01,
          -2.7592e-01, -4.7825e-01],
         [-1.2265e-01, -2.4548e-02, -6.1335e-02,  ..., -2.1666e-01,
          -1.6965e-01, -1.0017e-01]],

        [[ 1.1620e+00,  1.1403e+00,  1.0969e+00,  ...,  9.2311e-01,
           1.1186e+00,  1.2163e+00],
         [ 2.0635e+00,  2.0635e+00,  2.1612e+00,  ...,  2.5413e+00,
           2.3133e+00,  1.9223e+00],
         [-7.8201e-01, -8.6889e-01, -9.0147e-01,  ..., -2.0093e+00,
          -1.9332e+00, -1.9984e+00]],

        ...,

        [[ 1.1839e+00, -3.1468e+00,

In [7]:
#Configuration 1 :
import numpy as np
import random
# PyTorch Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class cnnModel(nn.Module):
    def __init__(self, time_periods, n_sensors, n_classes):
        super(cnnModel, self).__init__()
        self.time_periods = time_periods
        self.n_sensors = n_sensors
        self.n_classes = n_classes

        # Convolutional layers
        self.conv1 = nn.Conv1d(n_sensors, 100, kernel_size=5)
        self.conv2 = nn.Conv1d(100, 160, kernel_size=5)
        
        # Pooling and dropout
        self.pool = nn.MaxPool1d(kernel_size=3)
        self.adaptive_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(p=0.2)
        
        # Fully connected layer
        self.fc = nn.Linear(160, n_classes)

        # Initialiser les poids avec Xavier initialization
        self.init_weights()
    
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = x.view(-1, self.n_sensors, self.time_periods)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.adaptive_pool(x)
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return torch.sigmoid(x)
        
        
  

In [8]:
def show_confusion_matrix(validations, predictions, title=None):
    matrix = metrics.confusion_matrix(validations, predictions)

    plt.figure(figsize=(6, 4))
    sns.heatmap(matrix,
                cmap='coolwarm',
                linecolor='white',
                linewidths=1,
                xticklabels=LABELS,
                yticklabels=LABELS,
                annot=True,
                fmt='d')
    if title: plt.title(title)
    else: plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

In [9]:
def plot_performance(train_losses, val_losses, train_accs, val_accs, params=None, save_filename=None ):
    epochs = len(train_losses)  # Obtenez le nombre d'époques réellement effectuées
    #plt.title('Loss and Accuracy over epochs\n' + f'Params: {params}')

    #plt.figure(figsize=(15, 5))

    # Plot losses
    plt.subplot(1, 2, 1)
    plt.plot(range(1, epochs + 1), train_losses, label='Train Loss', color='red')
    plt.plot(range(1, epochs + 1), val_losses, label='Validation Loss', color='blue')

   # plt.title('Loss over epochs\n' + f'Params: {params}')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Plot accuracies
    plt.subplot(1, 2, 2)
    plt.plot(range(1, epochs + 1), train_accs, label='Train Accuracy', color='red')
    plt.plot(range(1, epochs + 1), val_accs, label='Validation Accuracy', color='blue')

   # plt.title('Accuracy over epochs\n' + f'Params: {params}')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Sauvegarder la figure dans un fichier image
    if save_filename:
        param_str = '_'.join([f'{key}_{value}' if not isinstance(value, types.FunctionType) else f'{key}_{value.__name__}' for key, value in params.items()])
        filename = f'{save_filename}_{param_str}.png'
        plt.savefig(filename)
    
    plt.tight_layout()
    plt.show()
    # Sauvegarder la figure dans un fichier image
   

In [12]:
import torch.optim as optim
import torch.optim as optim
from torch.optim import AdamW
from torch.optim import Adam
#Check if CUDA is available (for GPU usage)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


EPOCHS =10 # Vous pouvez ajuster le nombre d'époques selon vos besoins
n_sensors = 3
n_classes = 2
TIME_PERIODS =12000

#input_shape = (batch_size, 3, 12000)

#criterion = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()

# Initialize lists to store losses and accuracies
train_losses = []
val_losses = []
train_accs = []
val_accs = []

def train_cnn(model_cnn, device, num_epochs, my_optimizer):
    # Initialize lists to store losses and accuracies
    for epoch in range(num_epochs):
        # Training mode
        model_cnn.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        
        for inputs, labels in train_loader:
            #print ( len(labels))
            #print(inputs.shape)
            
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Conversion des étiquettes en tenseurs PyTorch et déclaration de l'exigence de gradients
            labels = torch.tensor(labels, dtype=torch.float, device=device, requires_grad=True)
           
            #print(labels)
            my_optimizer.zero_grad()
            outputs = model_cnn(inputs)
            
            #print(outputs.shape)

            # Calcul des classes prédites
            predicted_class = torch.argmax(outputs, dim=1)
            # Réduction de dimension des classes prédites à (64, 1)
            predicted_class = predicted_class.view(-1, 1)
            
            predicted_cl = predicted_class.float()
            #print(predicted_cl)
           
            # Calculer la perte
            loss = criterion(predicted_cl, labels)
            loss.backward()
            my_optimizer.step()
            lr_scheduler.step()
            running_loss += loss.item()
        
            correct_train += (predicted_cl == labels).sum().item()
            #print(correct_train)
            total_train += labels.size(0)
            #print (total_train)
            #print (train_loader)
       
        # Update the learning rate with the scheduler
     
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = correct_train / total_train
        
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)

        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Train Accuracy: {epoch_train_acc:.4f}")



        # Validation mode
        model_cnn.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs_val = model_cnn(inputs)
                #print(outputs)
                
                # Assurez-vous que les étiquettes (labels) sont également des tenseurs de type float
                labels = labels.float()
                predicted_class_val = torch.argmax(outputs_val, dim=1)
                
                # Réduction de dimension des classes prédites à (64, 1)
                predicted_class_val = predicted_class_val.view(-1, 1)
            
                predicted_cl_val = predicted_class_val.float()
                #print(predicted_cl_val)

                # Calculer la perte
                loss = criterion(predicted_cl_val, labels)
                val_loss += loss.item()
               
                # Calculer l'exactitude
              
                correct_val += (predicted_cl_val == labels).sum().item()
                total_val += labels.size(0)

            epoch_val_loss = val_loss / len(val_loader)
            epoch_val_acc = correct_val / total_val
        
            val_losses.append(epoch_val_loss)
            val_accs.append(epoch_val_acc)

            print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {epoch_val_loss:.4f}, Validation Accuracy: {epoch_val_acc:.4f}")

    return train_losses, val_losses, train_accs, val_accs
 
def test_cnn(model_cnn, device):
    model_cnn.eval()
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs_test = model_cnn(inputs)
            predicted_class_test = torch.argmax(outputs_test, dim=1)
                
            # Réduction de dimension des classes prédites à (64, 1)
            predicted_class_test = predicted_class_test.view(-1, 1)
            
            predicted_cl_test = predicted_class_test.float()
                       
            # Assurez-vous que les étiquettes (labels) sont également des tenseurs de type float
            labels = labels.float()
            #print(predicated)   
            # Calculer la perte en utilisant les prédictions binaires et les étiquettes
            loss = criterion(predicted_cl_test, labels)
            test_loss += loss.item()
            # Calculer l'exactitude
            correct_test += (predicted_cl_test== labels).sum().item()
            total_test += labels.size(0)

        # Calculer la perte moyenne et l'exactitude
        test_loss /= len(test_loader)
        test_acc = correct_test / total_test

        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")
        LABELS= [0, 1]
        show_confusion_matrix(labels, predicted)
        
    return test_loss, test_acc
    
# Assuming TIME_PERIODS, n_sensors, and n_classes are defined
model_cnn = cnnModel(TIME_PERIODS, n_sensors, n_classes)
# Initialiser les poids avec Xavier initialization
model_cnn.init_weights()
# Move the model to the device (CPU or GPU)
model_cnn.to(device)
# Print model summary
print(model_cnn)

train_losses = []
val_losses = []
train_accs = []
val_accs = []
test_accs = []
test_losses = []



# Choose your optimizer
#my_optimizer = torch.optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.9)

my_optimizer = torch.optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.6)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(my_optimizer, T_max=100, eta_min=0)

#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
momentum_scheduler = torch.optim.lr_scheduler.ExponentialLR(my_optimizer, gamma=0.9)
print('Training the model...')
train_losses, val_losses, train_accs, val_accs = train_cnn(model_cnn, device, EPOCHS, my_optimizer)
plot_performance(train_losses, val_losses, train_accs, val_accs)



cnnModel(
  (conv1): Conv1d(3, 100, kernel_size=(5,), stride=(1,))
  (conv2): Conv1d(100, 160, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (adaptive_pool): AdaptiveAvgPool1d(output_size=1)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=160, out_features=2, bias=True)
)
Training the model...
Epoch [1/10], Train Loss: 0.7877, Train Accuracy: 0.5055
Epoch [1/10], Validation Loss: 0.8079, Validation Accuracy: 0.5062
Epoch [2/10], Train Loss: 0.7782, Train Accuracy: 0.5266
Epoch [2/10], Validation Loss: 0.8079, Validation Accuracy: 0.5062
Epoch [3/10], Train Loss: 0.7806, Train Accuracy: 0.5172
Epoch [3/10], Validation Loss: 0.8079, Validation Accuracy: 0.5062
Epoch [4/10], Train Loss: 0.7882, Train Accuracy: 0.5023
Epoch [4/10], Validation Loss: 0.8079, Validation Accuracy: 0.5062
Epoch [5/10], Train Loss: 0.7776, Train Accuracy: 0.5227
Epoch [5/10], Validation Loss: 0.8079, Validation Accuracy

In [None]:
######################################################