# Training a CNN for Motor Vibration Spectrograms

This notebook trains a Convolutional Neural Network (CNN) using spectrogram images of motor vibrations. The dataset is divided into training, validation, and test sets. The trained model is saved with a timestamp and performance-based naming convention.

## Import Required Libraries

We will import the necessary libraries for data loading, preprocessing, and building the CNN model.

In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

## Load and Preprocess Data

We will load the spectrogram images from the `data/05_cnn_input` directory, preprocess them, and split them into training, validation, and test sets.

In [None]:
data_dir = "./../../data/05_cnn_input"

# Class sub-folders to scan. The position of each name in this tuple is the
# integer label assigned to its images (0 = 'balourd', 1 = 'sain').
categories = ('balourd', 'sain')

print("Contenu du dossier 05_cnn_input :", os.listdir(data_dir))

image_paths = []
labels = []

for label, category in enumerate(categories):
    category_dir = os.path.join(data_dir, category)
    if not os.path.exists(category_dir):
        # A missing class folder is reported and skipped rather than aborting.
        print(f"Directory does not exist: {category_dir}")
        continue
    print(f"Checking directory: {category_dir}")
    for root, dirs, files in os.walk(category_dir):
        # os.walk returns entries in arbitrary filesystem order. Sorting the
        # sub-directories in place and the file names makes the collected
        # order, and therefore any later seeded split, reproducible.
        dirs.sort()
        print(f"Found {len(files)} files in {root}")
        for file in sorted(files):
            if file.endswith(".png"):
                image_paths.append(os.path.join(root, file))
                labels.append(label)

print(f"Total directories checked: {len(categories)}")
print(f"Total image paths collected: {len(image_paths)}")
print("Sample image paths:", image_paths[:5])
print("Labels distribution:", {label: labels.count(label) for label in set(labels)})

# Convert to NumPy arrays for the downstream cells.
image_paths = np.array(image_paths)
labels = np.array(labels)

# Removed redundant checks for already normalized data

Contenu du dossier 05_cnn_input : ['balourd', 'sain', 'test', 'train', 'val']
Checking directory: ./../../data/05_cnn_input/balourd
Found 246 files in ./../../data/05_cnn_input/balourd
Checking directory: ./../../data/05_cnn_input/sain
Found 551 files in ./../../data/05_cnn_input/sain
Total directories checked: 2
Total image paths collected: 797
Sample image paths: ['./../../data/05_cnn_input/balourd/spec_rgb_0551.png', './../../data/05_cnn_input/balourd/spec_rgb_0552.png', './../../data/05_cnn_input/balourd/spec_rgb_0553.png', './../../data/05_cnn_input/balourd/spec_rgb_0554.png', './../../data/05_cnn_input/balourd/spec_rgb_0555.png']
Labels distribution: {0: 246, 1: 551}


In [None]:
from tensorflow.keras.utils import img_to_array, load_img

# Image preprocessing: load each image as-is (no resizing, no cropping)
def preprocess_image(image_path):
    """Load one image file and return it as an array.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The array produced by ``img_to_array`` for the loaded image, or
        ``None`` if loading or conversion raised an exception (the error is
        printed, not re-raised).
    """
    try:
        # Load at the image's native size, then convert in one step.
        return img_to_array(load_img(image_path))
    except Exception as err:
        print(f"Erreur lors du traitement de l'image {image_path} : {err}")
        return None

# Debug: show basic information about the dataset
print(f"Nombre total de chemins d'images trouvés : {len(image_paths)}")
print("Exemples de chemins d'images :", image_paths[:5])
# Convert to plain Python ints first so the printed keys are simple integers.
label_values = labels.tolist()
print("Répartition des étiquettes :", {label: label_values.count(label) for label in sorted(set(label_values))})

# Preprocess every image while keeping each label paired with its image, so
# that an image that fails to load is dropped together with its label.
# Filtering only the images would leave `labels` longer than `images` and
# make train_test_split fail on mismatched lengths.
loaded_pairs = [(preprocess_image(path), label) for path, label in zip(image_paths, labels)]
valid_pairs = [(img, label) for img, label in loaded_pairs if img is not None]

images = np.array([img for img, _ in valid_pairs])
# Stored under a new name so `labels` is left untouched and this cell can be
# re-run safely.
valid_labels = np.array([label for _, label in valid_pairs])

# Debug: check the processed images
print(f"Nombre total d'images valides traitées : {len(images)}")
if len(images) > 0:
    print("Dimensions de la première image :", images[0].shape)

# Split the dataset into training, validation and test sets
if len(images) == 0:
    raise ValueError("Aucune image valide n'a été traitée. Veuillez vérifier le jeu de données.")

# 60% train, then the remaining 40% is halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(images, valid_labels, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Debug: show the size of each split
print(f"Ensemble d'entraînement : {len(X_train)} échantillons")
print(f"Ensemble de validation : {len(X_val)} échantillons")
print(f"Ensemble de test : {len(X_test)} échantillons")

Total image paths found: 797
Sample image paths: ['./../../data/05_cnn_input/balourd/spec_rgb_0551.png'
 './../../data/05_cnn_input/balourd/spec_rgb_0552.png'
 './../../data/05_cnn_input/balourd/spec_rgb_0553.png'
 './../../data/05_cnn_input/balourd/spec_rgb_0554.png'
 './../../data/05_cnn_input/balourd/spec_rgb_0555.png']
Labels distribution: {0: 246, 1: 551}
Total valid images processed: 797
Shape of first image: (128, 128, 3)
Training set: 478 samples
Validation set: 159 samples
Test set: 160 samples


## Define the CNN Model

We will define a Convolutional Neural Network (CNN) architecture to process the spectrogram images.

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the CNN model.
# The input shape is read from the training data instead of being hard-coded,
# so the first layer always matches the images that will be passed to fit().
input_shape = tuple(X_train.shape[1:])
if len(input_shape) != 3:
    raise ValueError(
        f"Expected training images of shape (height, width, channels), got {input_shape}. "
        "All images must share the same dimensions."
    )

model = Sequential([
    # Three convolution + max-pooling stages with 32, 64 and 128 filters.
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classification head: dense layer with dropout, then one sigmoid unit
    # for the binary label.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Display the model summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 63, 63, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 14, 14, 128)      

## Train the CNN Model

We will compile the model, train it using the training and validation datasets, and save the trained model with a performance-based naming convention.

In [5]:
from datetime import datetime

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; the validation split is passed for monitoring
fit_options = {
    'validation_data': (X_val, y_val),
    'epochs': 10,
    'batch_size': 32,
}
history = model.fit(X_train, y_train, **fit_options)

# Generate a timestamped filename based on performance
def generate_model_name(history, test_accuracy):
    val_accuracy = max(history.history['val_accuracy']) * 100
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return f"{timestamp}_validation_res_{val_accuracy:.2f}_test_set_{test_accuracy:.2f}.h5"

# Save the model right after training. The test accuracy has not been
# computed at this point, so 0 is passed as a placeholder and the resulting
# filename ends in "test_set_0.00.h5".
initial_model_path = generate_model_name(history, 0)
model.save(initial_model_path)

Epoch 1/10


2025-11-25 21:46:47.420779: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8906
2025-11-25 21:46:49.146754: I external/local_xla/xla/service/service.cc:168] XLA service 0x7dd4b52f6080 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-25 21:46:49.146795: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2025-11-25 21:46:49.151746: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1764107209.217541    3518 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


## Evaluate the CNN Model

We will evaluate the trained model on the test dataset, report the test accuracy, and save the model again under a filename that includes that accuracy.

In [6]:
# Score the trained model on the held-out test split
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation

# Express the accuracy as a percentage once and reuse it below
test_accuracy_pct = test_accuracy * 100
print(f"Test Accuracy: {test_accuracy_pct:.2f}%")

# Save the model again, this time with the measured test accuracy in the name
final_model_path = generate_model_name(history, test_accuracy_pct)
model.save(final_model_path)

Test Accuracy: 96.25%
