In [3]:
from pathlib import Path
import numpy as np
import pandas as pd

# Base paths: the project root is taken two levels above the working directory.
PROJECT_ROOT = Path("..").resolve().parent
print("PROJECT_ROOT =", PROJECT_ROOT)

# Input folder (merged CSVs) and output folder (windowed arrays) under data/.
DATA_MERGED = PROJECT_ROOT.joinpath("data", "02_merged")
DATA_OUT = PROJECT_ROOT.joinpath("data", "03_windowed")

# Create the output folder up front; nothing happens if it already exists.
DATA_OUT.mkdir(parents=True, exist_ok=True)

print("DATA_MERGED =", DATA_MERGED)
print("DATA_OUT    =", DATA_OUT)

# Columns used as features, in the order they appear on the last axis of
# every window. NOTE(review): the a*/g* prefixes presumably stand for
# accelerometer and gyroscope axes - confirm against the acquisition script.
FEATURE_COLS = [
    *("ax_vibration_ms2", "ay_vibration_ms2", "az_vibration_ms2"),
    *("gx_vibration_gx", "gy_vibration_gy", "gz_vibration_gz"),
]


def load_merged_csv(path: Path) -> pd.DataFrame:
    """Load a merged CSV, order it chronologically and add a relative time axis.

    Parameters
    ----------
    path : Path
        CSV file to read. It must contain a ``date`` column that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pd.DataFrame
        The rows of the file sorted by ``date`` (index reset to 0..N-1), with
        an extra ``time_s`` column holding the seconds elapsed since the first
        row of the sorted frame.

    Raises
    ------
    ValueError
        If the file contains no data rows.
    """
    print("Lecture du fichier :", path)
    df = pd.read_csv(path)

    # Fail early with a readable message instead of the IndexError that
    # ``.iloc[0]`` would raise further down on an empty frame.
    if df.empty:
        raise ValueError(f"Fichier sans données : {path}")

    # The timestamp column is named "date" in the merged files.
    df["date"] = pd.to_datetime(df["date"])

    # Stable sort: rows that share the same timestamp keep their file order.
    # The default quicksort gives no such guarantee and could shuffle
    # consecutive samples when the timestamp resolution is coarse.
    df = df.sort_values("date", kind="mergesort").reset_index(drop=True)

    # Relative time in seconds since the start of the recording.
    t0 = df["date"].iloc[0]
    df["time_s"] = (df["date"] - t0).dt.total_seconds()

    return df


def infer_dt(df: pd.DataFrame) -> float:
    """Estimate the sampling period, in seconds, from the ``time_s`` column.

    The estimate is the median of the differences between consecutive
    ``time_s`` values, returned as a plain Python float.
    """
    consecutive_gaps = np.diff(df["time_s"].values)
    return float(np.median(consecutive_gaps))


def make_windows(
    df: pd.DataFrame,
    label: int,
    window_seconds: float = 2.0,
    overlap: float = 0.5,
):
    """Cut a recording into fixed-length, overlapping windows with one label.

    The window length in samples is ``int(window_seconds / dt)``, where ``dt``
    is the sampling period returned by ``infer_dt``. Windows are taken by row
    position, so the recording is treated as uniformly sampled; gaps in the
    timestamps are not detected here.

    Parameters
    ----------
    df : pd.DataFrame
        Recording with a ``time_s`` column and every column of
        ``FEATURE_COLS``.
    label : int
        Class label given to every window of this recording (this notebook
        uses 0 = sain, 1 = balourd, 2 = porteafaux).
    window_seconds : float
        Window duration in seconds.
    overlap : float
        Fraction of a window shared with the next one. The hop between
        windows is ``int(n_per_window * (1 - overlap))`` samples, clamped to
        a minimum of 1.

    Returns
    -------
    X : np.ndarray
        Windows, shape ``(n_windows, n_per_window, n_features)``.
    y : np.ndarray
        Labels, shape ``(n_windows,)``, all equal to ``label``.

    Raises
    ------
    ValueError
        If the sampling period cannot be used (NaN or not strictly positive),
        if the window is shorter than one sample, or if the recording is
        shorter than one window.
    """
    dt = infer_dt(df)
    # A NaN dt (fewer than two rows) or a zero dt (mostly duplicated
    # timestamps) would otherwise surface as an obscure conversion error or a
    # ZeroDivisionError in the division below.
    if not np.isfinite(dt) or dt <= 0:
        raise ValueError(
            f"Pas de temps invalide: dt={dt} "
            "(au moins deux échantillons à des instants distincts sont requis)"
        )

    n_per_window = int(window_seconds / dt)
    if n_per_window < 1:
        raise ValueError(
            f"Fenêtre trop courte: window_seconds={window_seconds}, dt={dt}"
        )

    # Hop size between consecutive window starts; never less than one sample.
    step = max(1, int(n_per_window * (1 - overlap)))

    # Keep only the feature columns: shape (N, n_features).
    data = df[FEATURE_COLS].values
    n_samples = data.shape[0]

    # Without this guard np.stack fails on an empty list with a message that
    # does not mention the real cause.
    if n_samples < n_per_window:
        raise ValueError(
            f"Enregistrement trop court: {n_samples} échantillons pour une "
            f"fenêtre de {n_per_window} échantillons"
        )

    starts = range(0, n_samples - n_per_window + 1, step)

    # Shape: (n_windows, n_per_window, n_features).
    X = np.stack([data[start:start + n_per_window, :] for start in starts])
    # Shape: (n_windows,).
    y = np.full(len(starts), label)

    return X, y


# 1) Load the merged recordings of the three machine conditions:
#    healthy (sain), unbalance (balourd) and overhang (porte-a-faux).

file_sain = DATA_MERGED.joinpath("PI-donnee-saine", "PI-donnee-saine_output.csv")
file_balourd = DATA_MERGED.joinpath("PI-donnee-balourd", "PI-donnee-balourd_output.csv")
file_porteafaux = DATA_MERGED.joinpath(
    "PI-donnee-porteafaux", "PI-donnee-porteafaux_output.csv"
)

# map() is consumed by the unpacking, so the files are read in this order.
df_sain, df_balourd, df_porteafaux = map(
    load_merged_csv, (file_sain, file_balourd, file_porteafaux)
)

print("Sain    :", df_sain.shape)
print("Balourd :", df_balourd.shape)
print("Porte-à-faux :", df_porteafaux.shape)

# 2) Window each recording with the same settings; the label encodes the
#    condition: 0 = sain, 1 = balourd, 2 = porteafaux.

window_cfg = {"window_seconds": 2.0, "overlap": 0.5}

X_sain, y_sain = make_windows(df_sain, label=0, **window_cfg)
X_bal, y_bal = make_windows(df_balourd, label=1, **window_cfg)
X_porteafaux, y_porteafaux = make_windows(df_porteafaux, label=2, **window_cfg)

print("Fenêtres sain    :", X_sain.shape, y_sain.shape)
print("Fenêtres balourd :", X_bal.shape, y_bal.shape)
print("Fenêtres porte-à-faux :", X_porteafaux.shape, y_porteafaux.shape)

# 3) Stack the three classes along the window axis (axis 0 is the default).

X_all = np.concatenate((X_sain, X_bal, X_porteafaux))
y_all = np.concatenate((y_sain, y_bal, y_porteafaux))
print("Total fenêtres :", X_all.shape, y_all.shape)

# 4) Save the arrays to data/03_windowed.

np.save(DATA_OUT / "X_windows.npy", X_all)
np.save(DATA_OUT / "y_labels.npy", y_all)

# Small CSV summary: one row per window with its numeric label and class name.
label_names = {0: "sain", 1: "balourd", 2: "porteafaux"}
summary = pd.DataFrame({"label": y_all}).assign(
    label_name=lambda frame: frame["label"].map(label_names)
)
summary.to_csv(DATA_OUT / "windows_summary.csv", index=False)

print("Fenêtrage + étiquetage terminé. Fichiers créés dans", DATA_OUT)


PROJECT_ROOT = C:\Users\simge\Downloads\predictive-maintenance-ai
DATA_MERGED = C:\Users\simge\Downloads\predictive-maintenance-ai\data\02_merged
DATA_OUT    = C:\Users\simge\Downloads\predictive-maintenance-ai\data\03_windowed
Lecture du fichier : C:\Users\simge\Downloads\predictive-maintenance-ai\data\02_merged\PI-donnee-saine\PI-donnee-saine_output.csv
Lecture du fichier : C:\Users\simge\Downloads\predictive-maintenance-ai\data\02_merged\PI-donnee-balourd\PI-donnee-balourd_output.csv
Lecture du fichier : C:\Users\simge\Downloads\predictive-maintenance-ai\data\02_merged\PI-donnee-porteafaux\PI-donnee-porteafaux_output.csv
Sain    : (104514, 9)
Balourd : (102304, 9)
Porte-à-faux : (101653, 9)
Fenêtres sain    : (628, 333, 6) (628,)
Fenêtres balourd : (615, 333, 6) (615,)
Fenêtres porte-à-faux : (611, 333, 6) (611,)
Total fenêtres : (1854, 333, 6) (1854,)
Fenêtrage + étiquetage terminé. Fichiers créés dans C:\Users\simge\Downloads\predictive-maintenance-ai\data\03_windowed


In [4]:
from pathlib import Path
import numpy as np
import pandas as pd

# Reload the saved windowed dataset from disk and show its shapes and the
# number of windows per class.
PROJECT_ROOT = Path("..").resolve().parent
DATA_OUT = PROJECT_ROOT.joinpath("data", "03_windowed")

X_all = np.load(DATA_OUT.joinpath("X_windows.npy"))
y_all = np.load(DATA_OUT.joinpath("y_labels.npy"))
summary = pd.read_csv(DATA_OUT.joinpath("windows_summary.csv"))

print("X_all shape :", X_all.shape)
print("y_all shape :", y_all.shape)

# Number of windows for each class name.
class_counts = summary["label_name"].value_counts()
print(class_counts)


X_all shape : (1854, 333, 6)
y_all shape : (1854,)
label_name
sain          628
balourd       615
porteafaux    611
Name: count, dtype: int64
