In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from collections import Counter

# =========================
# CONFIGURATION
# =========================
carpeta = Path(".")  # directory holding the original CSV files
fs = 5000  # presumably the sampling rate in Hz -- TODO confirm
ts = 1.0  # presumably the window duration in seconds -- TODO confirm

# Rows per window (5000) and stride between window starts; equal values
# mean consecutive windows do not overlap.
ventana = paso = int(fs * ts)

clase_normal = "no_fault"
proporcion_fallas = 0.4  # keep 40% of the windows of each fault class
random_state = 42

# File name -> class label. Every CSV is named after its label, so the
# mapping is derived from the ordered list of labels.
mapa = {
    f"{etiqueta}.csv": etiqueta
    for etiqueta in (
        "no_fault",
        "eccentricity",
        "missing_tooth",
        "root_crack",
        "surface_fault",
        "tooth_chipped_fault",
    )
}

# =========================
# FUNCIONES
# =========================
def extraer_features_ventana(df):
    """Compute summary features for one window of two-channel sensor data.

    Args:
        df: pandas DataFrame holding one window. Must contain the columns
            ``sensor1``, ``sensor2``, ``speedSet`` and ``load_value`` and
            at least one row (the operating-condition columns are read
            from the first row).

    Returns:
        dict with, for each sensor, the mean, the standard deviation
        (NumPy default, ddof=0) and the RMS; the Pearson correlation
        between both sensors; and ``speedSet`` / ``load_value`` taken from
        the first row of the window.
    """
    # Cast to float64 before any arithmetic: squaring a narrow integer
    # column (e.g. int16) in its own dtype wraps around silently, which
    # makes the RMS NaN or plain wrong.
    s1 = df["sensor1"].to_numpy(dtype=np.float64)
    s2 = df["sensor2"].to_numpy(dtype=np.float64)

    return {
        "s1_media": np.mean(s1),
        "s1_std": np.std(s1),
        "s1_rms": np.sqrt(np.mean(s1**2)),
        "s2_media": np.mean(s2),
        "s2_std": np.std(s2),
        "s2_rms": np.sqrt(np.mean(s2**2)),
        # NaN when either signal is constant within the window
        # (zero variance in the correlation denominator).
        "s1_s2_corr": np.corrcoef(s1, s2)[0, 1],
        "speedSet": df["speedSet"].iloc[0],
        "load_value": df["load_value"].iloc[0],
    }

# =========================
# DATASET CONSTRUCTION
# =========================
filas = []

for archivo, clase in mapa.items():
    df = pd.read_csv(carpeta / archivo)

    # Start a window only where a full `ventana` rows remain. With
    # paso == ventana this yields exactly len(df) // ventana windows; with
    # a smaller stride (overlap) it avoids a truncated last window.
    # NOTE(review): speedSet / load_value are read from the first row of
    # each window; if a file concatenates runs at different settings, a
    # window may straddle two of them -- verify against the data.
    ventanas_usadas = [
        {**extraer_features_ventana(df.iloc[i:i + ventana]), "label": clase}
        for i in range(0, len(df) - ventana + 1, paso)
    ]

    if not ventanas_usadas:
        # File shorter than one window: nothing to extract, and sampling
        # from an empty frame below would raise.
        print(f"⚠️ {clase}: {archivo} tiene menos de {ventana} filas; se omite")
        continue

    df_v = pd.DataFrame(ventanas_usadas)

    # OPERATING SCENARIO: faults are rare, so only a fraction of each fault
    # class's windows is kept (at least one).
    if clase != clase_normal:
        n = max(1, int(len(df_v) * proporcion_fallas))
        df_v = df_v.sample(n=n, random_state=random_state)

    filas.append(df_v)
    print(f"✅ {clase}: ventanas usadas = {len(df_v)}")

# =========================
# FINAL DATASET
# =========================
df_final = pd.concat(filas, ignore_index=True)

print("\n▶ Distribución FINAL:")
print(Counter(df_final["label"]))

df_final.to_csv("gear_vibration_operativo.csv", index=False)
print("\n💾 Guardado: gear_vibration_operativo.csv")


✅ no_fault: ventanas usadas = 30
✅ eccentricity: ventanas usadas = 12
✅ missing_tooth: ventanas usadas = 12
✅ root_crack: ventanas usadas = 12
✅ surface_fault: ventanas usadas = 12
✅ tooth_chipped_fault: ventanas usadas = 12

▶ Distribución FINAL:
Counter({'no_fault': 30, 'eccentricity': 12, 'missing_tooth': 12, 'root_crack': 12, 'surface_fault': 12, 'tooth_chipped_fault': 12})

💾 Guardado: gear_vibration_operativo.csv
