In [None]:
import pandas as pd
import os
from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters
from tsfresh.utilities.dataframe_functions import impute

# ----------------------
# 1. Load profile (targets)
# ----------------------
# Directory that holds every input file read below and receives every CSV
# written by this script.
folder = r"C:\Users\Dell\Downloads\condition+monitoring+of+hydraulic+systems"

# profile.txt is parsed as a headerless, tab-separated table; its five
# columns are named explicitly here.
profile_columns = ["cooler", "valve", "pump_leakage", "accumulator", "stable_flag"]
profile_path = os.path.join(folder, "profile.txt")
profile = pd.read_csv(profile_path, sep="\t", names=profile_columns, header=None)

# ----------------------
# 1b. Load optional indicators (CE, CP, SE) if available
# ----------------------
# Maps each optional file stem to the column name it gets in `profile`.
extra_files = {"CE": "ce", "CP": "cp", "SE": "se"}
for fname, colname in extra_files.items():
    path = os.path.join(folder, fname + ".txt")
    if not os.path.exists(path):
        # Optional input: a missing file is skipped without any message.
        continue
    try:
        indicator_table = pd.read_csv(path, sep="\t", header=None)
        # NOTE(review): only the first column of the file is kept. If these
        # files hold one column per time step (like the sensor files), the
        # remaining samples are discarded here -- confirm this is intended.
        profile[colname] = indicator_table.iloc[:, 0].values
        print(f"✅ Loaded extra indicator: {fname}.txt as column '{colname}'")
    except Exception as e:
        # Best effort: report the problem and carry on with the other files.
        print(f"⚠️ Could not load {fname}.txt: {e}")

# ----------------------
# 2. Load all sensor files (include PS5, PS6 too)
# ----------------------
sensor_files = (
    [f"PS{n}" for n in range(1, 7)]    # pressure sensors PS1..PS6
    + ["EPS1"]                         # electrical power sensor
    + ["FS1", "FS2"]                   # flow sensors
    + [f"TS{n}" for n in range(1, 5)]  # temperature sensors TS1..TS4
    + ["VS1"]                          # vibration sensor
)

# sensor name -> raw table exactly as read from "<name>.txt"
# (tab-separated, no header row, so columns are labelled 0..k-1).
sensors = {}
for s in sensor_files:
    fpath = os.path.join(folder, s + ".txt")
    if not os.path.exists(fpath):
        # A missing file is reported and left out of `sensors`.
        print(f"⚠️ Missing sensor file: {s}.txt (skipped)")
        continue
    sensors[s] = pd.read_csv(fpath, header=None, sep="\t")

# ----------------------
# 3. Reshape for TSFresh (memory-optimized long format)
# ----------------------
def optimize_and_melt(df, sensor_name):
    """Reshape one wide sensor table into compact long format.

    Every cell of ``df`` becomes one output row. The returned frame has the
    columns ``id`` (the row label of ``df``, int32), ``time`` (the column
    label of ``df``, int16), ``kind`` (``sensor_name`` repeated on every row)
    and ``value`` (the cell, downcast to the smallest float dtype that
    ``pd.to_numeric`` accepts for it).
    """
    # reset_index() exposes the row labels as an "index" column so that melt
    # can carry them along as the identifier of each unpivoted cell.
    melted = pd.melt(
        df.reset_index(),
        id_vars="index",
        var_name="time",
        value_name="value",
    )
    # Assemble the final columns in output order, narrowing dtypes on the way.
    return pd.DataFrame(
        {
            "id": melted["index"].astype("int32"),
            "time": melted["time"].astype("int16"),
            "kind": sensor_name,
            "value": pd.to_numeric(melted["value"], downcast="float"),
        }
    )

# Melt every sensor once and concatenate in a single call. Growing the frame
# with pd.concat inside the loop re-copies all rows accumulated so far on
# every iteration, which is wasteful for a frame of this size.
chunks = [optimize_and_melt(frame, name) for name, frame in sensors.items()]
# pd.concat raises ValueError on an empty list; fall back to an empty frame,
# which is what the loop-based version produced when no sensor was loaded.
long_df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
del chunks  # release the per-sensor copies; only the combined frame is needed

print("✅ Long dataframe created:", long_df.shape)

# ----------------------
# 4a. Extract Minimal Features
# ----------------------
# Column roles of the long-format frame plus the reduced feature set.
minimal_extraction_args = {
    "column_id": "id",
    "column_sort": "time",
    "column_kind": "kind",
    "column_value": "value",
    "default_fc_parameters": MinimalFCParameters(),
    "n_jobs": 0,
}
features_minimal = extract_features(long_df, **minimal_extraction_args)

# Impute the feature table, then attach it to the target table by index.
imputed_minimal = impute(features_minimal)
dataset_minimal = profile.join(imputed_minimal)

minimal_path = os.path.join(folder, "hydraulic_dataset_tsfresh_minimal.csv")
dataset_minimal.to_csv(minimal_path, index=False)
print("✅ Minimal dataset saved:", minimal_path)

# ----------------------
# 4b. Extract Full Features (big set)
# ----------------------
# Same column roles as the minimal run; no default_fc_parameters is passed,
# so tsfresh falls back to its own default feature set.
full_extraction_args = {
    "column_id": "id",
    "column_sort": "time",
    "column_kind": "kind",
    "column_value": "value",
    "n_jobs": 0,
}
features_full = extract_features(long_df, **full_extraction_args)
features_full = impute(features_full)

# Attach the imputed features to the target table by index and persist.
dataset_full = profile.join(features_full)
full_path = os.path.join(folder, "hydraulic_dataset_tsfresh_full.csv")
dataset_full.to_csv(full_path, index=False)
print("✅ Full dataset saved:", full_path)

# ----------------------
# 5. Feature Selection for each target
# ----------------------
target_cols = ["cooler", "valve", "pump_leakage", "accumulator", "stable_flag"]

# Track the most recent dataset that was actually selected AND written, so the
# preview below can never show a stale or unsaved frame, and never hits an
# undefined name when every target fails.
dataset_selected = None
last_selected_target = None

for target in target_cols:
    print(f"\n🔹 Selecting features for target: {target}")
    try:
        X_selected = select_features(features_full, profile[target])
        candidate = profile.join(X_selected)

        selected_path = os.path.join(
            folder, f"hydraulic_dataset_tsfresh_selected_{target}.csv"
        )
        candidate.to_csv(selected_path, index=False)
    except Exception as e:
        # Best effort: one failing target must not abort the remaining ones.
        print(f"⚠️ Error with target {target}: {e}")
        continue

    # Only reached when selection and saving both succeeded.
    dataset_selected = candidate
    last_selected_target = target
    print(f"✅ Selected dataset saved for {target}: {selected_path}")
    print("   Shape:", dataset_selected.shape)

# ----------------------
# 6. Preview (example from last successfully processed target)
# ----------------------
print("\nSample from last selected dataset:")
if dataset_selected is None:
    print("⚠️ No target produced a selected dataset; nothing to preview.")
else:
    print(f"   Target: {last_selected_target}")
    print(dataset_selected.head())


✅ Loaded extra indicator: CE.txt as column 'ce'
✅ Loaded extra indicator: CP.txt as column 'cp'
✅ Loaded extra indicator: SE.txt as column 'se'
✅ Long dataframe created: (95917500, 4)


Feature Extraction: 100%|███████████████████████████████████████████████████████| 30870/30870 [01:14<00:00, 414.42it/s]


✅ Minimal dataset saved: C:\Users\Dell\Downloads\condition+monitoring+of+hydraulic+systems\hydraulic_dataset_tsfresh_minimal.csv


Feature Extraction:   0%|                                                                    | 0/30870 [00:00<?, ?it/s]