In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pathlib import Path

In [46]:
# === CONFIG ===
# Folder that holds the raw .txt files
DATA_DIR = Path("../data/raw/condition+monitoring+of+hydraulic+systems")
# Destination folder for processed output
OUTPUT_DIR = Path("../data/processed")
# Combined CSV file, written inside OUTPUT_DIR
OUTPUT_CSV = OUTPUT_DIR.joinpath("hydraulic_system_combined.csv")

In [47]:
# Optional: load only selected sensors (file names without the .txt extension), e.g. for testing:
# SENSORS_INCLUDE = ["PS1", "PS2", "TS1", "TS2", "FS1", "FS2", "EPS1"]
SENSORS_INCLUDE = None   # None = load all sensors

In [48]:
# === HELPERS ===
def read_sensor_matrix(file_path: Path) -> pd.DataFrame:
    """
    Load one tab-separated sensor file into a DataFrame.

    Each row of the file is one cycle and each column is one measurement
    point within that cycle. The file is read without a header row; columns
    are labelled ``<SENSOR>_<index>``, where ``<SENSOR>`` is the file name
    without its extension and ``<index>`` counts from 1.

    Parameters
    ----------
    file_path : Path
        Location of the sensor file.

    Returns
    -------
    pd.DataFrame
        The parsed matrix with the generated column labels.
    """
    prefix = file_path.stem
    matrix = pd.read_csv(file_path, sep="\t", header=None)
    n_points = matrix.shape[1]
    # Sensor-name prefix plus a 1-based position gives unique column labels
    matrix.columns = [f"{prefix}_{k}" for k in range(1, n_points + 1)]
    return matrix

In [57]:
# === MAIN ===
# Create the output folder (including missing parents); no error if it already exists
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [58]:
# 1️⃣ Collect every .txt file in the raw data folder, in sorted order
all_txt = sorted(DATA_DIR.glob("*.txt"))
if len(all_txt) == 0:
    raise FileNotFoundError(f"Keine .txt-Dateien in {DATA_DIR.resolve()} gefunden.")

In [62]:
# 2️⃣ Drop files that do not contain sensor data (matched on lower-cased file name)
EXCLUDE = {"profile.txt", "documentation.txt", "description.txt"}
sensor_files = [path for path in all_txt if path.name.lower() not in EXCLUDE]

# Optional: keep only the sensors requested by name (case-insensitive on the stem)
if SENSORS_INCLUDE is not None:
    include_set = {name.upper() for name in SENSORS_INCLUDE}
    sensor_files = [path for path in sensor_files if path.stem.upper() in include_set]

# Stop early when nothing is left to load
if len(sensor_files) == 0:
    raise RuntimeError("Keine gültigen Sensor-Dateien gefunden. Prüfe Pfad oder Filter.")

print(f"📄 Lade {len(sensor_files)} Sensor-Dateien…")


📄 Lade 17 Sensor-Dateien…


In [63]:
# 3️⃣ Read every sensor file and report the shape of each resulting matrix
sensor_dfs = []
for sensor_path in sensor_files:
    sensor_frame = read_sensor_matrix(sensor_path)
    sensor_dfs.append(sensor_frame)
    print(f"  - {sensor_path.name}: {sensor_frame.shape}")

  - CE.txt: (2205, 60)
  - CP.txt: (2205, 60)
  - EPS1.txt: (2205, 6000)
  - FS1.txt: (2205, 600)
  - FS2.txt: (2205, 600)
  - PS1.txt: (2205, 6000)
  - PS2.txt: (2205, 6000)
  - PS3.txt: (2205, 6000)
  - PS4.txt: (2205, 6000)
  - PS5.txt: (2205, 6000)
  - PS6.txt: (2205, 6000)
  - SE.txt: (2205, 60)
  - TS1.txt: (2205, 60)
  - TS2.txt: (2205, 60)
  - TS3.txt: (2205, 60)
  - TS4.txt: (2205, 60)
  - VS1.txt: (2205, 60)


In [64]:
# 4️⃣ Place all sensor matrices side by side, column-wise
df_sensors = pd.concat(sensor_dfs, axis="columns")
print(f"✅ Sensor-Matrix kombiniert: {df_sensors.shape}")

✅ Sensor-Matrix kombiniert: (2205, 43680)


In [65]:
# 5️⃣ Load the profile file (target variables)
# Names for the five columns, in file order
target_names = [
    "cooler_condition",
    "valve_condition",
    "pump_leakage",
    "accumulator_pressure",
    "stable_flag",
]
profile_path = DATA_DIR.joinpath("profile.txt")
df_profile = pd.read_csv(profile_path, sep="\t", header=None)
df_profile.columns = target_names
print(f"✅ Profile geladen: {df_profile.shape}")

✅ Profile geladen: (2205, 5)


In [66]:
# 6️⃣ Make sure all tables have the same number of rows
if df_profile.shape[0] != df_sensors.shape[0]:
    raise ValueError(
        f"Zeilenanzahl passt nicht: profile={df_profile.shape[0]} vs sensors={df_sensors.shape[0]}"
    )

# The check above is not sufficient on its own: pd.concat(axis=1) aligns on the
# index and pads shorter inputs with NaN, so the combined matrix takes the row
# count of the longest sensor file. A single truncated file would go unnoticed,
# therefore every sensor frame is compared against the profile individually.
expected_rows = df_profile.shape[0]
short_or_long = {
    path.name: frame.shape[0]
    for path, frame in zip(sensor_files, sensor_dfs)
    if frame.shape[0] != expected_rows
}
if short_or_long:
    raise ValueError(
        f"Zeilenanzahl einzelner Sensor-Dateien passt nicht zu profile={expected_rows}: {short_or_long}"
    )


In [67]:
# 7️⃣ Merge everything into one table: target columns first, then the sensor columns
df_full = pd.concat((df_profile, df_sensors), axis="columns")
print(f"🧱 Finales Shape: {df_full.shape}")

🧱 Finales Shape: (2205, 43685)


In [68]:
# 8️⃣ Write the combined table to CSV, without the index column
df_full.to_csv(path_or_buf=OUTPUT_CSV, index=False)
print(f"💾 Gespeichert: {OUTPUT_CSV.resolve()}")

💾 Gespeichert: C:\Users\Ismail Güner\OneDrive - Heitkamp Construction Swiss GmbH\Desktop\00_Github_Projects\construction-data-lab\industrial-time-series-demo\data\processed\hydraulic_system_combined.csv


In [75]:
# (Optional) Also save as Parquet (smaller + faster).
# pyarrow is an optional dependency, so it is imported inside the try block
# instead of the top import cell; a failure here must not abort the notebook.
parquet_path = OUTPUT_DIR / "hydraulic_system_combined.parquet"
try:
    import pyarrow  # noqa: F401  (availability probe only)
    df_full.to_parquet(parquet_path, index=False)
except ImportError as e:
    # Only a failed import justifies the "not installed" hint
    print(f"(Hinweis) Parquet nicht gespeichert (kein pyarrow installiert): {e}")
except Exception as e:
    # Any other failure is not a missing installation; show the real error
    # type so the hint does not point at the wrong cause.
    print(f"(Hinweis) Parquet nicht gespeichert ({type(e).__name__}): {e}")
else:
    print(f"💾 (Optional) Parquet gespeichert: {parquet_path.resolve()}")

(Hinweis) Parquet nicht gespeichert (kein pyarrow installiert): No type extension with name arrow.py_extension_type found


In [74]:
# 9️⃣ Quick preview: first 5 rows, first 40 columns
display(df_full.head(5).iloc[:, :40])

Unnamed: 0,cooler_condition,valve_condition,pump_leakage,accumulator_pressure,stable_flag,CE_1,CE_2,CE_3,CE_4,CE_5,CE_6,CE_7,CE_8,CE_9,CE_10,CE_11,CE_12,CE_13,CE_14,CE_15,CE_16,CE_17,CE_18,CE_19,CE_20,CE_21,CE_22,CE_23,CE_24,CE_25,CE_26,CE_27,CE_28,CE_29,CE_30,CE_31,CE_32,CE_33,CE_34,CE_35
0,3,100,0,130,1,47.202,47.273,47.25,47.332,47.213,47.372,47.273,47.438,46.691,46.599,46.623,46.653,46.136,46.127,45.948,45.935,45.726,45.139,44.978,44.937,44.269,44.079,43.741,43.375,42.847,42.322,42.322,41.797,41.406,40.924,40.427,39.857,39.517,38.999,38.493
1,3,100,0,130,1,29.208,28.822,28.805,28.922,28.591,28.643,28.216,27.812,27.514,27.481,27.631,27.464,27.258,27.424,26.955,27.023,26.834,27.073,27.26,27.236,26.72,26.824,26.757,26.311,26.431,25.515,25.724,25.724,25.59,25.257,25.296,25.475,24.716,25.037,24.784
2,3,100,0,130,1,23.554,23.521,23.527,23.008,23.042,23.052,22.658,22.952,22.908,22.359,22.287,22.213,22.562,22.525,22.725,22.63,22.745,22.643,22.997,22.997,22.903,22.761,22.701,22.818,22.785,22.667,22.729,22.82,22.087,22.25,22.121,21.979,21.772,21.945,21.863
3,3,100,0,130,1,21.54,21.419,21.565,20.857,21.052,21.039,20.926,20.912,20.989,20.882,20.596,20.465,20.604,20.672,20.453,20.479,20.621,20.717,20.342,20.777,20.676,20.747,20.689,20.79,21.061,20.307,20.181,20.307,20.144,19.783,19.804,19.749,19.875,20.155,19.913
4,3,100,0,130,1,20.46,20.298,20.35,19.867,19.997,19.972,19.924,19.813,19.691,19.634,19.133,19.14,19.444,19.457,19.815,19.329,19.67,19.965,19.868,19.628,20.041,19.913,20.164,20.037,20.402,20.152,20.148,20.085,19.614,19.686,19.949,19.925,19.818,19.5,19.52
