Attribute Information:

The data set contains raw process sensor data (i.e. without feature extraction) which are structured as matrices (tab-delimited) with the rows representing the cycles and the columns the data points within a cycle. The sensors involved are:

Sensor		Physical quantity		Unit		Sampling rate
PS1		Pressure			bar		100 Hz
PS2		Pressure			bar		100 Hz
PS3		Pressure			bar		100 Hz
PS4		Pressure			bar		100 Hz
PS5		Pressure			bar		100 Hz
PS6		Pressure			bar		100 Hz
EPS1		Motor power			W		100 Hz
FS1		Volume flow			l/min		10 Hz
FS2		Volume flow			l/min		10 Hz
TS1		Temperature			°C		1 Hz
TS2		Temperature			°C		1 Hz
TS3		Temperature			°C		1 Hz
TS4		Temperature			°C		1 Hz
VS1		Vibration			mm/s		1 Hz
CE		Cooling efficiency (virtual)	%		1 Hz
CP		Cooling power (virtual)		kW		1 Hz
SE		Efficiency factor		%		1 Hz



Has Missing Values?

No

In [28]:
import pandas as pd
from numpy import loadtxt

class DataProcessor:
    """Load whitespace-delimited sensor text files and extract the valve target.

    Each name in ``file_names`` is read from ``input_path + name + '.txt'``
    into a NumPy array and stored in ``self.data`` under that name.

    Attributes:
        input_path: Prefix prepended to every file name (no separator is
            inserted, so a directory must end with a path separator).
        file_names: File names without the ``.txt`` extension.
        data: Dict mapping file name -> loaded array; empty until
            ``read_files()`` has run.
        df_target: DataFrame built from the ``'target'`` array, or None.
        valve_condition: The ``'Valve_Condition'`` column of ``df_target``,
            or None until ``create_target_df()`` has run.
    """

    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        # Created up front so print_shape()/create_target_df() behave
        # predictably when called before read_files().
        self.data = {}
        self.df_target = None
        self.valve_condition = None

    def read_files(self):
        """Read every configured file and return the resulting dict.

        Returns:
            dict: file name -> array as returned by ``numpy.loadtxt``.

        Raises:
            OSError: If one of the files cannot be opened.
            ValueError: If ``loadtxt`` cannot parse a file.
        """
        print("Reading files...")
        loaded = {}
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                loaded[file] = loadtxt(f)
        # Only publish the result once every file was read, so a failed
        # read never leaves a half-filled dict behind.
        self.data = loaded
        return self.data

    def print_shape(self):
        """Print the shape of every array currently held in ``self.data``."""
        print("Files read:")
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")

    def create_target_df(self):
        """Build the target DataFrame and return its valve-condition column.

        The ``'target'`` entry is removed from ``self.data`` afterwards so
        that only sensor arrays remain. Calling the method again is safe:
        the previously extracted column is returned unchanged.

        Returns:
            pandas.Series: The ``'Valve_Condition'`` column.

        Raises:
            KeyError: If ``'target'`` was never loaded.
        """
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        if 'target' in self.data:
            self.df_target = pd.DataFrame(self.data['target'], columns=target_columns)
            self.valve_condition = self.df_target['Valve_Condition']
            del self.data['target']
        elif getattr(self, 'valve_condition', None) is None:
            # getattr keeps this working for instances built without __init__.
            raise KeyError(
                "'target' has not been loaded; call read_files() with "
                "'target' in file_names first"
            )
        return self.valve_condition

def main():
    """Read all sensor files and the target file, then split off the labels.

    Returns:
        tuple: ``(data, df_target)`` where ``data`` is the dict returned by
        ``DataProcessor.read_files()`` (the ``'target'`` entry has been
        removed from it by ``create_target_df()``) and ``df_target`` is the
        processor's ``valve_condition`` column.
    """
    # File names grouped as in the original listing; the flattened order
    # determines the read order and the key order of the returned dict.
    name_groups = (
        ("ce", "cp", "eps1", "se", "vs1"),
        ("fs1", "fs2"),
        ("ps1", "ps2", "ps3", "ps4", "ps5", "ps6"),
        ("ts1", "ts2", "ts3", "ts4", "target"),
    )
    file_names = [name for group in name_groups for name in group]

    processor = DataProcessor("input_data/", file_names)
    data = processor.read_files()
    processor.print_shape()
    processor.create_target_df()
    return data, processor.valve_condition

# Run the loading pipeline once: `data` receives the dict of arrays and
# `df_target` the valve-condition column returned by main().
data, df_target = main()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


{'ce': array([[47.202, 47.273, 47.25 , ..., 29.261, 29.287, 28.866],
       [29.208, 28.822, 28.805, ..., 23.483, 23.32 , 23.588],
       [23.554, 23.521, 23.527, ..., 21.582, 21.283, 21.519],
       ...,
       [46.757, 46.863, 46.756, ..., 45.8  , 46.171, 46.502],
       [46.584, 46.469, 46.538, ..., 46.59 , 46.579, 46.57 ],
       [46.77 , 46.539, 46.479, ..., 46.384, 46.479, 46.621]]), 'cp': array([[2.184, 2.184, 2.184, ..., 1.401, 1.402, 1.383],
       [1.414, 1.384, 1.385, ..., 1.158, 1.147, 1.164],
       [1.159, 1.157, 1.157, ..., 1.108, 1.085, 1.103],
       ...,
       [2.146, 2.145, 2.139, ..., 2.107, 2.121, 2.144],
       [2.145, 2.139, 2.147, ..., 2.135, 2.135, 2.148],
       [2.144, 2.146, 2.143, ..., 2.144, 2.136, 2.148]]), 'eps1': array([[2411.6, 2411.6, 2411.6, ..., 2409.6, 2409.6, 2409.6],
       [2409.6, 2409.6, 2409.6, ..., 2397.8, 2397.8, 2397.8],
       [2397.8, 2397.8, 2397.8, ..., 2383.8, 2383.8, 2383.8],
       ...,
       [2413.6, 2413.6, 2413.6, ..., 2413.6, 

In [1]:
import pandas as pd
import numpy as np
from numpy import loadtxt
import matplotlib.pyplot as plt

# Sensor overview (physical quantity, unit, sampling rate):
#   PS  - pressure, bar, 100 Hz (100 samples per second)
#   EPS - motor power, W, 100 Hz
#   FS  - volume flow, l/min, 10 Hz (10 samples per second)
#   TS  - temperature, Celsius, 1 Hz (1 sample per second)
#   VS  - vibration, mm/s, 1 Hz
#   CE  - cooling efficiency (virtual), %, 1 Hz
#   CP  - cooling power (virtual), kW, 1 Hz
#   SE  - efficiency factor, %, 1 Hz

# Location prefix of the text files and the names to load (without ".txt").
input_path = "input_data/"
file_names = [
    "ce", "cp", "eps1", "se", "vs1",
    "fs1", "fs2",
    "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
    "ts1", "ts2", "ts3", "ts4",
    "target",
]

# Load each file into an array, keyed by its file name.
data = {}
for file_name in file_names:
    data[file_name] = np.loadtxt(input_path + file_name + ".txt")

In [None]:
# Report the array shape of every loaded file, in loading order.
for file_name in file_names:
    loaded_shape = data[file_name].shape
    print(f"Shape of {file_name}:{loaded_shape}")

In [2]:
# Column names assigned to the five columns of the 'target' array.
target_columns = ['Cooler_Condition', 'Valve_Condition',
                  'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                  'Stable_Flag']

if 'target' in data:
    try:
        target_frame = pd.DataFrame(data['target'], columns=target_columns)
        df_target = target_frame['Valve_Condition']  # only 'Valve_Condition' is used as the label
    except Exception as e:
        # Keep the original diagnostic, but re-raise: later cells need
        # `df_target`, so continuing would only fail later with a less
        # helpful NameError.
        print(f"Fehler beim Verarbeiten von 'target': {e}")
        raise
    # Remove 'target' so that `data` only contains sensor arrays.
    del data['target']
elif 'df_target' not in globals():
    # 'target' is gone and no labels were extracted earlier: the loading
    # cell has to be run again.
    raise KeyError(
        "'target' is missing from `data` and `df_target` has not been "
        "created yet; re-run the data loading cell"
    )
# Otherwise this cell was already executed; `df_target` is kept as is, which
# makes the cell safe to re-run.

In [None]:
# Display summary statistics of the valve-condition labels (cell output).
df_target.describe()

In [None]:
# Overview grid with one subplot per entry in `data`.
# The grid size is derived from the number of entries instead of a fixed
# 6x3 layout, so no IndexError occurs when more than 18 arrays are loaded.
n_cols = 3
n_rows = max(1, -(-len(data) // n_cols))  # ceiling division, at least one row
# 20 inches of height for 6 rows, scaled proportionally for other row counts.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 20 * n_rows / 6), squeeze=False)
axes = axes.flatten()

for ax, key in zip(axes, data):
    # Blue: every column of the matrix drawn as its own line over the row index.
    ax.plot(data[key], color='blue', linewidth=1)
    # Red: mean over each row (axis=1), i.e. one value per cycle.
    ax.plot(np.mean(data[key], axis=1), color='red')
    ax.set_title(key)
    ax.set_xlabel("Cycle")

# Hide grid cells that have no data instead of showing empty frames.
for unused_ax in axes[len(data):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Print mean, standard deviation, minimum and maximum over all values of
# each array (mean and std rounded to four decimals).
for key, array in data.items():
    mean_value = round(np.mean(array), 4)
    std_value = round(np.std(array), 4)
    lowest = np.min(array)
    highest = np.max(array)
    print(f"{key}: Mean = {mean_value}, Std = {std_value}, Min = {lowest}, Max = {highest}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

for key in data:
    # The whole matrix of this entry is treated as one 2D signal.
    signal_matrix = data[key]

    # 2D Fourier transform, with the zero-frequency component shifted to
    # the centre of the spectrum.
    centred_spectrum = np.fft.fftshift(np.fft.fft2(signal_matrix))

    # Split the complex spectrum into magnitude and phase.
    magnitude = np.abs(centred_spectrum)
    phase = np.angle(centred_spectrum)

    # Sum the magnitude over axis 0, leaving one value per column.
    column_totals = magnitude.sum(axis=0)

    fig, (ax_magnitude, ax_phase) = plt.subplots(1, 2, figsize=(12, 6))

    # Left panel: summed magnitude per column.
    ax_magnitude.plot(column_totals)
    ax_magnitude.set_title(f"{key}: Magnitude Spectrum (Summe entlang der Spalten)")
    ax_magnitude.set_xlabel("Spalten-Index")
    ax_magnitude.set_ylabel("Summierte Magnitude")

    # Right panel: phase spectrum as an image with its colour bar.
    phase_image = ax_phase.imshow(phase, cmap='twilight', aspect='auto')
    ax_phase.set_title(f"{key}: Phase Spectrum")
    fig.colorbar(phase_image, ax=ax_phase, label='Phase (radians)')

    fig.tight_layout()
    plt.show()


In [28]:
# Build one feature column per entry in `data`: the standard deviation of
# each row (axis=1). Column names follow "<key>_<position>_std", where
# position is the 1-based index of the key within `data`.
# Other per-row statistics (mean, median, min, max, range) are currently
# disabled; only the standard deviation is used.
features = {
    f"{key}_{idx}_std": data[key].std(axis=1)
    for idx, key in enumerate(data, start=1)
}

# Combine the per-row feature vectors into a single DataFrame.
df_features = pd.DataFrame(features)


In [None]:
# Print the shapes of the feature frame and the label series side by side.
print(df_features.shape, df_target.shape)

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Feature matrix and labels produced by the earlier cells.
X = df_features
y = df_target

# Fail early with a clear message if features and labels are out of sync
# (for example after running cells out of order).
assert len(X) == len(y), f"X has {len(X)} rows but y has {len(y)} labels"

# Hold out 20% of the rows for testing; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# AdaBoost with 50 depth-1 decision trees as base estimators.
ada_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42,
)

# Training
ada_model.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = ada_model.predict(X_test)

# Model evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
