Attribute Information:

The data set contains raw process sensor data (i.e. without feature extraction) which are structured as matrices (tab-delimited) with the rows representing the cycles and the columns the data points within a cycle. The sensors involved are:

Sensor		Physical quantity		Unit		Sampling rate
PS1		Pressure			bar		100 Hz
PS2		Pressure			bar		100 Hz
PS3		Pressure			bar		100 Hz
PS4		Pressure			bar		100 Hz
PS5		Pressure			bar		100 Hz
PS6		Pressure			bar		100 Hz
EPS1		Motor power			W		100 Hz
FS1		Volume flow			l/min		10 Hz
FS2		Volume flow			l/min		10 Hz
TS1		Temperature			Â°C		1 Hz
TS2		Temperature			Â°C		1 Hz
TS3		Temperature			Â°C		1 Hz
TS4		Temperature			Â°C		1 Hz
VS1		Vibration			mm/s		1 Hz
CE		Cooling efficiency (virtual)	%		1 Hz
CP		Cooling power (virtual)		kW		1 Hz
SE		Efficiency factor		%		1 Hz



Has Missing Values?

No

In [None]:
Class DataProcessor:
    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        
        
    def read_files(self):
        self.data = {}
        for file in self.file_names:
            with open(self.input_path + file, 'r') as f:
                data[f] = loadtxt(f"{self.input_path}{self.file_name}.txt")
        return self.data
    
    def print_shape(self):
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")
            
    def create_target_df(self):
        try:
            target_columns = ['Cooler_Condition', 'Valve_Condition', 
                            'Internal_Pump_Leakage', 'Hydraulic_Accumulator', 
                            'Stable_Flag']
            self.df_target = pd.DataFrame(self.data['target'], columns=target_columns)
            self.df_target = df_target['Valve_Condition']
            del self.data['target']
            return self.df_target
        except Exception as e:
            print(f"Fehler beim Verarbeiten von 'target': {e}")
        

In [1]:
import pandas as pd
import numpy as np
from numpy import loadtxt
import matplotlib.pyplot as plt

#PS: Pressure, bar, 100 Hz --> 100 Messungen pro Skeunde
#EPS: Motor power, W, 100 Hz
#FS: Volume flow, l/min, 10 Hz --> 10 Messungen pro Sekunde
#TS: Temperature, Celsius, 1 Hz --> 1 Messung pro Sekunde
#VS: Vibration, mm/s, 1Hz
#CE: Cooling efficiency (virtual), %, 1 Hz
#CP: Cooling power (virtual), kW, 1 Hz
#SE: Efficency factor, %, 1 Hz


input_path = "input_data/"
file_names = [
    "ce", "cp", "eps1", "se", "vs1", 
    "fs1", "fs2", 
    "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
    "ts1", "ts2", "ts3", "ts4", "target"
]

data = {}

for file_name in file_names:
    data[file_name] = loadtxt(f"{input_path}{file_name}.txt")

In [3]:
for file_name in file_names:
    print(f"Shape of {file_name}:{data[file_name].shape}")

Shape of ce:(2205, 60)
Shape of cp:(2205, 60)
Shape of eps1:(2205, 6000)
Shape of se:(2205, 60)
Shape of vs1:(2205, 60)
Shape of fs1:(2205, 600)
Shape of fs2:(2205, 600)
Shape of ps1:(2205, 6000)
Shape of ps2:(2205, 6000)
Shape of ps3:(2205, 6000)
Shape of ps4:(2205, 6000)
Shape of ps5:(2205, 6000)
Shape of ps6:(2205, 6000)
Shape of ts1:(2205, 60)
Shape of ts2:(2205, 60)
Shape of ts3:(2205, 60)
Shape of ts4:(2205, 60)


KeyError: 'target'

In [2]:
try:
    target_columns = ['Cooler_Condition', 'Valve_Condition', 
                      'Internal_Pump_Leakage', 'Hydraulic_Accumulator', 
                      'Stable_Flag']
    df_target = pd.DataFrame(data['target'], columns=target_columns)
    df_target = df_target['Valve_Condition']  # Nur 'Valve_Condition' verwenden
    del data['target']  # 'target' aus den übrigen Daten entfernen
except Exception as e:
    print(f"Fehler beim Verarbeiten von 'target': {e}")

#df_target.head()

In [None]:
df_target.describe()

In [None]:
fig, axes = plt.subplots(6, 3, figsize=(15, 20))
axes = axes.flatten()

for i, key in enumerate(data):
    ax = axes[i]
    ax.plot(data[key], color = 'blue', linewidth = 1)
    ax.plot(np.mean(data[key], axis=1), color = 'red') 
    ax.set_title(key) 

plt.tight_layout()
plt.show()

In [None]:
for key, array in data.items():
    print(f"{key}: Mean = {round((np.mean(array)),4)}, Std = {round((np.std(array)),4)}, Min = {np.min(array)}, Max = {np.max(array)}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

for key in data: 
    # Erstelle ein 2D-Array (zum Beispiel ein einfaches Bild oder eine Matrix)
    image = data[key]

    # Berechne die 2D-Fourier-Transformation
    f_transform = np.fft.fft2(image)

    # Verschiebe den Nullfrequenzanteil in die Mitte des Spektrums
    f_transform_shifted = np.fft.fftshift(f_transform)

    # Berechne das Magnitudespektrum (Betrag)
    magnitude_spectrum = np.abs(f_transform_shifted)

    # Berechne das Phasenspektrum (Phase)
    phase_spectrum = np.angle(f_transform_shifted)

    # Summiere das Magnitudespektrum entlang der Spalten (horizontale Richtung)
    magnitude_spectrum_sum_cols = np.sum(magnitude_spectrum, axis=0)

    # Plot des Magnitudespektrums als Summe entlang der Spalten
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(magnitude_spectrum_sum_cols)
    plt.title(f"{key}: Magnitude Spectrum (Summe entlang der Spalten)")
    plt.xlabel("Spalten-Index")
    plt.ylabel("Summierte Magnitude")

    # Plot des Phasenspektrums
    plt.subplot(1, 2, 2)
    plt.imshow(phase_spectrum, cmap='twilight', aspect='auto')
    plt.title(f"{key}: Phase Spectrum")
    plt.colorbar(label='Phase (radians)')
    plt.tight_layout()
    plt.show()


In [28]:
# Erstelle ein Dictionary für alle Features
features = {}

# Iteriere über alle Keys im `data` Dictionary
for idx, key in enumerate(data, start=1):
    # Berechne die gewünschten Statistiken für jede Zeile (axis=1)
    #features[f"{key}_{idx}_mean"] = data[key].mean(axis=1)
    #features[f"{key}_{idx}_median"] = np.median(data[key], axis=1)
    features[f"{key}_{idx}_std"] = data[key].std(axis=1)
    #features[f"{key}_{idx}_min"] = data[key].min(axis=1)
    #features[f"{key}_{idx}_max"] = data[key].max(axis=1)
    #features[f"{key}_{idx}_range"] = data[key].max(axis=1) - data[key].min(axis=1)

# Konvertiere das `features` Dictionary in einen DataFrame
df_features = pd.DataFrame(features)


In [20]:
print(df_features.shape, df_target.shape)

(2205, 17) (2205,)


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Extrahierte Features und Labels (ersetze dies mit deinem Datensatz)

# Features und Zielvariable
X = df_features
y = df_target

X.shape

y.shape

# Datenaufteilung
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# AdaBoost-Modell
ada_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)

# Training
ada_model.fit(X_train, y_train)

# Vorhersagen
y_pred = ada_model.predict(X_test)

# Modellbewertung
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))




Accuracy: 0.8163265306122449

Classification Report:
              precision    recall  f1-score   support

        73.0       1.00      0.67      0.80        88
        80.0       0.71      0.96      0.82        67
        90.0       0.74      0.62      0.68        80
       100.0       0.83      0.91      0.87       206

    accuracy                           0.82       441
   macro avg       0.82      0.79      0.79       441
weighted avg       0.83      0.82      0.81       441

