Attribute Information:

The data set contains raw process sensor data (i.e. without feature extraction) which are structured as matrices (tab-delimited) with the rows representing the cycles and the columns the data points within a cycle. The sensors involved are:

Sensor		Physical quantity		Unit		Sampling rate
PS1		Pressure			bar		100 Hz
PS2		Pressure			bar		100 Hz
PS3		Pressure			bar		100 Hz
PS4		Pressure			bar		100 Hz
PS5		Pressure			bar		100 Hz
PS6		Pressure			bar		100 Hz
EPS1		Motor power			W		100 Hz
FS1		Volume flow			l/min		10 Hz
FS2		Volume flow			l/min		10 Hz
TS1		Temperature			°C		1 Hz
TS2		Temperature			°C		1 Hz
TS3		Temperature			°C		1 Hz
TS4		Temperature			°C		1 Hz
VS1		Vibration			mm/s		1 Hz
CE		Cooling efficiency (virtual)	%		1 Hz
CP		Cooling power (virtual)		kW		1 Hz
SE		Efficiency factor		%		1 Hz



Has Missing Values?

No

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import loadtxt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# PS: Pressure, bar, 100 Hz --> 100 measurements per second
# EPS: Motor power, W, 100 Hz
# FS: Volume flow, l/min, 10 Hz --> 10 measurements per second
# TS: Temperature, Celsius, 1 Hz --> 1 measurement per second
# VS: Vibration, mm/s, 1 Hz
# CE: Cooling efficiency (virtual), %, 1 Hz
# CP: Cooling power (virtual), kW, 1 Hz
# SE: Efficiency factor, %, 1 Hz


class DataProcessor:
    """Load whitespace-delimited sensor matrices and expose the valve target.

    Every name in ``file_names`` is read from ``<input_path>/<name>.txt`` with
    ``numpy.loadtxt`` and stored in ``self.data`` under that name.
    """

    def __init__(self, input_path, file_names):
        """Store the read configuration.

        Args:
            input_path: Directory that holds the ``.txt`` files. A trailing
                path separator is optional.
            file_names: Iterable of file stems (without the ``.txt`` suffix).
        """
        self.input_path = input_path
        self.file_names = file_names
        # Start with an empty mapping so print_shape() is safe to call
        # before read_files() has run.
        self.data = {}

    def read_files(self):
        """Read all configured files into ``self.data``.

        Any previously loaded data is discarded first.

        Returns:
            dict: file stem -> array returned by ``loadtxt``.
        """
        self.data = {}
        print("Reading files...")
        for name in self.file_names:
            # os.path.join accepts input_path with or without a trailing
            # separator; plain string concatenation silently built a wrong
            # path when the separator was missing.
            path = os.path.join(self.input_path, name + '.txt')
            with open(path, 'r') as f:
                self.data[name] = loadtxt(f)
        return self.data

    def print_shape(self):
        """Print the shape of every array currently held in ``self.data``."""
        print("Files read:")
        for name, array in self.data.items():
            print(f"{name}: {array.shape}")

    def create_target_df(self):
        """Build the target DataFrame and return its 'Valve_Condition' column.

        The ``'target'`` entry is removed from ``self.data`` afterwards, so the
        remaining entries are sensor matrices only.

        Returns:
            pandas.Series: the ``'Valve_Condition'`` column.

        Raises:
            KeyError: if ``self.data`` has no ``'target'`` entry (for example
                when this method is called a second time without re-reading).
        """
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        self.df_target = pd.DataFrame(self.data['target'], columns=target_columns)
        self.valve_condition = self.df_target['Valve_Condition']
        # Delete only after the DataFrame was built successfully.
        del self.data['target']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Load the sensor files and split off the valve-condition target.

    Args:
        input_path: Directory containing the ``.txt`` files. Defaults to the
            previously hard-coded ``"input_data/"``.
        file_names: File stems to load. ``None`` selects the full default set
            of sensor files plus ``"target"``. The list must contain
            ``"target"``, because the target column is built from it.

    Returns:
        tuple: ``(data, df_target)`` where ``data`` is the dict of loaded
        arrays without the ``"target"`` entry and ``df_target`` is the value
        returned by ``DataProcessor.create_target_df()``.
    """
    if file_names is None:
        # Built per call so the default list is never shared between calls.
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target",
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() returns processor.valve_condition and removes the
    # "target" entry from `data` (the same dict object).
    df_target = processor.create_target_df()
    return data, df_target

# Load the sensor matrices and the valve-condition target for the cells below.
data, df_target = process_data()

In [14]:
class DataPlotter:
    """Plot overview and frequency-domain views for a dict of arrays.

    Assumes every value in ``dataset`` is a 2-D array, since the methods
    reduce along ``axis=1`` and apply a 2-D FFT.
    """

    def __init__(self, dataset: dict):
        """Keep a reference to ``dataset`` (name -> array); no copy is made."""
        self.dataset = dataset

    def create_subplot(self):
        """Draw one panel per dataset entry in a three-column grid.

        Each panel shows the raw array in blue and its row-wise mean in red.
        The number of rows follows the number of entries, so any dataset size
        works; the previous fixed 6x3 grid raised IndexError for more than 18
        entries. Unused trailing panels are hidden.
        """
        n_cols = 3
        n_items = len(self.dataset)
        # Ceiling division without importing math; at least one row so that
        # plt.subplots always receives a valid grid.
        n_rows = max(1, -(-n_items // n_cols))
        # Height scales with the row count and equals the former 20 in at 6 rows.
        fig, axs = plt.subplots(
            n_rows, n_cols, figsize=(15, 20 * n_rows / 6), squeeze=False
        )
        axes = axs.flatten()
        for ax, key in zip(axes, self.dataset):
            ax.plot(self.dataset[key], color='blue', linewidth=1)
            ax.plot(np.mean(self.dataset[key], axis=1), color='red')
            ax.set_title(key)
        # Hide panels that have no data instead of showing empty frames.
        for ax in axes[n_items:]:
            ax.set_visible(False)
        plt.tight_layout()
        plt.show()

    def create_fft_plot(self):
        """Show, per dataset entry, the summed FFT magnitude and the phase image."""
        for key in self.dataset:
            # Treat the 2-D array like an image.
            image = self.dataset[key]
            # 2-D Fourier transform.
            f_transform = np.fft.fft2(image)
            # Shift the zero-frequency component to the centre of the spectrum.
            f_transform_shifted = np.fft.fftshift(f_transform)
            # Magnitude spectrum (absolute value).
            magnitude_spectrum = np.abs(f_transform_shifted)
            # Phase spectrum (angle).
            phase_spectrum = np.angle(f_transform_shifted)
            # Sum the magnitudes over axis 0, giving one value per column.
            magnitude_spectrum_sum_cols = np.sum(magnitude_spectrum, axis=0)

            # One figure per entry with two panels.
            plt.figure(figsize=(12, 6))
            # Left panel: summed magnitude spectrum.
            plt.subplot(1, 2, 1)
            plt.plot(magnitude_spectrum_sum_cols)
            plt.title(f"{key}: Magnitude Spectrum (Summe entlang der Spalten)")
            plt.xlabel("Spalten-Index")
            plt.ylabel("Summierte Magnitude")
            # Right panel: phase spectrum as an image.
            plt.subplot(1, 2, 2)
            plt.imshow(phase_spectrum, cmap='twilight', aspect='auto')
            plt.title(f"{key}: Phase Spectrum")
            plt.colorbar(label='Phase (radians)')
            plt.tight_layout()
            plt.show()
            
# Plotter bound to the loaded sensor data; used by the plotting cells below.
plot = DataPlotter(data)

In [None]:
# Overview grid: one panel per sensor with raw values and row-wise mean.
plot.create_subplot()

In [None]:
# Frequency-domain view: one figure per sensor (summed magnitude and phase image).
plot.create_fft_plot()

In [None]:
# Print global summary statistics (over all elements) for each loaded array.
for name in data:
    values = data[name]
    mean_val = round(np.mean(values), 4)
    std_val = round(np.std(values), 4)
    lowest = np.min(values)
    highest = np.max(values)
    print(f"{name}: Mean = {mean_val}, Std = {std_val}, Min = {lowest}, Max = {highest}")

In [24]:
class ManualFeatureExtractor:
    """Reduce each 2-D array of a dataset to per-row summary statistics."""

    # Row-wise reducers selectable through ``extract_features(stats=...)``.
    _REDUCERS = {
        "mean": lambda a: a.mean(axis=1),
        "median": lambda a: np.median(a, axis=1),
        "std": lambda a: a.std(axis=1),
        "min": lambda a: a.min(axis=1),
        "max": lambda a: a.max(axis=1),
        "range": lambda a: a.max(axis=1) - a.min(axis=1),
    }

    def __init__(self, dataset):
        """Keep a reference to ``dataset`` (name -> 2-D array)."""
        self.dataset = dataset

    def extract_features(self, stats=("std",)):
        """Compute the requested row-wise statistics for every dataset entry.

        Columns are named ``"<key>_<i>_<stat>"``, where ``i`` is the 1-based
        position of ``key`` in the dataset. With the default ``stats`` the
        result has exactly one ``..._std`` column per entry.

        Args:
            stats: Iterable of statistic names, each one of ``"mean"``,
                ``"median"``, ``"std"``, ``"min"``, ``"max"``, ``"range"``.

        Returns:
            pandas.DataFrame: one column per (entry, statistic) pair; also
            stored as ``self.df_features``.

        Raises:
            ValueError: if ``stats`` contains an unknown statistic name.
        """
        stats = tuple(stats)
        unknown = [name for name in stats if name not in self._REDUCERS]
        if unknown:
            raise ValueError(
                f"Unknown statistics {unknown}; choose from {sorted(self._REDUCERS)}"
            )

        features = {}
        for i, key in enumerate(self.dataset, start=1):
            matrix = self.dataset[key]
            for name in stats:
                features[f"{key}_{i}_{name}"] = self._REDUCERS[name](matrix)
        self.df_features = pd.DataFrame(features)
        return self.df_features

# Build the feature table from the loaded sensor arrays.
extractor = ManualFeatureExtractor(data)
df_features = extractor.extract_features()

In [None]:
# Sanity check: features and target should have the same number of rows.
print(df_features.shape, df_target.shape)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

class ModelBuilder:
    """Train and evaluate an AdaBoost classifier with decision-stump learners."""

    def __init__(self, features, target, *, test_size=0.2, random_state=42,
                 n_estimators=50):
        """Store the data and the (previously hard-coded) hyper-parameters.

        Args:
            features: Feature table passed to ``train_test_split``.
            target: Labels aligned with ``features``.
            test_size: Fraction of samples held out for evaluation.
            random_state: Seed used for both the split and the classifier.
            n_estimators: Number of boosting rounds.
        """
        self.features = features
        self.target = target
        self.test_size = test_size
        self.random_state = random_state
        self.n_estimators = n_estimators

    def split_data(self):
        """Split features and target into train and test parts.

        Returns:
            tuple: ``(X_train, X_test, y_train, y_test)``.
        """
        return train_test_split(
            self.features,
            self.target,
            test_size=self.test_size,
            random_state=self.random_state,
        )

    def build_model(self):
        """Fit the classifier, print test accuracy and report, return the model.

        Returns:
            AdaBoostClassifier: the fitted model.
        """
        X_train, X_test, y_train, y_test = self.split_data()

        # NOTE: ``estimator=`` is the keyword in scikit-learn >= 1.2; older
        # releases call it ``base_estimator``.
        model = AdaBoostClassifier(
            estimator=DecisionTreeClassifier(max_depth=1),
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy}")
        print(classification_report(y_test, y_pred))

        return model




In [None]:
# Train and evaluate the classifier on the extracted features.
X = df_features
# The target was previously bound as `Y` while `y` was passed below,
# which raised NameError.
y = df_target
builder = ModelBuilder(X, y)
model = builder.build_model()

Merker für nach Urlaub: Datensätze von Sensoren, die mehrere Aufzeichnungen haben, zusammenführen, um einen großen Datensatz zu erhalten. Daraus dann wieder Standardabweichungen berechnen und in das Modell geben