In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

#PS: Pressure, bar, 100 Hz --> 100 Messungen pro Sekunde
#EPS: Motor power, W, 100 Hz 
#FS: Volume flow, l/min, 10 Hz --> 10 Messungen pro Sekunde
#TS: Temperature, Celsius, 1 Hz --> 1 Messung pro Sekunde
#VS: Vibration, mm/s, 1Hz
#CE: Cooling efficiency (virtual), %, 1 Hz
#CP: Cooling power (virtual), kW, 1 Hz
#SE: Efficency factor, %, 1 Hz


class DataProcessor:
    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        
    def read_files(self):
        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data
    
    def print_shape(self):
        print("Files read:")
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")
            
    def create_target_df(self):
        target_columns = ['Cooler_Condition', 'Valve_Condition', 
                        'Internal_Pump_Leakage', 'Hydraulic_Accumulator', 
                        'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        #del self.data['target']
        return self.valve_condition

def process_data():
    input_path = "input_data/"
    file_names = [
        "ce", "cp", "eps1", "se", "vs1", 
        "fs1", "fs2", 
        "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
        "ts1", "ts2", "ts3", "ts4", "target"
    ]
    
    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    df_target = processor.create_target_df()
    df_target = processor.valve_condition
    return data, df_target

data, df_target = process_data()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


<h1> tsfresh </h1>

In [3]:
df_se = data['se']
df_se['target'] = df_target
df_se['id'] = df_se.index
df_se.shape

(2205, 62)

In [4]:
df_fs1 = data['fs1']

In [5]:
from scipy.signal import decimate
downsample_factor = 10 
filtered_signals = []

for i in range(df_fs1.shape[0]):
    row = df_fs1.iloc[i].values  # Extract row as a 1D array
    filtered_signal = decimate(row, downsample_factor, ftype='fir')  # Downsample
    filtered_signals.append(filtered_signal)  # Store the result

# Create a new DataFrame with the filtered signals
df_fs1_ds = pd.DataFrame(filtered_signals)
    

In [6]:
df_fs1_ds['target'] = df_target
df_fs1_ds["id"] = df_fs1_ds.index
df_fs1_ds.shape

(2205, 62)

In [7]:
df_ps3 = data['ps3']

downsample_factor = 100 
filtered_signals = []

for i in range(df_ps3.shape[0]):
    row = df_ps3.iloc[i].values  # Extract row as a 1D array
    filtered_signal = decimate(row, downsample_factor, ftype='fir')  # Downsample
    filtered_signals.append(filtered_signal)  # Store the result

# Create a new DataFrame with the filtered signals
df_ps3_ds = pd.DataFrame(filtered_signals)

In [8]:
df_ps3_ds['target'] = df_target
df_ps3_ds["id"] = df_ps3_ds.index
df_ps3_ds.shape

(2205, 62)

In [10]:
df_combined = pd.concat([df_se, df_fs1_ds, df_ps3_ds], ignore_index=True)

In [11]:
df_target = df_combined['target']
df_combined.drop(columns=['target'], inplace= True)

In [15]:
df_combined

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,id
0,68.039000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,69.141000,...,68.101000,68.101000,68.420000,68.420000,68.223000,68.223000,68.159000,68.159000,68.264000,0
1,68.264000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,65.715000,...,68.536000,68.536000,68.465000,68.465000,68.491000,68.491000,68.528000,68.528000,68.595000,1
2,68.595000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,67.320000,...,68.901000,68.901000,68.805000,68.805000,68.456000,68.456000,68.758000,68.758000,68.628000,2
3,68.628000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,68.991000,...,68.860000,68.860000,68.946000,68.946000,69.021000,69.021000,68.851000,68.851000,68.868000,3
4,68.868000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,59.523000,...,68.483000,68.483000,68.819000,68.819000,68.862000,68.862000,69.036000,69.036000,68.972000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6610,0.172926,0.011496,-0.018428,0.024396,-0.043644,0.063811,-0.104500,0.168088,-0.308903,0.980114,...,2.317221,2.344719,2.342248,2.311307,2.359219,2.300111,2.390136,2.225378,2.583649,2200
6611,0.174585,0.011612,-0.018216,0.023935,-0.042724,0.062371,-0.102110,0.162070,-0.301379,0.960469,...,2.351193,2.298340,2.330884,2.312942,2.351669,2.291068,2.402735,2.230432,2.545339,2201
6612,0.170944,0.011664,-0.018810,0.025126,-0.044919,0.065827,-0.107497,0.169427,-0.318937,1.016304,...,2.301769,2.340973,2.351554,2.324610,2.339859,2.286507,2.403170,2.205626,2.521674,2202
6613,0.173025,0.011461,-0.018365,0.024314,-0.043512,0.063590,-0.104051,0.163943,-0.308924,0.981163,...,2.350127,2.354437,2.351759,2.355719,2.375095,2.312541,2.435032,2.259279,2.562707,2203


In [12]:
df_long = pd.melt(df_combined, id_vars=['id'], var_name='time', value_name='value')

In [13]:
df_long

Unnamed: 0,id,time,value
0,0,0,68.039000
1,1,0,68.264000
2,2,0,68.595000
3,3,0,68.628000
4,4,0,68.868000
...,...,...,...
396895,2200,59,2.583649
396896,2201,59,2.545339
396897,2202,59,2.521674
396898,2203,59,2.562707


In [53]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df_target = encoder.fit_transform(df_target)

In [56]:
df_target.shape

(6615,)

In [57]:
df_combined.shape

(6615, 61)

In [14]:
from tsfresh import extract_features
from tsfresh.feature_selection import select_features

# Merkmalsextraktion mit tsfresh
features = extract_features(df_long, column_id="id", column_sort="time")
features_cleaned = features.dropna(axis=1)

Feature Extraction: 100%|██████████| 30/30 [01:54<00:00,  3.82s/it]


In [59]:
features_cleaned

Unnamed: 0,value__variance_larger_than_standard_deviation,value__has_duplicate_max,value__has_duplicate_min,value__has_duplicate,value__sum_values,value__abs_energy,value__mean_abs_change,value__mean_change,value__mean_second_derivative_central,value__median,...,value__fourier_entropy__bins_3,value__fourier_entropy__bins_5,value__fourier_entropy__bins_10,value__fourier_entropy__bins_100,value__permutation_entropy__dimension_3__tau_1,value__permutation_entropy__dimension_4__tau_1,value__permutation_entropy__dimension_5__tau_1,value__permutation_entropy__dimension_6__tau_1,value__permutation_entropy__dimension_7__tau_1,value__mean_n_absolute_max__number_of_maxima_7
0,1.0,1.0,0.0,1.0,4067.509508,246746.349547,30.815722,-0.366322,0.003442,7.755421,...,0.120874,0.165893,0.344705,1.839205,1.753243,2.939832,4.022800,4.681511,4.966788,73.954571
1,1.0,1.0,0.0,1.0,4077.633090,248280.121739,30.923486,-0.367472,0.003184,7.769700,...,0.120874,0.165893,0.344705,1.842939,1.749839,2.946953,3.973445,4.667507,4.982722,74.407000
2,1.0,1.0,0.0,1.0,4090.050785,249947.281542,31.034725,-0.369242,0.004072,7.753248,...,0.120874,0.165893,0.344705,1.836474,1.752428,3.002585,4.040713,4.681906,4.971350,74.612286
3,1.0,1.0,0.0,1.0,4103.766486,252026.221681,31.193110,-0.369850,0.003289,7.754219,...,0.120874,0.165893,0.344705,1.842939,1.745121,2.921272,3.949570,4.605680,4.915978,74.960571
4,1.0,1.0,0.0,1.0,4080.139615,249420.215623,31.054908,-0.371252,0.003760,7.762190,...,0.120874,0.165893,0.344705,1.842939,1.754574,2.972914,4.035809,4.686047,4.947449,74.992857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200,1.0,1.0,0.0,1.0,4059.455427,245488.139593,30.767409,-0.365678,0.003955,7.748896,...,0.120874,0.165893,0.344705,1.813669,1.744995,2.941994,3.990251,4.739059,5.031580,72.967286
2201,1.0,1.0,0.0,1.0,4061.528521,245745.636475,30.785160,-0.366976,0.004793,7.773995,...,0.120874,0.165893,0.344705,1.813669,1.757278,2.937750,3.949484,4.632435,4.942489,73.058286
2202,1.0,1.0,0.0,1.0,4065.174664,246187.353301,30.802732,-0.366326,0.004519,7.760359,...,0.120874,0.165893,0.344705,1.813669,1.754237,2.965355,3.962111,4.659687,4.963383,73.062143
2203,1.0,1.0,0.0,1.0,4055.725012,244948.040348,30.729858,-0.366035,0.004622,7.747842,...,0.120874,0.165893,0.344705,1.813669,1.751019,2.963961,4.021593,4.661131,4.950456,72.838714


In [66]:
df_target = data['target']['Valve_Condition']

In [65]:
# Feature-Selektion basierend auf Zielwerten
selected_features = select_features(features_cleaned, y=df_target)  # labels = Zielvariablen (falls vorhanden)

In [67]:
selected_features

Unnamed: 0,value__number_crossing_m__m_1,"value__cwt_coefficients__coeff_14__w_2__widths_(2, 5, 10, 20)","value__cwt_coefficients__coeff_11__w_2__widths_(2, 5, 10, 20)","value__cwt_coefficients__coeff_10__w_2__widths_(2, 5, 10, 20)","value__cwt_coefficients__coeff_13__w_2__widths_(2, 5, 10, 20)",value__range_count__max_1__min_-1,"value__cwt_coefficients__coeff_8__w_2__widths_(2, 5, 10, 20)",value__minimum,"value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.2__ql_0.0","value__agg_linear_trend__attr_""slope""__chunk_len_50__f_agg_""min""",...,"value__fft_coefficient__attr_""real""__coeff_50","value__fft_coefficient__attr_""real""__coeff_45",value__ar_coefficient__coeff_5__k_10,"value__agg_linear_trend__attr_""stderr""__chunk_len_10__f_agg_""var""","value__fft_coefficient__attr_""abs""__coeff_67","value__fft_coefficient__attr_""abs""__coeff_45","value__agg_linear_trend__attr_""stderr""__chunk_len_10__f_agg_""max""","value__fft_coefficient__attr_""real""__coeff_14",value__number_crossing_m__m_0,"value__fft_coefficient__attr_""imag""__coeff_84"
0,2.0,-0.149719,0.081495,0.085304,-0.156396,25.0,-0.579115,-0.963714,0.227741,0.966110,...,89.307551,-460.773619,0.018696,9.432143,361.882079,462.975078,0.722850,-25.797348,12.0,-436.496970
1,2.0,-0.138969,0.075790,0.076920,-0.142424,25.0,-0.579146,-0.866528,0.209177,0.942554,...,87.932649,-469.636689,0.017523,9.568810,365.202068,471.621747,0.719165,-25.537871,12.0,-443.259662
2,2.0,-0.141720,0.076443,0.078926,-0.146655,25.0,-0.578669,-0.899187,0.214322,0.957842,...,91.284373,-468.822462,0.018707,9.608400,365.515858,470.850913,0.723671,-26.878956,12.0,-442.461926
3,2.0,-0.149903,0.081587,0.085240,-0.155728,25.0,-0.582340,-0.954869,0.224653,0.954323,...,89.248751,-467.871974,0.017857,9.698960,367.667553,469.959030,0.730762,-27.867852,12.0,-443.923375
4,2.0,-0.123589,0.065107,0.063637,-0.121266,26.0,-0.571484,-0.694937,0.174617,0.875592,...,92.628336,-478.588830,0.015374,9.875627,369.024946,479.784176,0.720172,-26.544482,12.0,-452.457814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200,4.0,-0.118460,0.066441,0.064629,-0.122043,27.0,-0.573564,-0.737236,0.182402,0.906819,...,95.881660,-470.943109,0.012993,9.676713,370.253555,471.385420,0.705312,-19.784339,12.0,-445.987606
2201,4.0,-0.117116,0.065559,0.063824,-0.120956,27.0,-0.573541,-0.733221,0.181683,0.914169,...,97.356886,-472.383285,0.012578,9.707276,370.332705,472.791185,0.706503,-20.661267,12.0,-446.655660
2202,2.0,-0.125756,0.068937,0.069717,-0.131729,26.0,-0.569417,-0.816544,0.193725,0.925311,...,94.941448,-467.956427,0.013495,9.617253,367.509100,468.572107,0.707527,-22.035656,12.0,-443.212972
2203,4.0,-0.115751,0.062316,0.062008,-0.119995,27.0,-0.563035,-0.736127,0.177751,0.923197,...,95.306749,-469.747879,0.013144,9.660674,369.639520,470.173611,0.704380,-21.525125,12.0,-445.175483


In [68]:
df_features = pd.DataFrame(selected_features)

In [74]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df_target)

In [70]:
df_features.replace([np.inf, -np.inf], np.nan, inplace=True)

In [71]:
df_features = df_features.dropna(how = "all", axis= "columns")

In [72]:
df_features = df_features.ffill(axis="index")

In [16]:
from sklearn.feature_selection import VarianceThreshold

selector = VarianceThreshold()
df_features = selector.fit_transform(df_features)


NameError: name 'df_features' is not defined

In [75]:
print(df_features.shape, y_encoded.shape)

(2205, 683) (2205,)


In [80]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

  
states = [27, 6728, 49122]
accs = []
features = df_features
target = y_encoded

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size = 0.2, random_state = RANDOM_STATE, stratify = target
    )
    
    model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2),
    n_estimators=30
    )
    
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")



Random State: 27
              precision    recall  f1-score   support

           0       1.00      0.99      0.99        72
           1       0.99      1.00      0.99        72
           2       0.92      0.78      0.84        72
           3       0.93      0.98      0.95       225

    accuracy                           0.95       441
   macro avg       0.96      0.94      0.95       441
weighted avg       0.95      0.95      0.95       441





Random State: 6728
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       1.00      1.00      1.00        72
           2       0.88      0.88      0.88        72
           3       0.96      0.96      0.96       225

    accuracy                           0.96       441
   macro avg       0.96      0.96      0.96       441
weighted avg       0.96      0.96      0.96       441





Random State: 49122
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       0.97      1.00      0.99        72
           2       0.51      0.94      0.66        72
           3       0.99      0.71      0.83       225

    accuracy                           0.84       441
   macro avg       0.87      0.91      0.87       441
weighted avg       0.91      0.84      0.85       441

Mean Accuracy: 0.9176
Std Accuracy: 0.0525


In [81]:
from sklearn import svm

states = [27, 6728, 49122]
accs = []
features = df_features
target = y_encoded

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size = 0.2, random_state = RANDOM_STATE, stratify = target
    )
    
    clf = svm.SVC(kernel='linear')
    
    clf.fit(X_train, y_train)
    preds = model.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")

Random State: 27
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       1.00      1.00      1.00        72
           2       0.50      0.97      0.66        72
           3       0.99      0.69      0.81       225

    accuracy                           0.84       441
   macro avg       0.87      0.92      0.87       441
weighted avg       0.91      0.84      0.85       441

Random State: 6728
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       1.00      1.00      1.00        72
           2       0.55      0.90      0.68        72
           3       0.96      0.76      0.85       225

    accuracy                           0.86       441
   macro avg       0.88      0.92      0.88       441
weighted avg       0.91      0.86      0.87       441

Random State: 49122
              precision    recall  f1-score   support

           0       

In [82]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

states = [27, 6728, 49122]
accs = []

features = df_features
target = y_encoded

for RANDOM_STATE in states:
 
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=RANDOM_STATE, stratify=target
    )
    
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)
    
    
    preds = knn.predict(X_test)  # Korrigiert von `model.predict` zu `knn.predict`
    accs.append(accuracy_score(y_test, preds))
    
    # Ergebnisse ausgeben
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))


accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")

Random State: 27
              precision    recall  f1-score   support

           0       1.00      0.99      0.99        72
           1       0.97      0.99      0.98        72
           2       0.91      0.89      0.90        72
           3       0.97      0.98      0.97       225

    accuracy                           0.97       441
   macro avg       0.96      0.96      0.96       441
weighted avg       0.97      0.97      0.97       441

Random State: 6728
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       1.00      1.00      1.00        72
           2       0.91      0.85      0.88        72
           3       0.95      0.97      0.96       225

    accuracy                           0.96       441
   macro avg       0.97      0.96      0.96       441
weighted avg       0.96      0.96      0.96       441

Random State: 49122
              precision    recall  f1-score   support

           0       