In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

#PS: Pressure, bar, 100 Hz --> 100 measurements per second
#EPS: Motor power, W, 100 Hz
#FS: Volume flow, l/min, 10 Hz --> 10 measurements per second
#TS: Temperature, Celsius, 1 Hz --> 1 measurement per second
#VS: Vibration, mm/s, 1 Hz
#CE: Cooling efficiency (virtual), %, 1 Hz
#CP: Cooling power (virtual), kW, 1 Hz
#SE: Efficiency factor, %, 1 Hz


class DataProcessor:
    """Load a set of tab-separated, header-less text files into DataFrames.

    Every entry of ``file_names`` is read from ``<input_path>/<name>.txt`` and
    stored in ``self.data`` under that name.
    """

    def __init__(self, input_path, file_names):
        """Remember where the files live and which ones to load.

        Args:
            input_path: Directory containing the ``.txt`` files. A trailing
                path separator is accepted but not required.
            file_names: Iterable of file stems (file names without ``.txt``).
        """
        self.input_path = input_path
        self.file_names = file_names
        # Created up front so print_shape() is safe to call before read_files().
        self.data = {}

    def read_files(self):
        """Read every configured file and return the ``name -> DataFrame`` dict.

        Any previously loaded frames are discarded first.

        Raises:
            FileNotFoundError: If one of the expected files does not exist.
        """
        self.data = {}
        print("Reading files...")
        # Joining through Path works with and without a trailing separator,
        # unlike plain string concatenation.
        base_dir = Path(self.input_path)
        for file in self.file_names:
            self.data[file] = pd.read_csv(
                base_dir / f"{file}.txt", header=None, sep='\t'
            )
        return self.data

    def print_shape(self):
        """Print the ``(rows, columns)`` shape of every loaded file."""
        print("Files read:")
        for file, frame in self.data.items():
            print(f"{file}: {frame.shape}")

    def create_target_df(self):
        """Label the columns of the ``target`` frame and return the valve column.

        The ``target`` frame stays in ``self.data``; its columns are renamed in
        place. The selected column is also stored as ``self.valve_condition``.

        Returns:
            The ``Valve_Condition`` column of the target frame (a Series).

        Raises:
            KeyError: If the ``target`` file has not been loaded.
        """
        if 'target' not in self.data:
            raise KeyError(
                "'target' is not loaded; list it in file_names and call "
                "read_files() first"
            )
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Load all sensor files plus the target file and extract the valve labels.

    Args:
        input_path: Directory holding the ``.txt`` files. Defaults to the
            notebook's ``input_data/`` folder.
        file_names: File stems to load. ``None`` selects the default list
            below, which includes ``"target"``; a custom list must include
            ``"target"`` as well.

    Returns:
        A ``(data, df_target)`` tuple: ``data`` maps each file stem to its
        DataFrame, ``df_target`` is the ``Valve_Condition`` column returned by
        ``DataProcessor.create_target_df``.
    """
    if file_names is None:
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target"
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() returns the same object it stores as
    # processor.valve_condition, so one assignment is enough.
    df_target = processor.create_target_df()
    return data, df_target

# Load everything once: `data` maps file stem -> DataFrame, `df_target` holds the valve-condition labels.
data, df_target = process_data()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


<h1> tsfresh </h1>

In [40]:
# Pick the ps3 frame for inspection. This binds the very object stored in
# `data` (no copy), so in-place changes would also show up in data['ps3'].
df_features = data['ps3']
# Disabled experiment: add "time" and "id" columns derived from the row index.
# df_features["time"] = df_features.index
# df_features["id"] = df_features.index

In [41]:
# Show the ps3 frame (the notebook renders a truncated preview).
df_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
0,2.305,2.305,2.336,2.578,2.977,3.234,2.414,0.805,0,0,...,2.336,2.391,2.375,2.297,2.328,2.383,2.328,2.250,2.250,2.211
1,2.281,2.320,2.305,2.578,2.945,3.164,2.328,0.742,0,0,...,2.297,2.266,2.266,2.219,2.211,2.266,2.273,2.211,2.195,2.219
2,2.227,2.187,2.156,2.406,2.852,3.109,2.219,0.664,0,0,...,2.359,2.391,2.391,2.375,2.375,2.375,2.305,2.305,2.320,2.266
3,2.320,2.352,2.297,2.500,2.977,3.227,2.328,0.781,0,0,...,2.117,2.219,2.281,2.227,2.164,2.164,2.219,2.250,2.273,2.273
4,2.250,2.250,2.242,2.383,2.758,3.031,2.227,0.719,0,0,...,2.141,2.172,2.187,2.227,2.219,2.211,2.242,2.219,2.227,2.297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2200,2.375,2.352,2.359,2.516,2.648,2.625,2.156,0.906,0,0,...,2.328,2.305,2.328,2.359,2.375,2.281,2.242,2.250,2.266,2.273
2201,2.273,2.266,2.352,2.539,2.664,2.742,2.273,0.992,0,0,...,2.273,2.383,2.359,2.297,2.297,2.336,2.406,2.461,2.461,2.406
2202,2.375,2.437,2.359,2.391,2.602,2.680,2.109,0.797,0,0,...,2.227,2.242,2.219,2.211,2.273,2.273,2.250,2.219,2.219,2.250
2203,2.305,2.414,2.469,2.523,2.680,2.711,2.078,0.766,0,0,...,2.328,2.328,2.328,2.281,2.266,2.305,2.281,2.250,2.242,2.281


In [42]:

from tsfresh import extract_features

# Example data: load the DataFrame
# data = pd.read_csv('your_data.csv')  # in case the data comes from a file

# Reshape the wide 'ce' frame into long format: the row index becomes the
# "id" column, the former column labels become "time", the cells "value".
data_long = (
    data['ce']
    .reset_index()
    .melt(id_vars="index", var_name="time", value_name="value")
    .rename(columns={"index": "id"})
)

# Result:
# columns: ['id', 'time', 'value']


In [43]:
from tsfresh import extract_features
from tsfresh.feature_selection import select_features

# Feature extraction with tsfresh: one series per "id", ordered by "time", values taken from "value".
features = extract_features(data_long, column_id="id", column_sort="time", column_value="value")
# Drop every feature column that contains at least one NaN.
features_cleaned = features.dropna(axis=1) 
# Feature selection based on the target values.
# NOTE(review): the selection sees every row of df_target, including rows that
# later land in the test split of the train/test evaluation -- consider
# selecting on the training rows only.
selected_features = select_features(features_cleaned, y=df_target)  # labels = target variables (if available)


Feature Extraction: 100%|██████████| 30/30 [01:11<00:00,  2.39s/it]


In [44]:
# Re-wrap the selected features as a DataFrame under the working name.
# NOTE(review): select_features presumably already returns a DataFrame, which would make this wrap redundant -- confirm.
df_features = pd.DataFrame(selected_features)

In [45]:
# Show the selected feature table.
df_features

Unnamed: 0,value__lempel_ziv_complexity__bins_2,"value__agg_autocorrelation__f_agg_""mean""__maxlag_40",value__absolute_sum_of_changes,"value__change_quantiles__f_agg_""mean""__isabs_True__qh_1.0__ql_0.0",value__mean_abs_change,value__approximate_entropy__m_2__r_0.5
0,0.233333,-0.030203,19.508,0.330644,0.330644,0.017177
1,0.233333,-0.016262,13.544,0.229559,0.229559,0.106531
2,0.250000,-0.054981,10.697,0.181305,0.181305,0.387071
3,0.300000,-0.148018,11.569,0.196085,0.196085,0.606029
4,0.316667,-0.024456,10.188,0.172678,0.172678,0.633615
...,...,...,...,...,...,...
2200,0.300000,-0.064284,8.622,0.146136,0.146136,0.691328
2201,0.283333,-0.133296,7.780,0.131864,0.131864,0.471079
2202,0.250000,-0.111876,7.645,0.129576,0.129576,0.298029
2203,0.266667,-0.054121,9.146,0.155017,0.155017,0.724266


In [48]:
# Turn +/-inf into NaN so the NaN handling in the following cells covers them.
# Rebinding instead of inplace=True keeps the cell free of hidden mutation.
df_features = df_features.replace([np.inf, -np.inf], np.nan)

In [49]:
# Remove feature columns that consist of NaN only.
df_features = df_features.dropna(axis="columns", how="all")

In [50]:
# Forward-fill remaining gaps down the rows (each NaN takes the value of the row above).
df_features = df_features.ffill(axis=0)

In [None]:
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style="white")

# Compute the correlation matrix of the selected features.
# (`d` from the seaborn gallery example is not defined in this notebook.)
corr = df_features.corr()

# Generate a mask for the upper triangle (the matrix is symmetric)
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio. The colour scale
# spans the full correlation range so strongly correlated features do not
# saturate, and the axes are passed explicitly instead of relying on the
# current-axes state.
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=-1, vmax=1, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
ax.set_title("Correlation between selected features");


In [51]:
from sklearn.feature_selection import VarianceThreshold

# Drop features whose variance does not exceed the default threshold
# (i.e. constant columns). Selecting columns through the fitted support mask,
# rather than taking fit_transform's ndarray, keeps df_features a labelled
# DataFrame and makes the cell safe to re-run.
selector = VarianceThreshold()
selector.fit(df_features)
df_features = df_features.loc[:, selector.get_support()]


In [52]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

  
# Repeated stratified hold-out evaluation: one 80/20 split per seed.
states = [27, 6728, 49122]
accs = []
features = df_features
target = df_target

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=RANDOM_STATE, stratify=target
    )

    # Boosted decision stumps. The ensemble is seeded with the same value as
    # the split so that a re-run reproduces the reported numbers exactly.
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=50,
        random_state=RANDOM_STATE,
    )

    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    # zero_division=0.0 reports 0 for classes that were never predicted.
    print(classification_report(y_test, preds, zero_division=0.0))

# Summarise the accuracy across the seeds.
accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")



Random State: 27
              precision    recall  f1-score   support

          73       0.00      0.00      0.00        72
          80       0.00      0.00      0.00        72
          90       0.00      0.00      0.00        72
         100       0.51      0.97      0.66       225

    accuracy                           0.49       441
   macro avg       0.13      0.24      0.17       441
weighted avg       0.26      0.49      0.34       441

Random State: 6728
              precision    recall  f1-score   support

          73       0.67      0.03      0.05        72
          80       0.38      0.04      0.07        72
          90       0.14      0.01      0.03        72
         100       0.52      0.98      0.68       225

    accuracy                           0.51       441
   macro avg       0.43      0.27      0.21       441
weighted avg       0.46      0.51      0.37       441

Random State: 49122
              precision    recall  f1-score   support

          73       

