In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# PS: Pressure, bar, 100 Hz --> 100 measurements per second
# EPS: Motor power, W, 100 Hz
# FS: Volume flow, l/min, 10 Hz --> 10 measurements per second
# TS: Temperature, Celsius, 1 Hz --> 1 measurement per second
# VS: Vibration, mm/s, 1 Hz
# CE: Cooling efficiency (virtual), %, 1 Hz
# CP: Cooling power (virtual), kW, 1 Hz
# SE: Efficiency factor, %, 1 Hz


class DataProcessor:
    """Load tab-separated ``.txt`` files and expose the valve-condition target.

    Every entry of ``file_names`` is resolved to ``<input_path>/<name>.txt``
    and read without a header row into ``self.data[name]``.
    """

    def __init__(self, input_path, file_names):
        """Remember where the files live and which ones to load.

        Args:
            input_path: Directory containing the ``.txt`` files. A trailing
                path separator is optional.
            file_names: Iterable of file stems (names without ``.txt``).
        """
        self.input_path = input_path
        self.file_names = file_names
        # Defined up front so the other methods can be called before
        # read_files() without hitting an AttributeError.
        self.data = {}
        self.valve_condition = None

    def read_files(self):
        """Read all configured files into ``self.data``.

        Returns:
            dict: Maps each file stem to the DataFrame read from its file.

        Raises:
            FileNotFoundError: If one of the files does not exist.
        """
        import os  # local import keeps the class usable without touching file-level imports

        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            # os.path.join tolerates input_path with or without a trailing
            # separator; plain string concatenation required the separator.
            path = os.path.join(self.input_path, file + '.txt')
            self.data[file] = pd.read_csv(path, header=None, sep='\t')
        return self.data

    def print_shape(self):
        """Print the shape of every DataFrame currently held in ``self.data``."""
        print("Files read:")
        for file, frame in self.data.items():
            print(f"{file}: {frame.shape}")

    def create_target_df(self):
        """Name the columns of the ``'target'`` frame and return one of them.

        The column names are assigned on ``self.data['target']`` itself, so
        the frame stored in ``self.data`` carries the names afterwards.

        Returns:
            pandas.Series: The ``'Valve_Condition'`` column, also stored as
            ``self.valve_condition``.

        Raises:
            KeyError: If no ``'target'`` entry has been loaded.
            ValueError: Raised by pandas if the target frame does not have
                exactly five columns.
        """
        if 'target' not in self.data:
            raise KeyError(
                "'target' is not loaded; include it in file_names and call read_files() first"
            )
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                        'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                        'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Load the sensor files and the target file and return both.

    Args:
        input_path: Directory holding the ``.txt`` files. Defaults to
            ``"input_data/"``.
        file_names: File stems to load. ``None`` selects the default list
            below; the list must contain ``"target"``.

    Returns:
        tuple: ``(data, df_target)`` where ``data`` is the dict returned by
        ``DataProcessor.read_files`` (it still contains the ``'target'``
        entry) and ``df_target`` is the ``'Valve_Condition'`` column.
    """
    if file_names is None:
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target"
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() already returns processor.valve_condition.
    df_target = processor.create_target_df()
    return data, df_target


data, df_target = process_data()

In [None]:
import pandas as pd
from tsfel.feature_extraction import features
import tsfel

# Separate input (X) and target (y)
X = data['ce']
y = df_target

# tsfel configuration for the feature extraction
cfg = tsfel.get_features_by_domain()  # all domains
extracted_features = []

# Extract the features of every row separately.
# NOTE(review): no sampling rate (fs) is passed to tsfel; the channel notes
# at the top of the notebook list CE at 1 Hz — confirm whether fs=1 is wanted.
for _, row in X.iterrows():
    features_row = tsfel.time_series_features_extractor(cfg, row.values)
    extracted_features.append(features_row)

# tsfel returns a DataFrame per call, so the per-row results are stacked with
# concat; pd.DataFrame(<list of DataFrames>) does not build one row per sample.
features_df = pd.concat(extracted_features, ignore_index=True)

# Attach the targets
features_df['Target'] = y.values

# features_df can now be used for model training.


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Split into training and test data. The split is stratified on the target,
# matching the AdaBoost evaluation cell further down in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    features_df.drop(columns=['Target']),
    features_df['Target'],
    test_size=0.2,
    random_state=42,
    stratify=features_df['Target'],
)

# Train the model; the fixed seed makes a re-run give the same forest.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)

<h1> tsfresh </h1>

In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

# Time grids over 0..60 s: one point per original SE sample, and 6000 points
# for the 100 Hz target rate. The source grid length follows the data instead
# of a hard-coded 60, so it always matches the number of SE columns.
se_values = data['se'].to_numpy(dtype=float)
time_original = np.linspace(0, 60, se_values.shape[1])
time_upsampled = np.linspace(0, 60, 6000)

# Linear interpolation of all rows in one call (axis=1 interpolates along the
# columns of each row). The result is a float array rather than an
# object-dtype frame filled row by row.
linear_interpolator = interp1d(time_original, se_values, kind='linear', axis=1)
df_se = pd.DataFrame(linear_interpolator(time_upsampled))

# Inspect the last rows of the upsampled data
print(df_se.tail())


In [28]:
# Build df_upsampled from the upsampled SE frame.
df_upsampled = pd.DataFrame(data=df_se)

In [None]:
# Show df_upsampled as this cell's output.
df_upsampled

In [30]:

from tsfresh import extract_features

# Reshape data['ps3'] from wide to long format: the row index becomes 'id',
# the former column labels go to 'time' and the cell values to 'value'.
data_long = (
    data['ps3']
    .reset_index()
    .melt(id_vars="index", var_name="time", value_name="value")
    .rename(columns={"index": "id"})
)

# Resulting columns: ['id', 'time', 'value']


In [None]:
# Show the long-format frame as this cell's output.
data_long

In [6]:
# Work on a copy so that adding the helper columns does not modify the raw
# data['ps3'] table that other cells read.
df_features = data['ps3'].copy()
df_features["time"] = df_features.index
df_features["id"] = df_features.index

In [32]:
from sklearn.preprocessing import LabelEncoder

# Fit the label encoder on the target, then translate the target with it.
encoder = LabelEncoder().fit(df_target)
y_encoded = encoder.transform(df_target)

In [None]:
from tsfresh import extract_features
from tsfresh.feature_selection import select_features

# Feature extraction with tsfresh: one series per 'id', ordered by 'time'.
features = extract_features(
    data_long,
    column_id="id",
    column_sort="time",
    column_value="value",
)

# Keep only the feature columns that contain no missing value.
features_cleaned = features.dropna(axis="columns")

# Feature selection against the encoded target values.
selected_features = select_features(features_cleaned, y_encoded)


: 

In [6]:
# Build df_features from the selected tsfresh features.
df_features = pd.DataFrame(data=selected_features)

In [None]:
# Show df_features as this cell's output.
df_features

In [32]:
# Turn +/-inf into NaN. The result is rebound instead of using inplace=True,
# so the frame this one was built from is left untouched.
df_features = df_features.replace([np.inf, -np.inf], np.nan)

In [33]:
# Remove the columns in which every value is missing.
df_features = df_features.dropna(axis=1, how="all")

In [34]:
# Forward-fill remaining gaps down the rows (each NaN takes the value above it).
df_features = df_features.ffill(axis=0)

In [None]:
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style="white")

# Pairwise correlation between the feature columns
corr = df_features.corr()

# Boolean mask that hides the upper triangle, diagonal included
mask = np.triu(np.ones(corr.shape, dtype=bool))

# Figure and axes the heatmap is drawn on
f, ax = plt.subplots(figsize=(11, 9))

# Diverging colormap for the correlation values
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Masked heatmap with square cells, drawn on the axes created above
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=0.3,
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.5},
    ax=ax,
)


In [8]:
from sklearn.feature_selection import VarianceThreshold

# Drop the features rejected by VarianceThreshold (default threshold) while
# keeping df_features a DataFrame: fit_transform() would return a bare array
# and lose the column names.
selector = VarianceThreshold()
selector.fit(df_features)
df_features = df_features.loc[:, selector.get_support()]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

  
# Seeds for the repeated train/test evaluation
states = [27, 6728, 49122]
accs = []
features = df_features
target = y_encoded

for RANDOM_STATE in states:
    # Stratified 80/20 split, seeded per run
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=RANDOM_STATE, stratify=target
    )

    # Boosted decision stumps. The classifier gets the same seed as the
    # split, so each run is reproducible and not only the partitioning.
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=50,
        random_state=RANDOM_STATE,
    )

    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

# Mean and standard deviation of the accuracy over all seeds
accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")