In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from scipy.signal import decimate

#PS: Pressure, bar, 100 Hz --> 100 measurements per second
#EPS: Motor power, W, 100 Hz
#FS: Volume flow, l/min, 10 Hz --> 10 measurements per second
#TS: Temperature, Celsius, 1 Hz --> 1 measurement per second
#VS: Vibration, mm/s, 1 Hz
#CE: Cooling efficiency (virtual), %, 1 Hz
#CP: Cooling power (virtual), kW, 1 Hz
#SE: Efficiency factor, %, 1 Hz


class DataProcessor:
    """Load tab-separated text tables and expose the valve-condition labels.

    Parameters
    ----------
    input_path : str
        Prefix prepended verbatim to every file name, so it must include the
        trailing path separator (e.g. ``"input_data/"``).
    file_names : iterable of str
        Base names (without the ``.txt`` extension) of the files to read.

    Attributes
    ----------
    data : dict
        Maps each file name to the DataFrame read from it. Empty until
        ``read_files`` has been called.
    valve_condition : pandas.Series or None
        The ``'Valve_Condition'`` column of the target table; ``None`` until
        ``create_target_df`` has been called.
    """

    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        # Define the attributes up front so that print_shape() and
        # create_target_df() fail gracefully (instead of with AttributeError)
        # when they are called before read_files().
        self.data = {}
        self.valve_condition = None

    def read_files(self):
        """Read every ``<input_path><name>.txt`` file into ``self.data``.

        The files are parsed as header-less, tab-separated tables. Any
        previously loaded tables are discarded.

        Returns
        -------
        dict
            The freshly populated ``self.data`` mapping.
        """
        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data

    def print_shape(self):
        """Print the ``(rows, columns)`` shape of every loaded table."""
        print("Files read:")
        for file, frame in self.data.items():
            print(f"{file}: {frame.shape}")

    def create_target_df(self):
        """Name the columns of the ``'target'`` table and return the valve labels.

        The table stored under ``'target'`` is relabelled in place and stays
        available in ``self.data`` for later use.

        Returns
        -------
        pandas.Series
            The ``'Valve_Condition'`` column (also stored as
            ``self.valve_condition``).

        Raises
        ------
        KeyError
            If no ``'target'`` table has been loaded.
        ValueError
            Raised by pandas if the table does not have exactly five columns.
        """
        if 'target' not in self.data:
            raise KeyError(
                "'target' has not been loaded; include it in file_names and "
                "call read_files() first"
            )
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Read the sensor tables and return them together with the valve labels.

    Parameters
    ----------
    input_path : str, optional
        Prefix prepended verbatim to each file name; must end with the path
        separator. Defaults to ``"input_data/"``.
    file_names : list of str, optional
        Base names (without ``.txt``) of the files to load. Must contain
        ``"target"``, because the labels are taken from that table. Defaults
        to the full list of sensor files plus ``"target"``.

    Returns
    -------
    tuple
        ``(data, df_target)`` where ``data`` maps each file name to its
        DataFrame and ``df_target`` is the ``'Valve_Condition'`` column of
        the target table.
    """
    if file_names is None:
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target"
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() already returns processor.valve_condition, so no
    # second lookup of that attribute is needed.
    df_target = processor.create_target_df()
    return data, df_target

# Load all tables; `df_target` holds the 'Valve_Condition' label column.
data, df_target = process_data()

<h1> tsfresh </h1>

In [None]:
# Work on a copy: without it `df_se` is the very object stored in `data`, and
# adding the helper columns below would silently modify the raw 'se' table.
df_se = data['se'].copy()
# Attach the label and use the row index as the sample identifier.
df_se['target'] = df_target
df_se['id'] = df_se.index
df_se.shape

In [4]:
# Table read from fs1.txt (FS = volume flow, see the sensor legend at the top).
df_fs1 = data['fs1']

In [5]:
# Keep every 10th sample after anti-alias filtering (per the sensor legend,
# FS is recorded at 10 Hz, so this yields one value per second).
downsample_factor = 10

# decimate() filters and downsamples along the chosen axis, so the whole table
# (one signal per row) is processed in a single call instead of a Python loop
# over the rows.
filtered_signals = decimate(df_fs1.to_numpy(), downsample_factor, ftype='fir', axis=1)

# Create a new DataFrame with the filtered signals
df_fs1_ds = pd.DataFrame(filtered_signals)
    

In [None]:
# Append the label column and a per-row identifier taken from the index.
df_fs1_ds = df_fs1_ds.assign(target=df_target, id=df_fs1_ds.index)
df_fs1_ds.shape

In [7]:
# Table read from ps3.txt (PS = pressure, see the sensor legend at the top).
df_ps3 = data['ps3']

# Keep every 100th sample after anti-alias filtering (per the sensor legend,
# PS is recorded at 100 Hz, so this yields one value per second).
downsample_factor = 100

# decimate() filters and downsamples along the chosen axis, so the whole table
# (one signal per row) is processed in a single call instead of a Python loop
# over the rows.
filtered_signals = decimate(df_ps3.to_numpy(), downsample_factor, ftype='fir', axis=1)

# Create a new DataFrame with the filtered signals
df_ps3_ds = pd.DataFrame(filtered_signals)

In [None]:
# Append the label column and a per-row identifier taken from the index.
df_ps3_ds = df_ps3_ds.assign(target=df_target, id=df_ps3_ds.index)
df_ps3_ds.shape

In [10]:
# Stack the three frames on top of each other (pd.concat's default axis=0) and
# renumber the rows. Each frame carries its own `id` column, so presumably
# every id value occurs once per source frame in the result.
# NOTE(review): after melting, readings from all three sensors share the same
# (id, time) keys and end up in one series per id - confirm this is intended;
# a separate sensor/"kind" column would keep the signals apart.
df_combined = pd.concat([df_se, df_fs1_ds, df_ps3_ds], ignore_index=True)

In [11]:
# Detach the label column from the feature frame: pop() returns the column
# and removes it from `df_combined` in one step.
df_target = df_combined.pop('target')

In [None]:
# Inspect the combined frame (label column already removed).
df_combined

In [12]:
# Reshape to long format: the former column labels go into `time`, the cell
# contents into `value`, and `id` is repeated on every row.
df_long = df_combined.melt(id_vars=['id'], var_name='time', value_name='value')

In [None]:
# Inspect the long-format frame (columns: id, time, value).
df_long

In [53]:
from sklearn.preprocessing import LabelEncoder

# Replace the labels by integer class codes; `encoder` keeps the fitted
# label-to-code mapping. Note that `df_target` is rebound to the encoded array.
encoder = LabelEncoder().fit(df_target)
df_target = encoder.transform(df_target)

In [None]:
# Number of encoded labels.
df_target.shape

In [None]:
# Rows and columns of the stacked feature frame, for comparison with the labels.
df_combined.shape

In [None]:
from tsfresh import extract_features
from tsfresh.feature_selection import select_features

# Feature extraction with tsfresh: rows are grouped by `id` and ordered by `time`.
features = extract_features(df_long, column_id="id", column_sort="time")
# Keep only the feature columns that contain no missing values.
features_cleaned = features.dropna(axis=1)

In [None]:
# Inspect the extracted features that survived the NaN filter.
features_cleaned

In [66]:
# Re-read the labels from the raw target table: earlier cells rebound
# `df_target` (stacked label column, then its encoded form), which no longer
# corresponds one-to-one to the per-id feature rows.
df_target = data['target']['Valve_Condition']

In [65]:
# Feature selection based on the target values
selected_features = select_features(features_cleaned, y=df_target)  # labels = target variable (if available)

In [None]:
# Inspect the features kept by the selection step.
selected_features

In [68]:
# Wrap the selected features in a DataFrame for the clean-up steps below.
# NOTE(review): for DataFrame input the constructor does not copy by default,
# so `df_features` may share its data with `selected_features`.
df_features = pd.DataFrame(selected_features)

In [74]:
# Map the valve-condition labels to integer class codes (0 .. n_classes-1).
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df_target)

In [70]:
# Turn +/-inf into NaN so the NaN handling below covers them as well.
# Reassign instead of using inplace=True: `df_features` was built from
# `selected_features` without an explicit copy, so an in-place edit may also
# change that frame, and re-running the cell stays side-effect free this way.
df_features = df_features.replace([np.inf, -np.inf], np.nan)

In [71]:
# Drop feature columns that consist entirely of NaN.
df_features = df_features.dropna(how = "all", axis= "columns")

In [72]:
# Forward-fill remaining NaNs down each column, i.e. from the previous row.
# NOTE(review): this copies a value from a different sample and cannot fill
# NaNs in the first row - confirm this imputation is intended.
df_features = df_features.ffill(axis="index")

In [None]:
from sklearn.feature_selection import VarianceThreshold

# Remove constant (zero-variance) features using the default threshold.
# Note: with scikit-learn's default output setting fit_transform returns a
# NumPy array, so `df_features` is no longer a DataFrame after this cell.
selector = VarianceThreshold()
df_features = selector.fit_transform(df_features)


In [None]:
# Sanity check: the number of feature rows should equal the number of labels.
print(df_features.shape, y_encoded.shape)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


# AdaBoost over depth-2 decision trees, evaluated on three stratified 80/20
# splits; the accuracy of every split is collected in `accs`.
states = [27, 6728, 49122]
accs = []
features = df_features
target = y_encoded

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size = 0.2, random_state = RANDOM_STATE, stratify = target
    )

    # Seed the ensemble as well: the base trees break ties between equally
    # good splits at random, so without random_state the scores could change
    # from run to run even though the split itself is fixed.
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=2),
        n_estimators=30,
        random_state=RANDOM_STATE
    )

    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

# Summarise the accuracy across the splits.
accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")

In [None]:
from sklearn import svm

# Linear-kernel SVM, evaluated on the same three stratified 80/20 splits;
# the accuracy of every split is collected in `accs`.
states = [27, 6728, 49122]
accs = []
features = df_features
target = y_encoded

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size = 0.2, random_state = RANDOM_STATE, stratify = target
    )

    clf = svm.SVC(kernel='linear')

    clf.fit(X_train, y_train)
    # Predict with the SVM that was just fitted. Using `model` here would
    # score the AdaBoost estimator left over from the previous cell (or raise
    # NameError on a fresh kernel) and report its results as the SVM's.
    preds = clf.predict(X_test)
    accs.append(accuracy_score(y_test, preds))
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

# Summarise the accuracy across the splits.
accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# 3-nearest-neighbours classifier on standardised features, evaluated on
# three stratified 80/20 splits; per-split accuracies are gathered in `accs`.
features, target = df_features, y_encoded
states = [27, 6728, 49122]
accs = []

for RANDOM_STATE in states:
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=RANDOM_STATE,
        stratify=target,
    )

    # Learn the scaling statistics from the training part only, then apply
    # the same transformation to both parts.
    scaler = StandardScaler().fit(X_train)
    X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

    knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

    # Score the fitted k-NN model on the held-out part.
    preds = knn.predict(X_test)
    accs.append(accuracy_score(y_test, preds))

    # Report the results for this split
    print(f"Random State: {RANDOM_STATE}")
    print(classification_report(y_test, preds, zero_division=0.0))

# Summarise the accuracy across the splits.
accs_mean = round(np.mean(accs), 4)
accs_std = round(np.std(accs), 4)

print(f"Mean Accuracy: {accs_mean}")
print(f"Std Accuracy: {accs_std}")