<h2>1. Imports and load data</h2>

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from torch.utils.data import DataLoader, TensorDataset


In [2]:
class DataProcessor:
    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        
    def read_files(self):
        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data
    
    def print_shape(self):
        print("Files read:")
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")
            
    def create_target_df(self):
        target_columns = ['Cooler_Condition', 'Valve_Condition', 
                        'Internal_Pump_Leakage', 'Hydraulic_Accumulator', 
                        'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        #del self.data['target']
        return self.valve_condition

def process_data():
    input_path = "input_data/"
    file_names = [
        "ce", "cp", "eps1", "se", "vs1", 
        "fs1", "fs2", 
        "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
        "ts1", "ts2", "ts3", "ts4", "target"
    ]
    
    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    df_target = processor.create_target_df()
    df_target = processor.valve_condition
    return data, df_target

data, df_target = process_data()

Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


<h2>2. Create input and target data </h2>

We use the six sensors 'eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3' 

In [None]:
df_list = ['eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3']
input_df = pd.concat([data[i] for i in df_list], axis = 1)
input_df

Standardise the input and target data

In [9]:
# Standardise the target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df_target)

# Standatdise the input
scaler = StandardScaler()
input_data_scaled = scaler.fit_transform(input_df)

In [6]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import torch
from tsai.all import *

# Aufteilen in Trainings- und Testdaten
X_train, X_test, y_train, y_test = train_test_split(input_data_scaled, y_encoded, test_size=0.2, random_state=27, stratify=y_encoded)

# Umwandeln in PyTorch-Tensoren
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Umwandeln der Daten in ein DataLoader Format für tsai
train_data = [(x, y) for x, y in zip(X_train_tensor, y_train_tensor)]
test_data = [(x, y) for x, y in zip(X_test_tensor, y_test_tensor)]

# DatLoader erstellen
train_dl = DataLoader(train_data, batch_size=32, shuffle=True)
test_dl = DataLoader(test_data, batch_size=32, shuffle=False)

# Modell mit tsai erstellen
# Wir nutzen das TabularModel, das speziell für tabellarische Daten geeignet ist
model = TabularModel(n_in=X_train.shape[1], n_out=len(set(y_encoded)), layers=[200, 100], ps=0.4)

# Training des Modells mit tsai
learn = Learner(dls=(train_dl, test_dl), model=model, loss_func=CrossEntropyLossFlat(), opt_func=Adam, metrics=[accuracy])

# Lernen und Training
learn.fit_one_cycle(100, lr_max=0.002)

# Vorhersagen für den Testdatensatz
y_pred, y_true = learn.get_preds(dl=test_dl)

# Berechnung der Genauigkeit und Ausgabe des Klassifikationsberichts
y_pred_classes = torch.argmax(y_pred, dim=1).cpu().numpy()
print(classification_report(y_true.cpu().numpy(), y_pred_classes, zero_division=0.0))

# Genauigkeit berechnen
accuracy = accuracy_score(y_true.cpu().numpy(), y_pred_classes)
print(f"Test Accuracy: {accuracy:.4f}")


ModuleNotFoundError: No module named 'tsai'