<h1>Classification with a Convolutional Neural Network using PyTorch</h1>

<h2>1. Imports and load data</h2>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from aeon.classification.deep_learning import TimeCNNClassifier
from tensorflow.keras.callbacks import EarlyStopping
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
class DataProcessor:
    """Load tab-separated sensor files and expose the valve-condition target.

    Parameters
    ----------
    input_path : str
        Directory containing the ``<name>.txt`` files. A trailing path
        separator is optional.
    file_names : list of str
        File stems (without the ``.txt`` suffix). Each stem becomes a key of
        ``self.data`` after :meth:`read_files` has run.
    """

    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        # Defined up front so print_shape() can be called before read_files()
        # without raising AttributeError.
        self.data = {}

    def read_files(self):
        """Read every configured file into a DataFrame.

        Returns
        -------
        dict
            Mapping of file stem to the DataFrame parsed from
            ``<input_path>/<stem>.txt`` (tab-separated, no header row).
        """
        import os  # local import: `os` is not part of the notebook's first import cell

        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            # os.path.join works with and without a trailing separator on
            # input_path, unlike plain string concatenation.
            file_path = os.path.join(self.input_path, file + '.txt')
            with open(file_path, 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data

    def print_shape(self):
        """Print the shape of every DataFrame that has been read."""
        print("Files read:")
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")

    def create_target_df(self):
        """Name the target columns and return the valve-condition column.

        The column names are assigned in place on ``self.data['target']``,
        which stays available in ``self.data`` afterwards.

        Returns
        -------
        pandas.Series
            The ``Valve_Condition`` column of the target frame.

        Raises
        ------
        KeyError
            If no ``'target'`` file has been read.
        """
        if 'target' not in self.data:
            raise KeyError("'target' has not been loaded; call read_files() with 'target' in file_names first")
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Read the sensor files and return them together with the valve target.

    Parameters
    ----------
    input_path : str, optional
        Directory containing the ``.txt`` files. Defaults to ``"input_data/"``.
    file_names : list of str, optional
        File stems to load. Defaults to the 17 sensor files plus ``"target"``.
        The list must contain ``"target"`` because the target column is
        extracted from it.

    Returns
    -------
    tuple
        ``(data, df_target)`` where ``data`` maps each file stem to its
        DataFrame and ``df_target`` is the ``Valve_Condition`` column.
    """
    if file_names is None:
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target"
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() already returns the valve-condition column.
    df_target = processor.create_target_df()
    return data, df_target

# Load every configured file once: `data` maps file stem -> DataFrame and
# `df_target` is the Valve_Condition column of the target file.
data, df_target = process_data()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


<h2>2. Create input and target data </h2>

We use the six sensors that were identified as relevant during data exploration ('eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3') and concatenate their readings column-wise into a single feature matrix.

In [3]:
# Sensor files to use; their columns are placed side by side, one row per sample.
df_list = ['eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3']
sensor_frames = [data[sensor_name] for sensor_name in df_list]
input_df = pd.concat(sensor_frames, axis=1)
features = input_df.to_numpy()
features

array([[2.4116e+03, 2.4116e+03, 2.4116e+03, ..., 2.2500e+00, 2.2500e+00,
        2.2110e+00],
       [2.4096e+03, 2.4096e+03, 2.4096e+03, ..., 2.2110e+00, 2.1950e+00,
        2.2190e+00],
       [2.3978e+03, 2.3978e+03, 2.3978e+03, ..., 2.3050e+00, 2.3200e+00,
        2.2660e+00],
       ...,
       [2.4136e+03, 2.4136e+03, 2.4136e+03, ..., 2.2190e+00, 2.2190e+00,
        2.2500e+00],
       [2.4136e+03, 2.4136e+03, 2.4136e+03, ..., 2.2500e+00, 2.2420e+00,
        2.2810e+00],
       [2.4158e+03, 2.4156e+03, 2.4156e+03, ..., 2.2730e+00, 2.2270e+00,
        2.2500e+00]])

In [4]:
# (rows, total number of columns of the concatenated sensor frames)
features.shape

(2205, 24660)

In [5]:
# Encode the raw valve-condition values as integer class ids.
# The fitted encoder is kept so predictions can be mapped back later.
label_encoder = LabelEncoder().fit(df_target)
y_encoded = label_encoder.transform(df_target)
y_encoded


array([3, 3, 3, ..., 3, 3, 3], dtype=int64)

<h2>3. Create the model, train it & make predictions </h2>

In [6]:
import os
from pathlib import Path
import torch
import numpy as np
from torchaudio.transforms import Spectrogram
from torchvision.utils import save_image
from sklearn.model_selection import train_test_split

# `features` is a 2-D array with one row per sample; 20 % of the rows are held
# out, stratified on the encoded labels.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

base_dir = Path("data")

transform = Spectrogram(n_fft=32)

# Every row is treated as one single-channel waveform.
num_channels = 1
time_length = X_train.shape[1] // num_channels


def save_split_spectrograms(samples, labels, split_dir, transform, num_channels=1):
    """Write one spectrogram PNG per sample and channel below ``split_dir``.

    Files are stored as ``<split_dir>/<label>/<idx>_channel_<channel>.png``,
    where ``idx`` is the row position inside ``samples``.

    Parameters
    ----------
    samples : numpy.ndarray
        2-D array, one flattened waveform per row.
    labels : sequence
        One label per row; used as the sub-directory name.
    split_dir : pathlib.Path
        Output directory of the split (created if missing).
    transform : callable
        Maps a 1-D float tensor to the spectrogram tensor that is saved.
    num_channels : int, optional
        Number of channels each row is reshaped into (time x channels).
    """
    os.makedirs(split_dir, exist_ok=True)
    steps_per_channel = samples.shape[1] // num_channels

    for idx, (sample, label) in enumerate(zip(samples, labels)):
        sample_dir = split_dir / Path(f"{label}")
        os.makedirs(sample_dir, exist_ok=True)

        # Reshape the 1-D sample into 2-D (time x channels); rows whose length
        # is not divisible by num_channels are reported and skipped.
        try:
            sample_reshaped = sample.reshape(steps_per_channel, num_channels)
        except ValueError:
            print(f"Sample {idx} kann nicht in {num_channels} Kanäle umgeformt werden. Überspringe...")
            continue

        for channel in range(num_channels):
            waveform = torch.Tensor(sample_reshaped[:, channel])
            spectrogram = transform(waveform)
            save_image(spectrogram, sample_dir / f"{idx}_channel_{channel}.png")


save_split_spectrograms(X_train, y_train, base_dir / "train", transform, num_channels)

time_length = X_test.shape[1] // num_channels
save_split_spectrograms(X_test, y_test, base_dir / "test", transform, num_channels)



In [7]:
import os
from pathlib import Path
from torch.utils.data import Dataset
from torchvision.io import decode_image
from torchvision.transforms import Grayscale
import torch
from torchaudio.transforms import Spectrogram
from torchvision.utils import save_image
from sklearn.model_selection import train_test_split

# `features` is a 2-D array with one row per sample; 20 % of the rows are held
# out, stratified on the encoded labels.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

base_dir = Path("data")

# Spectrogram transform with an explicit hop length.
transform = Spectrogram(n_fft=32, hop_length=16)

# Every row is treated as one single-channel waveform.
num_channels = 1
time_length = X_train.shape[1] // num_channels


def save_normalized_spectrograms(samples, labels, split_dir, transform, num_channels=1):
    """Write one min-max-normalised spectrogram PNG per sample and channel.

    Files are stored as ``<split_dir>/<label>/<idx>_channel_<channel>.png``,
    where ``idx`` is the row position inside ``samples``.

    Parameters
    ----------
    samples : numpy.ndarray
        2-D array, one flattened waveform per row.
    labels : sequence
        One label per row; used as the sub-directory name.
    split_dir : pathlib.Path
        Output directory of the split (created if missing).
    transform : callable
        Maps a 1-D float tensor to a spectrogram tensor.
    num_channels : int, optional
        Number of channels each row is reshaped into (time x channels).
    """
    os.makedirs(split_dir, exist_ok=True)
    steps_per_channel = samples.shape[1] // num_channels

    for idx, (sample, label) in enumerate(zip(samples, labels)):
        sample_dir = split_dir / Path(f"{label}")
        os.makedirs(sample_dir, exist_ok=True)

        # Reshape the 1-D sample into 2-D (time x channels); rows whose length
        # is not divisible by num_channels are reported and skipped.
        try:
            sample_reshaped = sample.reshape(steps_per_channel, num_channels)
        except ValueError as e:
            print(f"Sample {idx} cannot be reshaped into {num_channels} channels. Error: {e}. Skipping...")
            continue

        for channel in range(num_channels):
            waveform = torch.Tensor(sample_reshaped[:, channel])
            spectrogram = transform(waveform)
            # Scale to [0, 1]. A constant spectrogram has zero range, which
            # would turn the division into 0/0 = NaN; save zeros instead.
            lowest, highest = spectrogram.min(), spectrogram.max()
            if highest > lowest:
                spectrogram = (spectrogram - lowest) / (highest - lowest)
            else:
                spectrogram = torch.zeros_like(spectrogram)
            save_image(spectrogram, sample_dir / f"{idx}_channel_{channel}.png")


save_normalized_spectrograms(X_train, y_train, base_dir / "train", transform, num_channels)
save_normalized_spectrograms(X_test, y_test, base_dir / "test", transform, num_channels)

class SpectrogramDataset(Dataset):
    """Dataset of spectrogram images stored as ``<directory>/<label>/<file>``.

    Every sub-directory name is parsed as an integer class label. The files of
    a class are sorted and then grouped into consecutive chunks of
    ``channels`` files; each complete chunk is one sample. A trailing
    incomplete chunk is ignored.

    Parameters
    ----------
    directory : Path
        Root directory of one split.
    channels : int, optional
        Number of image files that make up one sample.
    transform : callable, optional
        Applied to every decoded (single-channel) image tensor.
    target_transform : callable, optional
        Applied to the integer label.

    Raises
    ------
    ValueError
        If ``channels`` is smaller than 1.
    """

    def __init__(
        self,
        directory: Path,
        channels: int = 1,
        transform=None,
        target_transform=None,
    ):
        if channels < 1:
            raise ValueError(f"channels must be >= 1, got {channels}")

        self.data_dir = directory
        self.channels = channels
        self.transform = transform
        self.target_transform = target_transform

        self.labels = []
        self.samples = []

        # os.scandir yields entries in arbitrary order; sort so that sample
        # order and channel grouping are deterministic.
        class_dirs = sorted(
            (entry.path for entry in os.scandir(self.data_dir) if entry.is_dir()),
            key=os.path.basename,
        )

        for class_dir in class_dirs:
            # Directory name is the label (e.g. "data/train/0" -> "0").
            label = os.path.basename(class_dir)
            files = sorted(
                (entry.path for entry in os.scandir(class_dir) if entry.is_file()),
                key=self._file_sort_key,
            )
            last_start = len(files) - self.channels
            for start in range(0, last_start + 1, self.channels):
                self.labels.append(int(label))
                self.samples.append(files[start:start + self.channels])

    @staticmethod
    def _file_sort_key(path):
        """Order ``<idx>_channel_<c>.*`` files by (idx, c) numerically; other names follow alphabetically."""
        name = os.path.basename(path)
        stem = os.path.splitext(name)[0]
        sample_idx, separator, channel_idx = stem.partition("_channel_")
        if separator and sample_idx.isdigit() and channel_idx.isdigit():
            return (0, int(sample_idx), int(channel_idx), name)
        return (1, 0, 0, name)

    def __len__(self):
        """Number of complete samples found on disk."""
        return len(self.labels)

    def __getitem__(self, index: int) -> tuple:
        """Return ``(spectrograms, label)`` for the sample at ``index``.

        ``spectrograms`` is a float32 tensor whose first dimension equals
        ``self.channels``; ``label`` is the (optionally transformed) class id.
        """
        spectrograms = []
        for image_path in self.samples[index]:
            image = decode_image(image_path)
            # Decide on the decoded image, not on the number of files per
            # sample: torchvision's save_image expands single-channel tensors
            # to three channels, so the stored PNGs decode as RGB even when
            # channels == 1.
            if image.shape[0] != 1:
                image = Grayscale(num_output_channels=1)(image)
            if self.transform:
                image = self.transform(image)
            spectrograms.append(image)

        label = self.labels[index]
        if self.target_transform:
            label = self.target_transform(label)

        spectrograms = torch.stack(spectrograms).type(torch.float32)
        # Drop the per-image channel axis: (channels, 1, H, W) -> (channels, H, W).
        spectrograms = spectrograms.squeeze(1)

        assert spectrograms.shape[0] == self.channels
        return spectrograms, label

In [8]:
# One dataset per split directory; each sample consists of a single image file.
train_dir = base_dir / "train"
test_dir = base_dir / "test"
train_data = SpectrogramDataset(train_dir, channels=1)
test_data = SpectrogramDataset(test_dir, channels=1)

In [9]:
# Training batches are reshuffled every epoch; test batches keep the dataset order.
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=8)

In [10]:
import torch.nn as nn

class ConvModel(nn.Module):
    """Small 2-D CNN for spectrogram classification.

    Parameters
    ----------
    in_channels : int, optional
        Number of input channels. Defaults to 8.
    num_classes : int, optional
        Number of output units. With the default of 1 the network ends in a
        sigmoid and returns a probability of shape ``(batch, 1)`` (for
        ``nn.BCELoss``). With more than one class it returns raw logits of
        shape ``(batch, num_classes)`` (for ``nn.CrossEntropyLoss``).

    Notes
    -----
    An adaptive pooling layer reduces the feature map to 4 x 4 before the
    dense head, so spatial sizes other than 17 x 17 are accepted. For a
    17 x 17 input the feature map is already 4 x 4 and the pooling is an
    identity. The layer has no parameters, so ``state_dict`` keys are
    unchanged.
    """

    def __init__(self, in_channels: int = 8, num_classes: int = 1) -> None:
        super().__init__()
        # Shape comments refer to the default 8-channel, 17 x 17 input.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=2),  # bs, 8, 17, 17 -> bs, 64, 16, 16
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),  # bs, 64, 16, 16 -> bs, 64, 8, 8
            nn.Conv2d(64, 64, kernel_size=1),  # bs, 64, 8, 8 -> bs, 64, 8, 8
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),  # bs, 64, 8, 8 -> bs, 64, 4, 4
        )
        # Guarantees the 64 * 4 * 4 features the first linear layer expects.
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.flatten = nn.Flatten()

        head = [
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        ]
        if num_classes == 1:
            # Single output unit: binary classification -> sigmoid probability.
            head.append(nn.Sigmoid())
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Map a ``(batch, in_channels, H, W)`` tensor to ``(batch, num_classes)``."""
        x = self.conv(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [11]:
# Pick the GPU when one is available and place the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvModel().to(device)

# NOTE(review): BCELoss expects targets in [0, 1], while the encoded labels shown
# earlier in the notebook include the value 3 — confirm the loss matches the task.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [13]:
# NOTE(review): the recorded traceback reports a 2-D batch of size [32, 24660],
# i.e. `train_loader` pointed at the raw-feature loader when this cell ran.
# Run it right after the spectrogram loaders are created.
num_epochs = 10  # number of passes over the training data
for epoch in range(num_epochs):
    model.train()  # enable training behaviour (e.g. batch-norm statistics updates)
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()         # reset gradients, otherwise they accumulate
        outputs = model(inputs)       # forward pass

        # The loss requires targets with the same shape as the outputs: the
        # model returns (batch, 1) while the loader yields labels of shape (batch,).
        targets = labels.float().reshape(outputs.shape)
        loss = criterion(outputs, targets)
        loss.backward()               # back-propagation: compute gradients
        optimizer.step()              # update the model parameters

        running_loss += loss.item()   # accumulate for the epoch average

    # Report the mean batch loss of the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 8, 2, 2], but got 2-dimensional input of size [32, 24660] instead

In [12]:
# Split the raw feature matrix into a training and a validation part.
# NOTE(review): no `stratify` here, unlike the spectrogram split — confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(
    features, y_encoded, test_size=0.2, random_state=42
)

# Convert the numpy arrays to tensors: float32 inputs, int64 class ids.
X_train_tensor, X_val_tensor = (
    torch.tensor(array, dtype=torch.float32) for array in (X_train, X_val)
)
y_train_tensor, y_val_tensor = (
    torch.tensor(array, dtype=torch.long) for array in (y_train, y_val)
)

# Wrap the tensors in datasets and batch them.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# NOTE(review): this rebinds `train_loader`; any loader of that name created
# earlier in the session is no longer reachable afterwards.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32)

# Add a channel axis at position 1.
# NOTE(review): unsqueeze returns new tensors, so the datasets and loaders built
# above still hold the 2-D tensors — confirm whether they should get the extra axis.
X_train_tensor, X_val_tensor = X_train_tensor.unsqueeze(1), X_val_tensor.unsqueeze(1)
