<h1> Classification with a Neural Network using Keras (Sequential API)</h1>

<h2>1. Imports and load data</h2>

In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from aeon.classification.deep_learning import TimeCNNClassifier
from tensorflow.keras.callbacks import EarlyStopping

In [80]:
class DataProcessor:
    def __init__(self, input_path, file_names):
        self.input_path = input_path
        self.file_names = file_names
        
    def read_files(self):
        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data
    
    def print_shape(self):
        print("Files read:")
        for file in self.data:
            print(f"{file}: {self.data[file].shape}")
            
    def create_target_df(self):
        target_columns = ['Cooler_Condition', 'Valve_Condition', 
                        'Internal_Pump_Leakage', 'Hydraulic_Accumulator', 
                        'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        #del self.data['target']
        return self.valve_condition

def process_data():
    input_path = "input_data/"
    file_names = [
        "ce", "cp", "eps1", "se", "vs1", 
        "fs1", "fs2", 
        "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
        "ts1", "ts2", "ts3", "ts4", "target"
    ]
    
    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    df_target = processor.create_target_df()
    df_target = processor.valve_condition
    return data, df_target

data, df_target = process_data()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


<h2>2. Create input and target data </h2>

We use the six sensors which we identified as relevant during data exploration: 'eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3' 

In [81]:
df_list = ['eps1', 'se', 'fs1', 'ps1', 'ps2', 'ps3']
input_df = pd.concat([data[i] for i in df_list], axis = 1)
features = input_df.to_numpy()
features

array([[2.4116e+03, 2.4116e+03, 2.4116e+03, ..., 2.2500e+00, 2.2500e+00,
        2.2110e+00],
       [2.4096e+03, 2.4096e+03, 2.4096e+03, ..., 2.2110e+00, 2.1950e+00,
        2.2190e+00],
       [2.3978e+03, 2.3978e+03, 2.3978e+03, ..., 2.3050e+00, 2.3200e+00,
        2.2660e+00],
       ...,
       [2.4136e+03, 2.4136e+03, 2.4136e+03, ..., 2.2190e+00, 2.2190e+00,
        2.2500e+00],
       [2.4136e+03, 2.4136e+03, 2.4136e+03, ..., 2.2500e+00, 2.2420e+00,
        2.2810e+00],
       [2.4158e+03, 2.4156e+03, 2.4156e+03, ..., 2.2730e+00, 2.2270e+00,
        2.2500e+00]])

In [82]:
features.shape

(2205, 24660)

In [83]:
# Standardise the target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df_target)
y_encoded


array([3, 3, 3, ..., 3, 3, 3], dtype=int64)

<h2>3. Create the model, train it & make predictions </h2>

In [84]:
import os
from pathlib import Path
import torch
from torchaudio.transforms import Spectrogram
from torchvision.utils import save_image

# Daten aufteilen
X_train, X_test, y_train, y_test = train_test_split(
        features, y_encoded, test_size=0.2, stratify=y_encoded
    )

base_dir = Path("data")
os.makedirs(base_dir / "train", exist_ok=True)

transform = Spectrogram(n_fft=32)

for idx, (sample, label) in enumerate(zip(X_train, y_train)):
    sample_dir = base_dir / "train" / Path(f"class_{label}")
    os.makedirs(sample_dir, exist_ok=True)
    
    # Angenommen, sample ist 1D (24660 Merkmale)
    waveform = torch.Tensor(sample)  # Sample direkt in ein Tensor umwandeln
    spectrogram = transform(waveform)  # Spectrogramm erzeugen
    save_image(spectrogram, sample_dir / f"{idx}.png")  # Speichern

In [85]:
base_dir = Path("data")
os.makedirs(base_dir / "test", exist_ok=True)

transform = Spectrogram(n_fft=32)

for idx, (sample, label) in enumerate(zip(X_test, y_test)):
    sample_dir = base_dir / "test" / Path(f"class_{label}")
    os.makedirs(sample_dir, exist_ok=True)
    
    # Angenommen, sample ist 1D (24660 Merkmale)
    waveform = torch.Tensor(sample)  # Sample direkt in ein Tensor umwandeln
    spectrogram = transform(waveform)  # Spectrogramm erzeugen
    save_image(spectrogram, sample_dir / f"{idx}.png")  # Speichern


In [86]:
from torch.utils.data import Dataset, DataLoader
from torchvision.io import decode_image
from torchvision.transforms import Grayscale

In [87]:
from torch.utils.data import Dataset
from PIL import Image
import torch
from torchvision import transforms

class SpectrogramDataset(Dataset):
    def __init__(
        self,
        directory: Path,
        channels: int = 8,
        transform=None,
        target_transform=None,
    ):
        self.data_dir = directory
        self.channels = channels
        self.transform = transform
        self.target_transform = target_transform

        # Iterate over all files in the given directory and define labels
        self.labels = []
        self.samples = []
        subdirs = [f.path for f in os.scandir(self.data_dir) if f.is_dir()]

        for class_dir in subdirs:
            label = class_dir.split("_")[-1]
            samples = [f.path for f in os.scandir(class_dir) if f.is_file()]
            measurement = []
            for idx, sample in enumerate(samples):
                idx += 1
                measurement.append(sample)
                if idx % self.channels == 0:
                    self.labels.append(int(label))
                    self.samples.append(measurement)
                    measurement = []

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample_paths = self.samples[idx]  # List of paths to the spectrogram files
        label = self.labels[idx]
        
        # Read and stack the images (spectrograms)
        images = []
        for path in sample_paths:
            image = Image.open(path)  # Load the image (spectrogram)
            image = image.convert("RGB")  # Convert to RGB (if necessary)
            images.append(image)

        # Convert list of images to a single tensor (stack along a new axis)
        sample = torch.stack([transforms.ToTensor()(img) for img in images])

        if self.transform:
            sample = self.transform(sample)

        if self.target_transform:
            label = self.target_transform(label)

        return sample, label


In [88]:
train_data = SpectrogramDataset(base_dir / "train")
test_data = SpectrogramDataset(base_dir / "test")

In [89]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [91]:
import torch
from torch import nn
from torch.utils.data import DataLoader

# Define the model
class ConvModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # bs, 8, 17, 17 -> bs, 64, 16, 16
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),  # bs, 64, 16, 16 -> bs, 64, 8, 8
            nn.Conv2d(64, 64, kernel_size=1),  # bs, 64, 8, 8 -> bs, 64, 8, 8
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),  # bs, 64, 8, 8 -> bs, 64, 4, 4
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 4),  # One output neuron for binary classification
            nn.ReLU(),  # Binary classification --> Sigmoid activation
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


# Initialize the model and move it to device (GPU or CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ConvModel().to(device)

# Assuming you have a DataLoader `train_loader` and a suitable loss function
criterion = nn.BCELoss()  # BCELoss with Sigmoid in model
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Example loop (assuming `train_loader` provides the correct data)
epochs = 20
training_loss = []

for epoch in range(epochs):
    epoch_loss = 0.0
    for i, data in enumerate(train_loader):
        images, labels = data
        images = images.to(device)
        labels = labels.to(device).to(torch.float32)  # Make sure labels are float32 for BCE

        optimizer.zero_grad()
        outputs = model(images)  # Pass images through the model
        loss = criterion(outputs.squeeze(-1), labels)  # Compute loss
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    training_loss.append(epoch_loss)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")


RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 5-dimensional input of size [32, 8, 3, 17, 1542] instead

In [76]:
# from torch import optim
# model = ConvModel()

# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay = 0.01)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

Using device: cpu


In [None]:
# from time import time

# epochs = 20

# training_loss = []

# for epoch in range(epochs):
#     start = time()
#     epoch_loss = 0.0
#     for i, data in enumerate(train_loader):
#         images, labels = data
        
#         images = images.to(device)
#         labels = labels.to(device).to(torch.float32)
        
#         optimizer.zero_grad()
        
#         outputs = model(images)
        
#         outputs.squeeze(-1)
        
#         loss = criterion(outputs.squeeze(-1), labels)
        
#         optimizer.step()
        
#         epoch_loss += loss.item()
#     stop = time()
#     training_loss.append(epoch_loss)
#     print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}, Time: {stop - start:.2f}s")


NotImplementedError: 

In [71]:
states = [27, 6728, 49122]
accs = []