In [1]:
import os
import sys
sys.path.append("..")

import mlflow
import h5py

import lightning.pytorch as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms

# Turn on MLflow's automatic logging for PyTorch (Lightning) training runs.
mlflow.pytorch.autolog()
# Record everything that follows under the MLflow experiment named "cnc".
mlflow.set_experiment("cnc")

# Select the "medium" float32 matmul precision setting.
# NOTE(review): per the torch docs this trades some precision for speed on supporting GPUs — confirm it is acceptable for this model.
torch.set_float32_matmul_precision("medium")

class VibrationDataset(data.Dataset):
    """Fixed-length windows of vibration recordings for autoencoder training.

    Recordings are read from ``<root_dir>/<machine>/<process_name>/<label>/*.h5``.
    Each file must contain a ``"vibration_data"`` dataset that can be viewed as
    ``(num_samples, 3)``. Every recording is scaled linearly so that the range
    [-2500, 2500] maps onto [-1, 1] (values outside that range are not
    clipped), then cut into non-overlapping windows of ``seq_length`` samples.
    A trailing partial window is discarded. All windows are held in memory.

    Args:
        root_dir: Directory that contains one sub-directory per machine.
        machine_names: Machine sub-directories to read.
        process_name: Process sub-directory inside each machine directory.
        label: Label sub-directory inside each process directory.
        seq_length: Number of samples per window; must be positive.

    Raises:
        ValueError: If ``seq_length`` is not positive.
    """

    def __init__(
        self,
        root_dir="../data",
        machine_names=("M01", "M02", "M03"),
        process_name="OP07",
        label="good",
        seq_length=2000,
    ):
        if seq_length <= 0:
            raise ValueError(f"seq_length must be positive, got {seq_length}")

        self.root_dir = root_dir
        # Copy into a list so the instance never shares state with the caller
        # (or with the default argument).
        self.machine_names = list(machine_names)
        self.process_name = process_name
        self.label = label
        self.seq_length = seq_length
        self.data_sequences = []  # one tensor of shape (seq_length, 3) per window

        # Raw value range that is mapped onto [-1, 1].
        min_value, max_value = -2500.0, 2500.0

        for machine_name in self.machine_names:
            data_dir = os.path.join(root_dir, machine_name, process_name, label)
            # os.listdir order is arbitrary; sorting keeps the window order
            # (and therefore any seeded split) reproducible.
            file_list = sorted(
                file for file in os.listdir(data_dir) if file.endswith(".h5")
            )

            for file_name in file_list:
                file_path = os.path.join(data_dir, file_name)

                with h5py.File(file_path, "r") as hf:
                    vibration_data = torch.tensor(
                        hf["vibration_data"][:], dtype=torch.float32
                    )

                # Normalize the vibration data to be between -1 and 1.
                vibration_data = (
                    2 * ((vibration_data - min_value) / (max_value - min_value)) - 1
                )

                # Keep only whole windows; the remainder at the end is dropped.
                num_full_sequences = len(vibration_data) // self.seq_length
                full_sequence_data = vibration_data[
                    : num_full_sequences * self.seq_length
                ]
                sequences = full_sequence_data.view(
                    num_full_sequences, self.seq_length, 3
                )

                self.data_sequences.extend(sequences.unbind(0))

    def __len__(self):
        """Return the number of windows loaded."""
        return len(self.data_sequences)

    def __getitem__(self, idx):
        """Return ``(window, window)``: input and target are the same tensor."""
        sequence = self.data_sequences[idx]
        return sequence, sequence  # input and target are the same for autoencoders


# Define the LSTM-based Encoder and Decoder
class Encoder(nn.Module):
    """Compress a batch-first sequence into the final hidden state of a two-layer LSTM stack.

    ``forward`` returns the last hidden state of the second LSTM, shaped
    ``(1, batch, latent_dim)``.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        # Both layers take input laid out as (batch, time, features).
        self.lstm1 = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_dim, hidden_size=latent_dim, batch_first=True)

    def forward(self, x):
        # Only the per-step outputs of the first layer are needed.
        per_step_features = self.lstm1(x)[0]
        # From the second layer, only the final hidden state is kept.
        final_state = self.lstm2(per_step_features)[1]
        return final_state[0]

class Decoder(nn.Module):
    """Expand a latent state back into a batch-first sequence.

    The input (the encoder's ``(1, batch, latent_dim)`` hidden state) is tiled
    ``seq_length`` times along the first axis, passed through two
    sequence-first LSTMs and a linear projection, and finally rearranged to
    ``(batch, seq_length, output_dim)``.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim, seq_length):
        super().__init__()
        self.seq_length = seq_length
        # These LSTMs use the default (time, batch, features) layout.
        self.lstm1 = nn.LSTM(input_size=latent_dim, hidden_size=latent_dim)
        self.lstm2 = nn.LSTM(input_size=latent_dim, hidden_size=hidden_dim)
        self.linear = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        # Feed the same latent vector at every time step.
        tiled_latent = x.repeat(self.seq_length, 1, 1)
        first_pass = self.lstm1(tiled_latent)[0]
        second_pass = self.lstm2(first_pass)[0]
        projected = self.linear(second_pass)
        # Swap time and batch axes so the output is batch-first.
        return projected.movedim(1, 0)

# Define the LSTM-based Autoencoder LightningModule
class LSTMAutoEncoder(pl.LightningModule):
    """LSTM autoencoder trained to reconstruct its own input.

    The model chains ``Encoder`` and ``Decoder`` and minimises the mean squared
    error between the reconstruction and the input. Constructing the module
    logs its four size parameters to MLflow.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of the intermediate LSTM layers.
        latent_dim: Size of the latent representation.
        seq_length: Number of time steps the decoder emits.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, seq_length):
        super().__init__()
        self.encoder = Encoder(input_dim, hidden_dim, latent_dim)
        self.decoder = Decoder(latent_dim, hidden_dim, input_dim, seq_length)

        params = {
            "input_dim": input_dim,
            "hidden_dim": hidden_dim,
            "latent_dim": latent_dim,
            "seq_length": seq_length,
        }
        mlflow.log_params(params)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

    def _reconstruction_step(self, batch, metric_name):
        """Compute the reconstruction MSE for ``batch`` and log it as ``metric_name``."""
        # The dataset yields (input, target) with target == input; only the
        # input is needed here.
        x, _ = batch
        x_hat = self(x)
        loss = F.mse_loss(x_hat, x)
        self.log(metric_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss, logged as ``train_loss``."""
        return self._reconstruction_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss, logged as ``val_loss``."""
        return self._reconstruction_step(batch, "val_loss")

    def test_step(self, batch, batch_idx):
        """Return the test loss, logged as ``test_loss``."""
        return self._reconstruction_step(batch, "test_loss")

    def configure_optimizers(self):
        """Use Adam over all parameters with a learning rate of 5e-4."""
        optimizer = optim.Adam(self.parameters(), lr=5e-4)
        return optimizer

# Hyperparameters for the LSTM-based autoencoder
input_dim = 3 # X, Y, Z vibration data
hidden_dim = 4  # Arbitrary size for the hidden dimension
latent_dim = 2  # Arbitrary size for the latent dimension
seq_length = 4000  # samples per window; passed explicitly to both datasets below

# Build the autoencoder (its constructor also logs these four values to MLflow)
autoencoder = LSTMAutoEncoder(input_dim, hidden_dim, latent_dim, seq_length)

# Create the full dataset (uses the dataset's default label, "good")
full_dataset = VibrationDataset(seq_length=seq_length)

# Compute the lengths for an 80/20 train/validation split
train_len = int(0.8 * len(full_dataset))
val_len = len(full_dataset) - train_len

# Split the dataset; no generator is passed, so the split depends on the global RNG state
train_dataset, val_dataset = data.dataset.random_split(full_dataset, [train_len, val_len])

# Only the training loader shuffles
train_dataloader = data.DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = data.DataLoader(val_dataset, batch_size=128, shuffle=False)

# The test set is built from the "bad" label; no batch_size is given, so DataLoader's default applies
test_dataset = VibrationDataset(label="bad", seq_length=seq_length)
test_dataloaders = data.DataLoader(test_dataset)

# Train for 40 epochs on a GPU, logging metrics every 10 steps
trainer = pl.Trainer(accelerator="gpu", max_epochs=40, log_every_n_steps=10)
trainer.fit(model=autoencoder, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

# Evaluate reconstruction loss on the "bad" windows.
# NOTE(review): no model is passed, so Lightning chooses which weights to test — pass model=autoencoder to make this explicit.
trainer.test(dataloaders=test_dataloaders)



* 'schema_extra' has been renamed to 'json_schema_extra'
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | T

Epoch 39: 100%|██████████| 12/12 [00:00<00:00, 19.57it/s, v_num=64]

`Trainer.fit` stopped: `max_epochs=40` reached.


Epoch 39: 100%|██████████| 12/12 [00:00<00:00, 19.21it/s, v_num=64]


Restoring states from the checkpoint path at v:\Code\cnc-monitoring-system\notebooks\lightning_logs\version_64\checkpoints\epoch=39-step=480.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at v:\Code\cnc-monitoring-system\notebooks\lightning_logs\version_64\checkpoints\epoch=39-step=480.ckpt
v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 90/90 [00:01<00:00, 59.56it/s]


[{'test_loss': 0.02576933614909649}]