In [1]:
# Notebook setup: imports, module search path and logging configuration.
import os
import sys
import yaml

# Make modules that sit next to this notebook importable. The membership check
# keeps sys.path from growing every time this cell is re-run.
_notebook_dir = os.path.abspath('.')
if _notebook_dir not in sys.path:
    sys.path.append(_notebook_dir)
os.environ["WANDB_SILENT"] = "true"  # Suppress WandB logs

# Import unconditionally. `import` is already a cheap cache hit when the module
# is in sys.modules, and it always binds the name in this namespace. Guarding on
# sys.modules instead would skip the binding whenever some other package had
# loaded the module first, leaving `torch` / `np` / `pl` undefined here.
import torch
import numpy as np
import polars as pl

import pandas as pd
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import MinMaxScaler

from autoencoder import Autoencoder, train_autoencoder, compute_reconstruction_loss

In [2]:
"""Loading the data"""

# Read the run configuration; every later cell pulls its settings from yaml_config.
with open('params.yaml', 'r') as file:
    yaml_config = yaml.safe_load(file)

data_file  = f"./datasets/{yaml_config['dataset']['filename']}"
df         = pd.read_csv(data_file)

# Keep the fitted scaler around so that new data can later be transformed with
# the same per-feature min/max (and reconstructions inverse-transformed).
# NOTE(review): the scaler is fit on the full dataset before the train/validation
# split below, so validation rows influence the scaling of the training rows.
scaler     = MinMaxScaler()
X_scaled   = scaler.fit_transform(df.values)
X_tensor   = torch.tensor(X_scaled, dtype=torch.float32)
dataset    = TensorDataset(X_tensor, X_tensor)  # Autoencoder reconstructs the input
train_size = int(yaml_config['dataset']['train_valid_split'] * len(df))
val_size   = len(df) - train_size

# Seed the split so the same rows land in train/validation on every run; an
# unseeded random_split draws from the global RNG and changes between runs.
# `split_seed` is an optional config key; 0 is used when it is absent.
split_seed      = yaml_config['dataset'].get('split_seed', 0)
split_generator = torch.Generator().manual_seed(split_seed)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size],
                                          generator=split_generator)


In [3]:
"""Autoencoder training"""

# Architecture hyper-parameters, taken from the `autoencoder_design` config section.
ae_design = yaml_config['autoencoder_design']
hidden_dim, encoding_dim, latent_dim, dropout_prob = (
    ae_design[key]
    for key in ('hidden_dim', 'encoding_dim', 'latent_dim', 'dropout_prob')
)

# Optimisation hyper-parameters, taken from the `autoencoder_training` config section.
ae_training = yaml_config['autoencoder_training']
training_epochs, batch_size, optimizer_lr, weight_decay, training_patience = (
    ae_training[key]
    for key in ('training_epochs', 'batch_size', 'optimizer_lr',
                'weight_decay', 'training_patience')
)

# The model's input width is the number of columns in the scaled data.
input_size = X_scaled.shape[1]

# Batch both splits; only the training batches are shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

autoencoder = Autoencoder(input_size, hidden_dim, encoding_dim, latent_dim, dropout_prob)
optimizer = torch.optim.AdamW(
    autoencoder.parameters(),
    lr=optimizer_lr,
    weight_decay=weight_decay,
)

train_autoencoder(
    autoencoder,
    training_epochs,
    train_loader,
    optimizer,
    validation_loader=val_loader,
    patience=training_patience,
)

Epoch [1/30], training loss: 0.0315
Validation loss: 0.0108
Epoch [2/30], training loss: 0.0075
Validation loss: 0.0063
Epoch [3/30], training loss: 0.0062
Validation loss: 0.0060
Epoch [4/30], training loss: 0.0060
Validation loss: 0.0061
Epoch [5/30], training loss: 0.0059
Validation loss: 0.0057
Epoch [6/30], training loss: 0.0055
Validation loss: 0.0049
Epoch [7/30], training loss: 0.0048
Validation loss: 0.0046
Epoch [8/30], training loss: 0.0046
Validation loss: 0.0046
Epoch [9/30], training loss: 0.0046
Validation loss: 0.0045
Epoch [10/30], training loss: 0.0045
Validation loss: 0.0044
Epoch [11/30], training loss: 0.0045
Validation loss: 0.0046
Epoch [12/30], training loss: 0.0044
Validation loss: 0.0043
Epoch [13/30], training loss: 0.0044
Validation loss: 0.0046
Epoch [14/30], training loss: 0.0044
Validation loss: 0.0043
Epoch [15/30], training loss: 0.0043
Validation loss: 0.0042
Epoch [16/30], training loss: 0.0042
Validation loss: 0.0041
Epoch [17/30], training loss: 0.0

In [4]:
"""Run AE on new dataset
Can be for reconstruction, anomaly detection, feature extraction, classification"""

# PAUSED, WILL PICK UP LATER
# Nothing in this cell executes apart from the bare string above; because that
# string is the cell's only expression, Jupyter echoes it as the cell output.
# TODO before re-enabling the block below:
#   - `new_data` is not defined in any visible cell and has to be supplied.
#   - NOTE(review): the training data was MinMax-scaled before being fed to the
#     autoencoder, so new data presumably needs the same scaling - confirm.

# autoencoder.eval()  # Set the model to evaluation mode (disables dropout)

# with torch.no_grad():  # No gradient computation during inference
#     x_input = torch.tensor(new_data, dtype=torch.float32)  # New data input
#     x_reconstructed = autoencoder(x_input)  # Reconstruct input

# reconstruction_error = compute_reconstruction_loss(x_input, x_reconstructed)


'Run AE on new dataset\nCan be for reconstruction, anomaly detection, feature extraction, classification'

In [None]:
"""Diffusion training"""

import diffusion_model as diff

# NOTE(review): this cell rebinds dropout_prob, optimizer_lr, weight_decay,
# training_patience, optimizer and X_tensor, all of which earlier cells also
# define - after it runs they hold the diffusion values, not the autoencoder ones.

diff_steps = yaml_config['diffusion_design']['diff_steps']

# Noise-schedule settings.
scheduler_cfg = yaml_config['noise_scheduler']
cos_start_offset, noise_profile, start_noise_val, end_noise_val = (
    scheduler_cfg[key]
    for key in ('cos_start_offset', 'noise_profile', 'start_noise_val', 'end_noise_val')
)

# Training-loop settings.
diff_training_cfg = yaml_config['diffusion_training']
train_epochs, training_patience, optimizer_lr, weight_decay = (
    diff_training_cfg[key]
    for key in ('train_epochs', 'training_patience', 'optimizer_lr', 'weight_decay')
)

# U-Net settings.
unet_cfg = yaml_config['Unet_design']
base_channels, dropout_prob = unet_cfg['base_channels'], unet_cfg['dropout_prob']

# unsqueeze(-1) appends a trailing singleton axis, so the result is
# [rows, features, 1] - not [B, 1, L].
features_2d = torch.tensor(X_scaled, dtype=torch.float32)
X_tensor = features_2d.unsqueeze(-1)
print(f"{X_tensor.shape=}")

betas = diff.get_noise_schedule(
    start_val=start_noise_val,
    end_val=end_noise_val,
    diff_steps=diff_steps,
    cos_start_offset=cos_start_offset,
    noise_profile=noise_profile,
)

# The U-Net's first argument is the size of axis 1, i.e. the feature count.
n_features = X_tensor.shape[1]
diffusion_model = diff.UNet(n_features, dropout_prob, base_channels)
optimizer = torch.optim.AdamW(
    diffusion_model.parameters(),
    lr=optimizer_lr,
    weight_decay=weight_decay,
)

# NOTE(review): train_loader / val_loader are not built in this cell; they are the
# loaders created for the autoencoder, so X_tensor above is only used for its
# shape and never reaches train_diffusion - confirm this is intended.
diff.train_diffusion(
    diffusion_model,
    train_loader,
    optimizer,
    betas,
    diff_steps,
    train_epochs,
    val_loader,
    training_patience,
)


X_tensor.shape=torch.Size([19735, 28, 1])
conv_block: in_channels = 28, out_channels = 18
conv_block: in_channels = 18, out_channels = 36
conv_block: in_channels = 36, out_channels = 72
conv_block: in_channels = 72, out_channels = 36
conv_block: in_channels = 36, out_channels = 18
Epoch [1/4], Loss: 0.9847, Val Loss: 0.9512
Epoch [2/4], Loss: 0.9499, Val Loss: 0.9316
Epoch [3/4], Loss: 0.9345, Val Loss: 0.9196
Epoch [4/4], Loss: 0.9299, Val Loss: 0.9083


In [None]:
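# TODO(review): is the diffusion training above actually working? Over 4 epochs the loss only drops from 0.9847 to 0.9299 (validation 0.9512 to 0.9083).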