In [1]:
import os
import sys

import yaml

# Put the current working directory on the module search path so that the
# project-local modules imported by later cells (`autoencoder`,
# `diffusion_model`) can be found — presumably they live next to this notebook.
sys.path.append(os.path.abspath('.'))
os.environ["WANDB_SILENT"] = "true"  # Suppress WandB logs

# Import unconditionally. `import` already reuses the entry cached in
# `sys.modules`, so guarding on `sys.modules` saves nothing — and when the
# library *is* already cached (e.g. after `%reset -f`, or because another module
# imported it first) the guarded form skipped the statement entirely and left
# `torch` / `np` / `pl` unbound in the notebook namespace.
import numpy as np
import pandas as pd
import polars as pl
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset, random_split



In [2]:
"""Loading the data"""

# Read the run configuration; the later cells pull their hyperparameters from it.
with open('params.yaml', 'r') as file:
    yaml_config = yaml.safe_load(file)

data_file  = f"./datasets/{yaml_config['dataset']['filename']}"
df         = pd.read_csv(data_file)

# Min-max scale every column (scaler defaults).
# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/validation split below, so validation rows influence the scaling —
# confirm this is acceptable for the experiment.
X_scaled   = MinMaxScaler().fit_transform(df.values)
X_tensor   = torch.tensor(X_scaled, dtype=torch.float32)
dataset    = TensorDataset(X_tensor, X_tensor)  # Autoencoder reconstructs the input

train_size = int(yaml_config['dataset']['train_valid_split'] * len(df))
val_size   = len(df) - train_size  # remainder, so the two sizes always sum to len(df)

# Seed the split so the same rows land in train/validation on every run;
# without a generator the partition changes each time the cell is executed.
# `split_seed` is an optional key in the `dataset` section of params.yaml.
split_seed = yaml_config['dataset'].get('split_seed', 42)
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)


In [None]:
"""Autoencoder training"""

import autoencoder as ae
# Alternative (unused): from autoencoder import Autoencoder, train_autoencoder

# Network shape, from the `autoencoder_design` section of the loaded config.
design_cfg = yaml_config['autoencoder_design']
hidden_dim, encoding_dim, latent_dim, dropout_prob = (
    design_cfg[key]
    for key in ('hidden_dim', 'encoding_dim', 'latent_dim', 'dropout_prob')
)

# Optimisation settings, from the `autoencoder_training` section.
training_cfg = yaml_config['autoencoder_training']
(training_epochs, batch_size, optimizer_lr, weight_decay, training_patience,
 scheduler_patience, scheduler_mode, scheduler_factor) = (
    training_cfg[key]
    for key in ('training_epochs', 'batch_size', 'optimizer_lr', 'weight_decay',
                'training_patience', 'scheduler_patience', 'scheduler_mode',
                'scheduler_factor')
)

# The device is chosen inside the `autoencoder` module; reuse it here.
device = ae.device
print(f"Using device from module: {device}")

# Batch the train/validation subsets; only the training batches are shuffled.
input_size = X_scaled.shape[1]  # number of columns in the scaled data
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

autoencoder = ae.Autoencoder(input_size, hidden_dim, encoding_dim, latent_dim, dropout_prob)
optimizer = torch.optim.AdamW(
    autoencoder.parameters(),
    lr=optimizer_lr,
    weight_decay=weight_decay,
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode=scheduler_mode,
    factor=scheduler_factor,
    patience=scheduler_patience,
)

ae.train_autoencoder(
    device, autoencoder, training_epochs, train_loader, optimizer, scheduler,
    validation_loader=val_loader,
    patience=training_patience,
)


Using device: cuda
Using device from module: cuda
Epoch [1/30], training loss: 0.1546
Validation loss: 0.0291
Epoch [2/30], training loss: 0.0514
Validation loss: 0.0089
Epoch [3/30], training loss: 0.0249
Validation loss: 0.0034
Epoch [4/30], training loss: 0.0150
Validation loss: 0.0018
Epoch [5/30], training loss: 0.0099
Validation loss: 0.0010
Epoch [6/30], training loss: 0.0068
Validation loss: 0.0006
Epoch [7/30], training loss: 0.0048
Validation loss: 0.0004
Epoch [8/30], training loss: 0.0035
Validation loss: 0.0002
Epoch [9/30], training loss: 0.0024
Validation loss: 0.0002
Epoch [10/30], training loss: 0.0017
Validation loss: 0.0001
Epoch [11/30], training loss: 0.0011
Validation loss: 0.0001
Epoch [12/30], training loss: 0.0008
Validation loss: 0.0000
Epoch [13/30], training loss: 0.0005
Validation loss: 0.0000
Epoch [14/30], training loss: 0.0003
Validation loss: 0.0000
Epoch [15/30], training loss: 0.0002
Validation loss: 0.0000
Epoch [16/30], training loss: 0.0001
Validat

In [4]:
"""Run AE on new dataset
Can be for reconstruction, anomaly detection, feature extraction, classification"""

# TODO: PAUSED, WILL PICK UP LATER.
# The inference sketch below is deliberately left commented out. Before enabling
# it, note that `new_data` and `compute_reconstruction_loss` are not defined in
# any cell of this notebook as shown — both have to be provided first.

# autoencoder.eval()  # Set the model to evaluation mode (disables dropout)

# with torch.no_grad():  # No gradient computation during inference
#     x_input = torch.tensor(new_data, dtype=torch.float32)  # New data input
#     x_reconstructed = autoencoder(x_input)  # Reconstruct input

# reconstruction_error = compute_reconstruction_loss(x_input, x_reconstructed)


'Run AE on new dataset\nCan be for reconstruction, anomaly detection, feature extraction, classification'

In [5]:
"""Diffusion training"""

import diffusion_model as diff

# Number of diffusion steps.
diff_steps = yaml_config['diffusion_design']['diff_steps']

# Noise-schedule settings.
noise_cfg = yaml_config['noise_scheduler']
cos_start_offset, noise_profile, start_noise_val, end_noise_val = (
    noise_cfg[key]
    for key in ('cos_start_offset', 'noise_profile', 'start_noise_val', 'end_noise_val')
)

# Optimisation settings. These reuse (and overwrite) the names set by the
# autoencoder-training cell. The three `scheduler_*` values are read but not
# consumed in this cell: the CosineAnnealingLR scheduler below only takes T_max.
training_cfg = yaml_config['diffusion_training']
(train_epochs, training_patience, optimizer_lr, weight_decay,
 scheduler_patience, scheduler_mode, scheduler_factor) = (
    training_cfg[key]
    for key in ('train_epochs', 'training_patience', 'optimizer_lr', 'weight_decay',
                'scheduler_patience', 'scheduler_mode', 'scheduler_factor')
)

# U-Net settings.
unet_cfg = yaml_config['Unet_design']
base_channels, dropout_prob = unet_cfg['base_channels'], unet_cfg['dropout_prob']

# The device is chosen inside the `diffusion_model` module; reuse it here.
device = diff.device
print(f"Using device from module: {device}")

# Append a trailing singleton axis: [N, n_features] -> [N, n_features, 1].
# Axis 1 (the feature count) is what gets passed to the U-Net as its first argument.
features = torch.tensor(X_scaled, dtype=torch.float32)
X_tensor = features.unsqueeze(-1)
print(f"{X_tensor.shape=}")

betas = diff.get_noise_schedule(
    start_val=start_noise_val,
    end_val=end_noise_val,
    diff_steps=diff_steps,
    cos_start_offset=cos_start_offset,
    noise_profile=noise_profile,
)
n_channels = X_tensor.shape[1]
diffusion_model = diff.UNet(n_channels, dropout_prob, base_channels)
optimizer = torch.optim.AdamW(
    diffusion_model.parameters(),
    lr=optimizer_lr,
    weight_decay=weight_decay,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=train_epochs)

# NOTE(review): `train_loader` / `val_loader` are the loaders created in the
# autoencoder-training cell (built from the 2-D dataset), not from the reshaped
# `X_tensor` above — confirm `train_diffusion` expects batches in that layout.
diff.train_diffusion(
    device, diffusion_model, train_loader, optimizer, scheduler, betas,
    diff_steps, train_epochs, val_loader, training_patience,
)


Using device: cuda
Using device from module: cuda
X_tensor.shape=torch.Size([19735, 28, 1])
conv_block: in_channels = 28, out_channels = 64
conv_block: in_channels = 64, out_channels = 128
conv_block: in_channels = 128, out_channels = 256
conv_block: in_channels = 256, out_channels = 128
conv_block: in_channels = 128, out_channels = 64
Epoch [1/100], loss: 0.8332, Validation loss: 0.6627
Epoch [2/100], loss: 0.7052, Validation loss: 0.5734
Epoch [3/100], loss: 0.6679, Validation loss: 0.5392
Epoch [4/100], loss: 0.6503, Validation loss: 0.5358
Epoch [5/100], loss: 0.6391, Validation loss: 0.5215
Epoch [6/100], loss: 0.6398, Validation loss: 0.5223
Epoch [7/100], loss: 0.6341, Validation loss: 0.5133
Epoch [8/100], loss: 0.6306, Validation loss: 0.5165
Epoch [9/100], loss: 0.6265, Validation loss: 0.5036
Epoch [10/100], loss: 0.6251, Validation loss: 0.4987
Epoch [11/100], loss: 0.6220, Validation loss: 0.4970
Epoch [12/100], loss: 0.6228, Validation loss: 0.4895
Epoch [13/100], loss: 0

KeyboardInterrupt: 

In [None]:
# is this working?