In [4]:
import os
import sys
import yaml
# sys.path.append(os.path.join(os.getcwd(), 'detection_copied_from_timewak'))
sys.path.append(os.path.abspath('.')) # to run files that are away
os.environ["WANDB_SILENT"] = "true"  # Suppress WandB logs

libraries = ["torch", "numpy", "polars"]
modules   = {lib: sys.modules.get(lib) for lib in libraries}

if not modules["torch"]:
    import torch
if not modules["numpy"]:
    import numpy as np
if not modules["polars"]:
    import polars as pl

import pandas as pd
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import MinMaxScaler

from autoencoder import Autoencoder, train_autoencoder, compute_reconstruction_loss

In [5]:
"""Loading the data"""

with open('params.yaml', 'r') as file:
    config = yaml.safe_load(file)

data_file  = f"./datasets/{config['dataset']['filename']}"
df         = pd.read_csv(data_file)

X          = MinMaxScaler().fit_transform(df.values)
X_tensor   = torch.tensor(X, dtype=torch.float32)
dataset    = TensorDataset(X_tensor, X_tensor)  # Autoencoder reconstructs the input
train_size = int(config['dataset']['train_valid_split'] * len(df))
val_size   = len(df) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])


In [None]:
"""Autoencoder training"""

hidden_dim      = config['autoencoder_design']['hidden_dim']
encoding_dim    = config['autoencoder_design']['encoding_dim']
latent_dim      = config['autoencoder_design']['latent_dim']
dropout_prob    = config['autoencoder_design']['dropout_prob']

training_epochs = config['autoencoder_training']['training_epochs']
batch_size      = config['autoencoder_training']['batch_size']
optimizer_lr    = config['autoencoder_training']['optimizer_lr']
weight_decay    = config['autoencoder_training']['weight_decay']
ae_training_patience = config['autoencoder_training']['training_patience']


# Create DataLoader objects for train and validation datasets
input_size   = X.shape[1]
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

autoencoder  = Autoencoder(input_size, hidden_dim, encoding_dim, latent_dim, dropout_prob)
optimizer    = torch.optim.AdamW(autoencoder.parameters(), lr=optimizer_lr, weight_decay=weight_decay)

train_autoencoder(autoencoder, training_epochs, train_loader, optimizer,
                  validation_loader=val_loader, patience = ae_training_patience)

Epoch [1/30], training loss: 0.0323
Validation loss: 0.0144
Epoch [2/30], training loss: 0.0101
Validation loss: 0.0078
Epoch [3/30], training loss: 0.0077
Validation loss: 0.0076
Epoch [4/30], training loss: 0.0073
Validation loss: 0.0067
Epoch [5/30], training loss: 0.0061
Validation loss: 0.0059
Epoch [6/30], training loss: 0.0059
Validation loss: 0.0057
Epoch [7/30], training loss: 0.0058
Validation loss: 0.0058
Epoch [8/30], training loss: 0.0057
Validation loss: 0.0056
Epoch [9/30], training loss: 0.0057
Validation loss: 0.0057
Epoch [10/30], training loss: 0.0056
Validation loss: 0.0056
Epoch [11/30], training loss: 0.0055
Validation loss: 0.0057
Epoch [12/30], training loss: 0.0055
Validation loss: 0.0056
Epoch [13/30], training loss: 0.0054
Validation loss: 0.0055
Epoch [14/30], training loss: 0.0054
Validation loss: 0.0055
Epoch [15/30], training loss: 0.0054
Validation loss: 0.0054
Epoch [16/30], training loss: 0.0053
Validation loss: 0.0054
Epoch [17/30], training loss: 0.0

In [None]:
"""Run AE on new dataset
Can be for reconstruction, anomaly detection, feature extraction, classification"""

autoencoder.eval()  # Set the model to evaluation mode (disables dropout)

with torch.no_grad():  # No gradient computation during inference
    x_input = torch.tensor(new_data, dtype=torch.float32)  # New data input
    x_reconstructed = autoencoder(x_input)  # Reconstruct input

reconstruction_error = compute_reconstruction_loss(x_input, x_reconstructed)


NameError: name 'new_data' is not defined

In [None]:
# Diffusion model

# Train Model
#     Sample t, add noise → z_t
#     Predict ε̂ = model(z_t, t)
#     Loss = MSE(ε, ε̂)

# Sampling
#     Start from noise z_T
#     Iteratively denoise to get z₀
#     Decode with autoencoder to get time series

import torch.nn as nn

t = 4 #timestep (from loop)

z_latent   = torch.tensor([[1, 2., 32., 4],[ 2, 8, 4, 6]])
timesteps  = 1000  # total diffusion steps
noise_betas= torch.linspace(1e-4, 0.02, timesteps)

alphas     = 1.0 - noise_betas
alpha_bars = torch.cumprod(alphas, dim=0)
alpha_bar_t= alpha_bars[t]

eps_noise  = torch.randn_like(z_latent)
z_t        = (alpha_bar_t.sqrt() * z_latent) + ((1 - alpha_bar_t).sqrt() * eps_noise)
eps_hat    = (z_t - alpha_bar_t.sqrt() * z_latent) / (1 - alpha_bar_t).sqrt()





tensor([[ 1.,  2., 32.,  4.],
        [ 2.,  8.,  4.,  6.]])
