In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import sys
sys.path.append('.')
from Scripts.BETAVAE import BetaVAE

In [None]:
# Load data
# Read the array stored in assets/test.npy into `soap_saved`.
# If the file is missing, only a message is printed and `soap_saved` stays
# undefined; the following cells check for its presence before running.
try:
    soap_saved = np.load("assets/test.npy")
except FileNotFoundError:
    print("Error: assets/test.npy not found. Please ensure the data file exists.")
else:
    # Reached only when the load succeeded.
    print(f"Data loaded. Shape: {soap_saved.shape}")

In [None]:
# Preprocessing
# Join the entries of `soap_saved` along the first axis and rescale the result
# with a MinMaxScaler using its default settings.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/validation split performed in a later cell — confirm this is intended.
if 'soap_saved' in locals():
    # axis=0 is np.concatenate's default, spelled out for readability.
    data = np.concatenate(soap_saved, axis=0)
    # Fit first, then transform the same array; `scaler` stays available
    # afterwards for anything that needs the fitted transform.
    scaler = MinMaxScaler().fit(data)
    scaled_data = scaler.transform(data)
    print(f"Scaled data shape: {scaled_data.shape}")

In [None]:
# Split data
# Hold out 25% of the scaled samples as a validation set; the fixed
# random_state keeps the partition identical from run to run.
# The cell is skipped when `scaled_data` has not been created.
if 'scaled_data' in locals():
    train_data, val_data = train_test_split(scaled_data, test_size=0.25, random_state=42)
    # Report how many rows ended up in each subset.
    print(f"Training samples: {train_data.shape[0]}")
    print(f"Validation samples: {val_data.shape[0]}")

In [None]:
# Initialize Model
# Build the BetaVAE whose input width matches the scaled data.
# The cell is skipped when `scaled_data` has not been created.
if 'scaled_data' in locals():
    # Seed torch's global RNG so that re-running this cell (and the training
    # that follows) starts from the same random state; 42 mirrors the
    # random_state used for the train/validation split.
    # NOTE(review): assumes BetaVAE draws its initial weights from torch's
    # global RNG — confirm in Scripts/BETAVAE.py.
    torch.manual_seed(42)

    # One input feature per column of the scaled data.
    INPUT_DIM = scaled_data.shape[1]
    # Hidden dims for Encoder: INPUT_DIM -> 1024 -> 512 -> 512 -> 256 -> CODE_DIM (latent)
    HIDDEN_DIMS = [1024, 512, 512, 256]
    # Two latent dimensions, so the latent means can be drawn as a 2-D scatter.
    CODE_DIM = 2

    model = BetaVAE(input_dim=INPUT_DIM, hidden_dims=HIDDEN_DIMS, code_dim=CODE_DIM)
    print(model)

In [None]:
# Train Model
# Runs only when both the model and the training split exist; `val_data` is
# created by the same statement as `train_data`, so it is not checked separately.
# Settings passed to model.fit: 50 epochs, batch size 256, learning rate 1e-4.
if 'model' in locals() and 'train_data' in locals():
    model.fit(train_data=train_data, val_data=val_data, epochs=50, batch_size=256, learning_rate=1e-4)

In [None]:
# Visualize Latent Space
# Encode every scaled sample and scatter the first two components of the
# latent mean against each other.
if 'model' in locals() and 'scaled_data' in locals():
    # Move the model to the CPU and switch it to evaluation mode.
    device = 'cpu'
    model.to(device)
    model.eval()

    # No gradients are needed for a pure forward pass through the encoder.
    with torch.no_grad():
        full_tensor = torch.Tensor(scaled_data).to(device)
        mu, logvar = model.encode(full_tensor)

    # Only the mean is plotted; `logvar` is kept but not used in this cell.
    proj = mu.cpu().numpy()

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(proj[:, 0], proj[:, 1], s=1, alpha=0.5)
    ax.set_xlabel(r"$\mu_1$")
    ax.set_ylabel(r"$\mu_2$")
    ax.set_title("BetaVAE Latent Space Projection")
    plt.show()