In [11]:
import sys
import pathlib
import numpy as np
import pandas as pd
import optuna
import torch
import logging 

from torch.utils.data import DataLoader, TensorDataset

from optimize_utils import get_optimize_args, objective

# Put the data-download scripts directory at the front of sys.path so that the
# project-local `data_loader` module imported below can be resolved
# (presumably it lives in that directory -- verify against the repo layout).
# The path is relative, so `.resolve()` anchors it to the current working
# directory: the notebook must be run from its own folder.
script_directory = pathlib.Path("../0.data-download/scripts/").resolve()
sys.path.insert(0, str(script_directory))
from data_loader import load_train_test_data

In [12]:
# Location of the downloaded data, resolved against the current working directory.
data_directory = pathlib.Path("../0.data-download/data").resolve()

# Parse the optimization arguments (not referenced again by the cells shown here).
args = get_optimize_args()

# Fetch every split in one call, together with the gene statistics.
# NOTE(review): the unpacking order (train, test, val, gene stats) and the exact
# effect of `zero_one_normalize` are defined by `load_train_test_data` -- confirm
# against data_loader.py.
train_data, test_data, val_data, load_gene_stats = load_train_test_data(
    data_directory,
    train_or_test="all",
    load_gene_stats=True,
    zero_one_normalize=True,
)


In [9]:
# Convert the loaded splits to float32 tensors.
# torch.tensor() is documented for lists, tuples, NumPy arrays and scalars, not
# pandas DataFrames, so each split is first materialised as a float32 NumPy
# array. np.asarray() accepts both DataFrames and plain arrays, which keeps this
# cell working whichever of the two the loader returns.
# Assumes every column is numeric -- TODO confirm against load_train_test_data.
train_tensor = torch.tensor(np.asarray(train_data, dtype=np.float32), dtype=torch.float32)
test_tensor = torch.tensor(np.asarray(test_data, dtype=np.float32), dtype=torch.float32)
val_tensor = torch.tensor(np.asarray(val_data, dtype=np.float32), dtype=torch.float32)

In [13]:
# Run Optuna optimization and persist the study in a local SQLite database.

# Mirror Optuna's log messages to stdout so they show up in the cell output.
# The handler is attached only if an equivalent one is not already present:
# logger handlers persist in the kernel, so re-running this cell would otherwise
# stack another handler each time and print every message once per re-run.
optuna_logger = optuna.logging.get_logger("optuna")
stdout_handler_attached = any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in optuna_logger.handlers
)
if not stdout_handler_attached:
    optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

study_name = "BetaVAE-Optimization"  # Unique identifier of the study.
storage_name = f"sqlite:///{study_name}.db"

# load_if_exists=True resumes the stored study instead of failing when a study
# with this name is already present in the database.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction="minimize",
    load_if_exists=True,
)

# Each trial calls the project's `objective` with the training and validation
# tensors plus the original training data; the returned value is minimized.
study.optimize(
    lambda trial: objective(trial, train_tensor, val_tensor, train_data),
    n_trials=500,
)

[I 2024-07-22 11:25:52,037] Using an existing study with name 'BetaVAE-Optimization' instead of creating a new one.


Using an existing study with name 'BetaVAE-Optimization' instead of creating a new one.
Using an existing study with name 'BetaVAE-Optimization' instead of creating a new one.


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 0, Loss: 312.5887644298041
Epoch 1, Loss: 204.12741808535446
Epoch 2, Loss: 148.99623750145756
Epoch 3, Loss: 114.8589209713153
Epoch 4, Loss: 91.33887939453125
Epoch 5, Loss: 76.19772521061684
Epoch 6, Loss: 66.40432593502216
Epoch 7, Loss: 59.631047490817394
Epoch 8, Loss: 55.40601542458605
Epoch 9, Loss: 52.54718637039412
Epoch 10, Loss: 50.99891740030317
Epoch 11, Loss: 50.09756524384911
Epoch 12, Loss: 49.28920907547225
Epoch 13, Loss: 49.09690232917444
Epoch 14, Loss: 48.7471973020639
Epoch 15, Loss: 48.4799899428638
Epoch 16, Loss: 48.38474312397971
Epoch 17, Loss: 48.31475046641791
Epoch 18, Loss: 48.209614494665345
Epoch 19, Loss: 48.18853859972598
Epoch 20, Loss: 48.15120221038363
Epoch 21, Loss: 48.127862822119866
Epoch 22, Loss: 48.082780433768654
Epoch 23, Loss: 48.10050458765742
Epoch 24, Loss: 48.10143951871502
Epoch 25, Loss: 48.072339960354476
Epoch 26, Loss: 48.084289459684
Epoch 27, Loss: 48.07101832147855
Epoch 28, Loss: 48.0589086731868
Epoch 29, Loss: 48.083

[W 2024-07-22 11:25:59,931] Trial 4 failed with parameters: {'latent_dim': 92, 'beta': 4.079300350316165, 'learning_rate': 0.0029964162670997386, 'batch_size': 16, 'epochs': 877} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/juliacurd/anaconda3/envs/gene_dependency_representations/lib/python3.12/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_197078/3914677838.py", line 7, in <lambda>
    lambda trial: objective(trial, train_tensor, val_tensor, train_data), n_trials=500
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/optimize_utils.py", line 155, in objective
    train_vae(model, train_loader, optimizer, epochs=epochs)
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/betavae.py", line 94, in train_vae
    recon_ba

Epoch 78, Loss: 48.03111180547458
Trial 4 failed with parameters: {'latent_dim': 92, 'beta': 4.079300350316165, 'learning_rate': 0.0029964162670997386, 'batch_size': 16, 'epochs': 877} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/juliacurd/anaconda3/envs/gene_dependency_representations/lib/python3.12/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_197078/3914677838.py", line 7, in <lambda>
    lambda trial: objective(trial, train_tensor, val_tensor, train_data), n_trials=500
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/optimize_utils.py", line 155, in objective
    train_vae(model, train_loader, optimizer, epochs=epochs)
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/betavae.py", line 94, in train_vae
    re

[W 2024-07-22 11:25:59,934] Trial 4 failed with value None.


Trial 4 failed with value None.
Trial 4 failed with value None.


KeyboardInterrupt: 

In [None]:
# Report the best trial of the study: the full trial record, its objective
# value(s), and the hyperparameters that produced it. Nothing is written to
# disk here -- the study itself lives in the SQLite storage.
best_trial = study.best_trial
print(
    best_trial,
    f"Best trial: {best_trial.values}",
    f"Best hyperparameters: {best_trial.params}",
    sep="\n",
)