In [12]:
import sys
import pathlib
import numpy as np
import pandas as pd
import hiplot
from optuna.visualization import plot_param_importances
import optuna
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

from betavae import BetaVAE, train_vae, evaluate_vae
from optimize_utils import get_optimize_args, objective

# Make the data-download helper scripts importable from this notebook.
# The path is relative to the current working directory of the kernel.
script_directory = pathlib.Path("../0.data-download/scripts/").resolve()

# Keep exactly one copy of the scripts directory at the front of sys.path so
# re-running this cell is idempotent instead of stacking duplicate entries.
_scripts_path = str(script_directory)
sys.path[:] = [_scripts_path] + [
    entry for entry in sys.path if entry != _scripts_path
]
from data_loader import load_train_test_data

In [16]:
# Collect the optimization arguments via the project helper.
# NOTE(review): `args` is not referenced by the cells visible here — confirm
# it is still needed.
args = get_optimize_args()

# Absolute location of the downloaded data, resolved relative to the
# kernel's current working directory.
data_directory = pathlib.Path("../0.data-download/data").resolve()

# Request both splits plus the gene statistics, with zero-one normalization
# switched on (see data_loader.load_train_test_data for the exact semantics).
train_data, test_data, load_gene_stats = load_train_test_data(
    data_directory,
    train_or_test="all",
    load_gene_stats=True,
    zero_one_normalize=True,
)

[[0.8244309  0.5716864  0.21650638 ... 0.7044882  0.6934315  0.55542636]
 [0.60171366 0.44459793 0.49050593 ... 0.7006544  0.51975447 0.5930364 ]
 [0.55815876 0.41745156 0.55871475 ... 0.63283473 0.4719008  0.56748426]
 ...
 [0.6492675  0.49926242 0.36755386 ... 0.7904766  0.52086765 0.5231257 ]
 [0.5825433  0.5695022  0.44272694 ... 0.63639265 0.59594727 0.39246973]
 [0.68888646 0.5363674  0.46805647 ... 0.7161716  0.3728095  0.508517  ]]


In [14]:
# Convert the loaded train/test matrices to float32 tensors.
# np.asarray makes this work whether the loader returned NumPy arrays or
# pandas DataFrames: torch.tensor cannot infer a shape from a DataFrame
# directly, and for an ndarray the call is a no-op before the copy.
train_tensor = torch.tensor(np.asarray(train_data), dtype=torch.float32)
test_tensor = torch.tensor(np.asarray(test_data), dtype=torch.float32)

In [15]:
# Run Optuna optimization
# Tunable settings for the search, named so they are easy to find and change.
SEED = 0
N_TRIALS = 500

# Seed torch's global RNG before training starts.
# NOTE(review): presumably this covers weight init / batch shuffling inside
# `objective` — confirm against optimize_utils.py and betavae.py.
torch.manual_seed(SEED)

# TPESampler is Optuna's default single-objective sampler; passing it
# explicitly with a seed makes the sequence of suggested hyperparameters
# repeatable across kernel restarts.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
)

# Each trial calls the project objective with the tensors and the raw
# training data; the returned value is what the study minimizes.
study.optimize(
    lambda trial: objective(trial, train_tensor, test_tensor, train_data),
    n_trials=N_TRIALS,
)

[I 2024-07-19 13:44:55,023] A new study created in memory with name: no-name-1b4db0a6-da45-4aef-bd85-35c5349c6e70


Epoch 0, Loss: 182.30607361289734
Epoch 1, Loss: 114.26435495887401


  batch_size = trial.suggest_int(
  epochs = trial.suggest_int(


Epoch 2, Loss: 74.8010391122879
Epoch 3, Loss: 56.768469583197366
Epoch 4, Loss: 49.45094130607436
Epoch 5, Loss: 46.882413896940264
Epoch 6, Loss: 46.01964559719077
Epoch 7, Loss: 45.65857810880394
Epoch 8, Loss: 45.415811824564265
Epoch 9, Loss: 45.505688041668265
Epoch 10, Loss: 45.37813986138571
Epoch 11, Loss: 45.335391707736676
Epoch 12, Loss: 45.29060127576973
Epoch 13, Loss: 45.336144974542194
Epoch 14, Loss: 45.300030307629186
Epoch 15, Loss: 45.30430081904081
Epoch 16, Loss: 45.22100305205774
Epoch 17, Loss: 45.31579154949516
Epoch 18, Loss: 45.33493874289773
Epoch 19, Loss: 45.279708974777336
Epoch 20, Loss: 45.26187966086648
Epoch 21, Loss: 45.26349165632918
Epoch 22, Loss: 45.26935826706945
Epoch 23, Loss: 45.2740177088932
Epoch 24, Loss: 45.2481993129271
Epoch 25, Loss: 45.26036030537373
Epoch 26, Loss: 45.27266151371986
Epoch 27, Loss: 45.34253701999673
Epoch 28, Loss: 45.18459738091696
Epoch 29, Loss: 45.3213461986045
Epoch 30, Loss: 45.26535544055686
Epoch 31, Loss: 45

[W 2024-07-19 13:45:04,381] Trial 0 failed with parameters: {'latent_dim': 19, 'beta': 1.9913451018512291, 'learning_rate': 0.005, 'batch_size': 16, 'epochs': 605} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/juliacurd/anaconda3/envs/gene_dependency_representations/lib/python3.12/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_41974/4197105900.py", line 3, in <lambda>
    study.optimize(lambda trial: objective(trial, train_tensor, test_tensor, train_data), n_trials=500)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/optimize_utils.py", line 140, in objective
    train_vae(model, train_loader, optimizer, epochs=epochs)
  File "/home/juliacurd/gene_dependency_representations/2.train-VAE/betavae.py", line 98, in train

Epoch 117, Loss: 45.252715389617244


KeyboardInterrupt: 

In [None]:
# Report the best trial found by the study (nothing is written to disk here).
# Accessing study.best_trial requires at least one completed trial.
best_trial = study.best_trial

# Full FrozenTrial repr first, then the objective value(s) and the winning
# hyperparameters on their own lines.
print(best_trial)
print(
    f"Best trial: {best_trial.values}",
    f"Best hyperparameters: {best_trial.params}",
    sep="\n",
)

FrozenTrial(number=37, state=1, values=[101.85667588975694], datetime_start=datetime.datetime(2024, 7, 19, 11, 5, 27, 824798), datetime_complete=datetime.datetime(2024, 7, 19, 11, 5, 48, 314352), params={'latent_dim': 23, 'beta': 1.0124069329596523, 'learning_rate': 0.005, 'batch_size': 48, 'epochs': 605}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'latent_dim': IntDistribution(high=100, log=False, low=10, step=1), 'beta': FloatDistribution(high=10.0, log=False, low=1.0, step=None), 'learning_rate': CategoricalDistribution(choices=(0.005, 0.001, 0.0001, 1e-05, 1e-06)), 'batch_size': IntDistribution(high=112, log=False, low=16, step=32), 'epochs': IntDistribution(high=905, log=False, low=5, step=100)}, trial_id=37, value=None)
Best trial: [101.85667588975694]
Best hyperparameters: {'latent_dim': 23, 'beta': 1.0124069329596523, 'learning_rate': 0.005, 'batch_size': 48, 'epochs': 605}
