In [1]:
import sys
import os
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
sys.path.append("../../")
sys.path.append("../")
from utils.evaluation import evaluate
from utils.metrics import Metrics
from models.NNGenerator import AdaptableDenseModel
import optuna

In [2]:
import neptune
from neptune_pytorch import NeptuneLogger
from neptune.utils import stringify_unsupported
from dotenv import load_dotenv
load_dotenv()

True

In [3]:

import pickle
from git import Repo

# Get the git root directory
repo = Repo(".", search_parent_directories=True)
git_root = repo.git.rev_parse("--show-toplevel")

# Load data
X_Train_pd = pickle.load(open(f"{git_root}/data/splits/train/X_pandas.pck", "rb"))
y_Train_pd = pickle.load(open(f"{git_root}/data/splits/train/y_pandas.pck", "rb"))

X_Val_pd = pickle.load(open(f"{git_root}/data/splits/val/X_pandas.pck", "rb"))
y_Val_pd = pickle.load(open(f"{git_root}/data/splits/val/y_pandas.pck", "rb"))

In [4]:
X_Train = torch.tensor(X_Train_pd.values, dtype=torch.float32)
y_Train = torch.tensor(y_Train_pd.values, dtype=torch.float32)

X_Val = torch.tensor(X_Val_pd.values, dtype=torch.float32)
y_Val = torch.tensor(y_Val_pd.values, dtype=torch.float32)

train_dataset = torch.utils.data.TensorDataset(X_Train, y_Train)
val_dataset = torch.utils.data.TensorDataset(X_Val, y_Val)

In [5]:
def label_from_logits(y_hat: torch.Tensor, threshold = 0.5) -> torch.Tensor:
    with torch.no_grad():
        y_pred_tensor = (torch.sigmoid(y_hat) > threshold).float()
    return y_pred_tensor


def evaluate_from_dataframe(X: pd.DataFrame):
    X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32)
    
    #model: a pytorch model, which transforms X -> y in torch.Tensor format
    model.eval()
    model.cpu()
    y_pred_tensor = label_from_logits(model(X_tensor))
    
    return pd.DataFrame(y_pred_tensor.numpy())

def training(model, optimizer, criterion, train_dataloader, val_dataloder, epochs, device, neptune_logger=None, run = None, trial = None):
    criterion = criterion.to(device)
    model = model.to(device)
    train_acc = 0
    val_acc = 0
    train_loss = 0
    val_loss = 0
    val_acc = 0
    val_precision = 0
    val_recall = 0
    val_f1 = 0
    for epoch in tqdm(range(epochs)):
        model.train()
        train_acc = 0
        val_acc = 0
        train_loss = 0
        val_loss = 0
        for x,y in train_dataloader:
            x = x.to(device)
            y = y.to(device)
            y_pred = model(x)
            loss = criterion(y_pred, y)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_acc += Metrics.calculate_accuracy(y.cpu().numpy(), label_from_logits(y_pred).cpu().numpy())
        
        with torch.no_grad():
            model.eval()
            y_preds = np.array([])
            y_trues = np.array([])
            for x,y in val_dataloder:
                x = x.to(device)
                y = y.to(device)
                y_pred = model(x)
                val_loss += criterion(y_pred, y)
                y_pred = label_from_logits(y_pred).cpu().numpy()
                y = y.cpu().numpy()
                y_preds = np.vstack((y_preds, y_pred)) if y_preds.size else y_pred
                y_trues = np.vstack((y_trues, y)) if y_trues.size else y

            val_acc = Metrics.calculate_accuracy(y_preds, y_trues)
            val_precision = Metrics.calculate_precision(y_preds, y_trues)
            val_recall = Metrics.calculate_recall(y_preds, y_trues)
            val_f1 = Metrics.calculate_f1_score(y_preds, y_trues)

        if neptune_logger:
            run[neptune_logger.base_namespace]['train_loss'].append(train_loss)
            run[neptune_logger.base_namespace]['train_acc'].append(train_acc/len(train_dataloader))
            run[neptune_logger.base_namespace]['val_loss'].append(val_loss)
            run[neptune_logger.base_namespace]['val_acc'].append(val_acc)
            run[neptune_logger.base_namespace]['val_precision'].append(val_precision)
            run[neptune_logger.base_namespace]['val_recall'].append(val_recall)
            run[neptune_logger.base_namespace]['val_f1'].append(val_f1)

        #print(f"Epoch: {epoch} Train Loss: {train_loss} Train Acc: {train_acc/len(train_dataloader)} Val Loss: {val_loss} Val Acc: {val_acc}")
    return val_f1, val_acc

In [6]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [7]:
def objective(trial):
    input_dim = X_Train.shape[1]
    output_dim = y_Train.shape[1]
    n_layers = trial.suggest_int("n_layers", 3, 12)
    parameters = {
        "n_layers": n_layers,
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        "layernorm": trial.suggest_categorical("layernorm", [True, False]),
        "batchnorm": True,
        "weight_decay": trial.suggest_float("weight_decay", 1e-6, 1e-1, log=True),
        "dropout_prob": trial.suggest_float("dropout_prob", 0.0, 0.5),
        "leaky_relu_slope": trial.suggest_float("leaky_relu_slope", 0.0, 0.5),
        "batchsize": trial.suggest_int("batchsize", 32, 512),
        "epochs": trial.suggest_int("epochs", 25, 75),
        "device": device,
        "optimizer": "AdamW",
        "criterion": "BCEWithLogitsLoss",
        "LayerInitialization": "Xavier",
        "drop_out": True,
        "shuffle": True,
        "model_name": "AdaptableDenseModel",
        "Threshold": 0.5,
    }
    sizes = []
    for i in range(n_layers):
        last_size = input_dim if i == 0 else sizes[-1]
        size = trial.suggest_int(f"n_units_l{i}", output_dim, last_size)
        sizes.append(size)
    parameters["hidden_layer_sizes"] = sizes


    model = AdaptableDenseModel(input_dim=input_dim,
    output_dim=output_dim,
    hidden_layer_sizes=parameters["hidden_layer_sizes"],
    dropout_prob=parameters["dropout_prob"],
    leaky_relu_slope=parameters["leaky_relu_slope"],
    use_layer_norm=parameters["layernorm"],
    use_batch_norm=parameters["batchnorm"])

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=parameters["batchsize"], shuffle=parameters["shuffle"])
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=512, shuffle=False)
    optimizer = torch.optim.AdamW(model.parameters(), lr=parameters["learning_rate"], weight_decay=parameters["weight_decay"])
    criterion = nn.BCEWithLogitsLoss()


    val_f1, val_acc = training(model, optimizer, criterion, train_dataloader, val_dataloader, parameters["epochs"], device=device, trial = trial)
    
    return val_f1, val_acc

    

In [8]:
import neptune.integrations.optuna as npt_utils
optuna.logging.set_verbosity(optuna.logging.ERROR)

params = {
        "batchnorm": True,
        "device": device,
        "optimizer": "AdamW",
        "criterion": "BCEWithLogitsLoss",
        "LayerInitialization": "Xavier",
        "drop_out": True,
        "shuffle": True,
        "model_name": "AdaptableDenseModel",
        "Threshold": 0.5,
    }
run = neptune.init_run(
    api_token=os.getenv("NEPTUNE_API_KEY"),
    project=os.getenv("NEPTUNE_PROJECT_NAME"),
    name="Optuna - AdaptableDenseModel",
    )
run["parameters"] = params

neptune_callback = npt_utils.NeptuneCallback(run)
study = optuna.create_study(directions=["maximize","maximize"])
study.optimize(objective, n_trials=100, callbacks=[neptune_callback])

run.stop()



[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/JPL/rna-sequencing/e/RNAS-149


        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type


  0%|          | 0/69 [00:00<?, ?it/s]

        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type


  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/74 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/29 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/29 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/70 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/70 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Waiting for the remaining 13 operations to synchronize with Neptune. Do not kill this process.
[neptune] [info   ] All 13 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/JPL/rna-sequencing/e/RNAS-149/metadata


In [12]:
import pickle

pickle.dump(study, open(f"{git_root}/experiments/generating/AdaptableNNStudy.pkl", "wb"))