In [1]:
import pandas as pd
import pprint as pp
import optuna

import torch
import torchtext

from sklearn.model_selection import train_test_split

from preprocessing_utils import preprocess_data_for_nn

from torch_nn_model_2 import (
    get_best_available_device,
    train_nn_model,
    NNHyperparams,
)

torchtext.disable_torchtext_deprecation_warning()



In [2]:
def load_dataset(
    sequence_cols: list[str],
    char_cols: list[str],
    vector_cols: list[str],
    scalar_cols: list[str],
    vector_dims: dict[str, int],
    load_fresh: bool = False,
    sample_size: int | None = None,
):
    """
    Load the dataset, optionally reloading it fresh.
    """
    if load_fresh:
        print("Loading dataset fresh...")
        df = pd.read_csv("dataset/apk_analysis_dataset.csv")

        if sample_size is not None:
            print(f"Sampling {sample_size} rows from the dataset...")
            df = df.sample(sample_size, random_state=42)

        df, vocab_dict = preprocess_data_for_nn(
            df,
            sequence_cols=sequence_cols,
            char_cols=char_cols,
            vector_cols=vector_cols,
            scalar_cols=scalar_cols,
            vector_dims=vector_dims,
        )

        print("Saving preprocessed dataset and vocab_dict...")

        df.to_pickle("dataset/processed_apk_analysis_dataset.pkl")
        torch.save(vocab_dict, "dataset/processed_vocab_dict.pth")

        print("Preprocessing complete and saved.")

    else:
        print("Loading last preprocessed dataset...")
        df = pd.read_pickle("dataset/processed_apk_analysis_dataset.pkl")
        vocab_dict = torch.load("dataset/processed_vocab_dict.pth")

    return df, vocab_dict

In [3]:
SEQUENCE_COLS = [
    "activities_list",
    "services_list",
    "receivers_list",
    "permissions_list",
    "api_calls_list",
]

CHAR_COLS = ["fuzzy_hash"]
VECTOR_COLS = ["opcode_counts"]
SCALAR_COLS = ["file_size"]
VECTOR_DIMS = {"opcode_counts": 768}

# Load dataset
df, vocab_dict = load_dataset(
    SEQUENCE_COLS,
    CHAR_COLS,
    VECTOR_COLS,
    SCALAR_COLS,
    VECTOR_DIMS,
    load_fresh=False,
    sample_size=None,
)

df, df_sample = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df["is_malware"]
)

device = get_best_available_device()

Loading last preprocessed dataset...
Using CUDA device: NVIDIA GeForce RTX 4070 SUPER



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\david\miniconda3\envs\python311\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start(

In [4]:
display(df.info())
df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 15977 entries, 19862 to 3961
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   file_size         15977 non-null  float64
 1   fuzzy_hash        15977 non-null  object 
 2   activities_list   15977 non-null  object 
 3   services_list     15977 non-null  object 
 4   receivers_list    15977 non-null  object 
 5   permissions_list  15977 non-null  object 
 6   api_calls_list    15977 non-null  object 
 7   opcode_counts     15977 non-null  object 
 8   is_malware        15977 non-null  int64  
dtypes: float64(1), int64(1), object(7)
memory usage: 1.2+ MB


None

Unnamed: 0,file_size,fuzzy_hash,activities_list,services_list,receivers_list,permissions_list,api_calls_list,opcode_counts,is_malware
19862,4939479.0,"[47, 64, 18, 17, 62, 19, 63, 63, 29, 34, 31, 4...","[238986, 10, 238988, 238983, 3, 238985, 4, 22,...","[9, 4, 13, 5, 6, 8, 11, 3, 12, 10, 20857, 0, 0...","[4, 8, 13, 16, 11, 7, 12, 6, 10, 5, 9, 0, 0, 0...","[20, 3, 23, 17, 4, 5, 11, 41, 0, 0, 0, 0, 0, 0...","[175378, 2972, 4510, 6430, 5098, 16823, 16449,...","[268.0, 2926.0, 3840.0, 0.0, 700.0, 619.0, 0.0...",0
7826,4560508.0,"[36, 17, 10, 59, 6, 65, 21, 62, 17, 39, 62, 14...","[179512, 179513, 179511, 18, 179509, 179510, 1...","[20, 25, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[18, 17, 11402, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[13, 9, 7, 24, 46, 21, 19, 4, 12, 8, 6, 22, 5,...","[1012942, 1012943, 1012944, 1012945, 1013007, ...","[164.0, 2642.0, 44.0, 0.0, 49.0, 10.0, 0.0, 32...",1
18535,4335600.0,"[53, 46, 15, 50, 10, 22, 23, 21, 21, 34, 50, 3...","[97608, 44202, 97613, 4, 97614, 22, 97609, 3, ...","[13, 10, 12, 11, 4, 5, 9, 0, 0, 0, 0, 0, 0, 0,...","[6, 13, 16, 12, 4, 9, 7, 10, 8, 11, 0, 0, 0, 0...","[14, 7, 18, 17, 5, 3, 4, 41, 12, 13, 23, 10, 3...","[83888, 197158, 2972, 16823, 16449, 16835, 167...","[389.0, 2931.0, 5771.0, 0.0, 532.0, 537.0, 0.0...",0
7789,2120068.0,"[56, 12, 11, 7, 57, 50, 12, 51, 34, 17, 45, 16...","[194481, 12198, 12199, 23835, 23836, 0, 0, 0, ...","[2126, 2127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[22, 163, 11, 4, 182, 7, 21, 8, 3, 9, 15, 6, 2...","[302565, 121966, 302566, 418308, 302567, 41845...","[139.0, 3710.0, 541.0, 0.0, 113.0, 53.0, 0.0, ...",1
2356,269692.0,"[3, 30, 28, 62, 49, 62, 38, 33, 17, 45, 36, 62...","[721, 931, 707, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[212, 214, 213, 211, 284, 0, 0, 0, 0, 0, 0, 0,...","[153, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[6, 9, 15, 3, 4, 19, 31, 10, 28, 62, 0, 0, 0, ...","[18370, 58927, 73, 84, 685, 731, 277, 592, 223...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, ...",1


In [5]:
# Make sure df_processed, vocab_dict, SEQUENCE_COLS, etc. are defined in your notebook scope
# df_processed = df # Or however you get your fully preprocessed DataFrame for the NN
# device = get_best_available_device() # Already defined


def objective(trial: optuna.trial.Trial) -> float:
    """
    Objective function for Optuna to optimize.
    A trial will suggest a set of hyperparameters, train the model, and return a score.
    """
    # Define the search space for hyperparameters
    # These are examples, adjust ranges and types based on your intuition/needs
    lr = trial.suggest_float("max_learning_rate", 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    embedding_dim = trial.suggest_categorical("embedding_dim", [64, 128, 256])

    hidden_dims_one_layer = [(16,), (32,), (64,), (128,), (256,)]
    hidden_dims_two_layers = [
        (256, 128),
        (256, 64),
        (256, 32),
        (256, 16),
        (128, 64),
        (128, 32),
        (128, 16),
        (64, 32),
        (64, 16),
        (32, 16),
    ]
    hiddem_dims_possible = hidden_dims_one_layer + hidden_dims_two_layers
    
    hidden_dims_idx = trial.suggest_int("hidden_dims_idx", 0, len(hiddem_dims_possible) - 1)
    hidden_dims = list(hiddem_dims_possible[hidden_dims_idx])
    print(f"Selected hidden_dims: {hidden_dims} (index {hidden_dims_idx})")

    dropout = trial.suggest_float("dropout", 0.1, 0.5, step=0.1)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # Create NNHyperparams instance with suggested values
    # Keep other hyperparams fixed or include them in the trial if you want to tune them
    current_hyperparams = NNHyperparams(
        batch_size=batch_size,
        max_learning_rate=lr,
        epochs=5,
        early_stopping=True,
        patience=2,
        optimizer="adamw",  # Consider fixing or tuning
        weight_decay=weight_decay,
        embedding_dim=embedding_dim,
        hidden_dims=hidden_dims,
        dropout=dropout,
        seq_pooling="mean",
        n_classes=2,
        label_col="is_malware",
        dataloader_num_workers=2,
        dataloader_pin_memory=True,
        dataloader_persistent_workers=True,
        grad_scaler_max_norm=1.0,
    )

    print(f"\n--- Optuna Trial ---")
    print(f"Suggested Hyperparams: {current_hyperparams}")

    trained_model, results, fitted_scalers = train_nn_model(
        df=df_sample,  # Use your full preprocessed DataFrame or a sample
        vocab_dict=vocab_dict,
        sequence_cols=SEQUENCE_COLS,
        scalar_cols=SCALAR_COLS,
        char_cols=CHAR_COLS,
        vector_cols=VECTOR_COLS,
        vector_dims=VECTOR_DIMS,
        hyperparams=current_hyperparams,
        device=device,
        train_split_ratio=0.5,
        scoring_metric="f1",
        random_seed=42,
    )

    # unload the model to free memory
    del trained_model
    torch.cuda.empty_cache()

    import gc

    gc.collect()

    # Get the metric to optimize from the results
    # Ensure this matches the 'scoring_metric' used in train_nn_model
    # and the direction of optimization in optuna.create_study
    metric_to_optimize = results["final_metrics_best_model"].get("f1", 0.0)

    print(f"Trial finished. Validation Recall: {metric_to_optimize:.4f}")

    # Optuna tries to maximize the returned value by default if direction='maximize'
    return metric_to_optimize

In [6]:
# Create a study object. Define the direction of optimization.
# For metrics like F1, Recall, Precision, Accuracy, ROC AUC, PR AUC, you want to "maximize".
# If you were optimizing loss, you would use "minimize".
study = optuna.create_study(direction="maximize", study_name="debrim_nn_hyperopt")

# Start the optimization. n_trials is the number of different hyperparameter sets to try.
# Start with a small number (e.g., 10-20) to test, then increase for a more thorough search.
n_trials = 100
study.optimize(objective, n_trials=n_trials)

# Print the best hyperparameters found
print("\n--- Optuna Study Complete ---")
print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
best_trial = study.best_trial

print(f"  Value (F1 Score): {best_trial.value:.4f}")
print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

# You can now use best_trial.params to configure your NNHyperparams
# for a final training run or for cross-validation.
best_hyperparams_dict = best_trial.params

hidden_dims_one_layer = [(16,), (32,), (64,), (128,), (256,)]
hidden_dims_two_layers = [
    (256, 128),
    (256, 64),
    (256, 32),
    (256, 16),
    (128, 64),
    (128, 32),
    (128, 16),
    (64, 32),
    (64, 16),
    (32, 16),
]
hiddem_dims_possible = hidden_dims_one_layer + hidden_dims_two_layers

# Extract all of the trials and their hyperparameters and show them
print("\nAll trials and their hyperparameters:")
all_trials = study.trials
for trial in all_trials:
    print(f"Trial {trial.number}: Value = {trial.value}, Params = {trial.params}")

print("\nTrials df:")
all_trails_df = study.trials_dataframe()
all_trails_df.to_csv("dataset/optuna_trials_debrim_nn.csv", index=False)
display(all_trails_df)

final_nn_hyperparams = NNHyperparams(
    batch_size=best_hyperparams_dict["batch_size"],
    max_learning_rate=best_hyperparams_dict["max_learning_rate"],
    epochs=20,
    early_stopping=True,
    patience=5,
    optimizer="adamw",
    weight_decay=best_hyperparams_dict["weight_decay"],
    embedding_dim=best_hyperparams_dict["embedding_dim"],
    hidden_dims=hiddem_dims_possible[best_hyperparams_dict["hidden_dims_idx"]],
    dropout=best_hyperparams_dict["dropout"],
    seq_pooling="mean",
    label_col="is_malware",
    n_classes=2,
    dataloader_num_workers=0,
    dataloader_pin_memory=True,
    dataloader_persistent_workers=False,
    grad_scaler_max_norm=1.0,
)

print("\nBest NNHyperparams based on Optuna search:")
pp.pprint(final_nn_hyperparams)

# Now you can use final_nn_hyperparams to train your model, perhaps with cross_val_train_nn_model
# nn_results, best_nn_model = cross_val_train_nn_model(
#     df=df, # Use your full preprocessed DataFrame
#     vocab_dict=vocab_dict,
#     sequence_cols=SEQUENCE_COLS,
#     scalar_cols=SCALAR_COLS,
#     char_cols=CHAR_COLS,
#     vector_cols=VECTOR_COLS,
#     vector_dims=VECTOR_DIMS,
#     hyperparams=final_nn_hyperparams, # Use the tuned hyperparameters
#     n_folds=5, # Example: 5 folds for final evaluation
#     n_repetitions=1,
#     scoring_metric="f1", # Or your preferred final metric
#     device=device,
#     random_seed=42,
# )

[I 2025-06-22 10:21:52,274] A new study created in memory with name: debrim_nn_hyperopt


Selected hidden_dims: [128, 32] (index 10)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0023856730313390243, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.6634066787303807e-05, embedding_dim=128, hidden_dims=[128, 32], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.7262, LR: 9.56e-05
Epoch 1, Batch 26/125, Train Loss: 0.6629, LR: 2.12e-04
Epoch 1, Batch 52/125, Train Loss: 0.5130, LR: 5.2

[I 2025-06-22 10:23:35,920] Trial 0 finished with value: 0.9674465920651069 and parameters: {'max_learning_rate': 0.0023856730313390243, 'batch_size': 16, 'embedding_dim': 128, 'hidden_dims_idx': 10, 'dropout': 0.4, 'weight_decay': 1.6634066787303807e-05}. Best is trial 0 with value: 0.9674465920651069.


Trial finished. Validation Recall: 0.9674
Selected hidden_dims: [32, 16] (index 14)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=3.0173055455242765e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0003004062167267393, embedding_dim=64, hidden_dims=[32, 16], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7047, LR: 1.22e-06
Epoch 1, Batch 13/63, Train Loss: 0.6871, LR: 2.78e-

[I 2025-06-22 10:24:30,653] Trial 1 finished with value: 0.8289085545722714 and parameters: {'max_learning_rate': 3.0173055455242765e-05, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 14, 'dropout': 0.30000000000000004, 'weight_decay': 0.0003004062167267393}. Best is trial 0 with value: 0.9674465920651069.


Trial finished. Validation Recall: 0.8289
Selected hidden_dims: [32, 16] (index 14)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.00047686427118995696, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0007098155083968054, embedding_dim=256, hidden_dims=[32, 16], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6932, LR: 1.91e-05
Epoch 1, Batch 26/125, Train Loss: 0.6870, LR: 4.23e-05
Epoch 1, B

[I 2025-06-22 10:40:31,918] Trial 2 finished with value: 0.9685785536159601 and parameters: {'max_learning_rate': 0.00047686427118995696, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 14, 'dropout': 0.1, 'weight_decay': 0.0007098155083968054}. Best is trial 2 with value: 0.9685785536159601.


Trial finished. Validation Recall: 0.9686
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=8.640717728312954e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.8868621856571994e-05, embedding_dim=128, hidden_dims=[64], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6506, LR: 3.48e-06
Epoch 1, Batch 13/63, Train Loss: 0.6883, LR: 7.96e-06
Epoch 1, Batch 26/63,

[I 2025-06-22 10:41:50,317] Trial 3 finished with value: 0.9277166108185735 and parameters: {'max_learning_rate': 8.640717728312954e-05, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 2, 'dropout': 0.4, 'weight_decay': 4.8868621856571994e-05}. Best is trial 2 with value: 0.9685785536159601.


Trial finished. Validation Recall: 0.9277
Selected hidden_dims: [32, 16] (index 14)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0005493676823427794, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.054951462634546e-05, embedding_dim=256, hidden_dims=[32, 16], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6958, LR: 2.26e-05
Epoch 1, Batch 7/32, Train Loss: 0.6919, LR: 5.88e-05
Epoch 1, Batch

[I 2025-06-22 10:48:57,538] Trial 4 finished with value: 0.962037962037962 and parameters: {'max_learning_rate': 0.0005493676823427794, 'batch_size': 64, 'embedding_dim': 256, 'hidden_dims_idx': 14, 'dropout': 0.2, 'weight_decay': 3.054951462634546e-05}. Best is trial 2 with value: 0.9685785536159601.


Trial finished. Validation Recall: 0.9620
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=3.72307572305381e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=9.980271361663464e-06, embedding_dim=64, hidden_dims=[16], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7324, LR: 1.50e-06
Epoch 1, Batch 13/63, Train Loss: 0.6952, LR: 3.43e-06
Epoch 1, Batch 26/63, Tr

[I 2025-06-22 10:49:52,828] Trial 5 finished with value: 0.8018528049408131 and parameters: {'max_learning_rate': 3.72307572305381e-05, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 0, 'dropout': 0.2, 'weight_decay': 9.980271361663464e-06}. Best is trial 2 with value: 0.9685785536159601.


Trial finished. Validation Recall: 0.8019
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.00024161654753634412, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.361905972968227e-06, embedding_dim=256, hidden_dims=[64], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6813, LR: 9.73e-06
Epoch 1, Batch 13/63, Train Loss: 0.6836, LR: 2.23e-05
Epoch 1, Batch 26/63,

[I 2025-06-22 11:04:33,954] Trial 6 finished with value: 0.965 and parameters: {'max_learning_rate': 0.00024161654753634412, 'batch_size': 32, 'embedding_dim': 256, 'hidden_dims_idx': 2, 'dropout': 0.4, 'weight_decay': 2.361905972968227e-06}. Best is trial 2 with value: 0.9685785536159601.


Trial finished. Validation Recall: 0.9650
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.00463248659727382, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0002460523448470818, embedding_dim=128, hidden_dims=[256, 128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6905, LR: 1.87e-04
Epoch 1, Batch 13/63, Train Loss: 0.6624, LR: 4.27e-04
Epoch 1, Bat

[I 2025-06-22 11:06:11,764] Trial 7 finished with value: 0.968968968968969 and parameters: {'max_learning_rate': 0.00463248659727382, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 5, 'dropout': 0.5, 'weight_decay': 0.0002460523448470818}. Best is trial 7 with value: 0.968968968968969.


Trial finished. Validation Recall: 0.9690
Selected hidden_dims: [32, 16] (index 14)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=3.597758807424261e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.5254536321490948e-05, embedding_dim=128, hidden_dims=[32, 16], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6944, LR: 1.44e-06
Epoch 1, Batch 26/125, Train Loss: 0.8098, LR: 3.19e-06
Epoch 1, B

[I 2025-06-22 11:08:17,447] Trial 8 finished with value: 0.7702407002188184 and parameters: {'max_learning_rate': 3.597758807424261e-05, 'batch_size': 16, 'embedding_dim': 128, 'hidden_dims_idx': 14, 'dropout': 0.2, 'weight_decay': 1.5254536321490948e-05}. Best is trial 7 with value: 0.968968968968969.


Trial finished. Validation Recall: 0.7702
Selected hidden_dims: [64, 16] (index 13)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.00019903972619363417, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.2916797988710483e-06, embedding_dim=128, hidden_dims=[64, 16], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7201, LR: 8.17e-06
Epoch 1, Batch 7/32, Train Loss: 0.6809, LR: 2.13e-05
Epoch 1, Bat

[I 2025-06-22 11:10:04,329] Trial 9 finished with value: 0.9194536033914272 and parameters: {'max_learning_rate': 0.00019903972619363417, 'batch_size': 64, 'embedding_dim': 128, 'hidden_dims_idx': 13, 'dropout': 0.2, 'weight_decay': 1.2916797988710483e-06}. Best is trial 7 with value: 0.968968968968969.


Trial finished. Validation Recall: 0.9195
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.009474260570458054, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00014281990984921708, embedding_dim=128, hidden_dims=[256, 64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7025, LR: 3.82e-04
Epoch 1, Batch 13/63, Train Loss: 0.5231, LR: 8.73e-04
Epoch 1, Bat

[I 2025-06-22 11:11:22,458] Trial 10 finished with value: 0.9687814702920443 and parameters: {'max_learning_rate': 0.009474260570458054, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 6, 'dropout': 0.5, 'weight_decay': 0.00014281990984921708}. Best is trial 7 with value: 0.968968968968969.


Trial finished. Validation Recall: 0.9688
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.009614878948685204, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00015674563930015588, embedding_dim=128, hidden_dims=[256, 32], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6793, LR: 3.87e-04
Epoch 1, Batch 13/63, Train Loss: 0.4864, LR: 8.86e-04
Epoch 1, Bat

[I 2025-06-22 11:12:58,381] Trial 11 finished with value: 0.9612870789341378 and parameters: {'max_learning_rate': 0.009614878948685204, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 7, 'dropout': 0.5, 'weight_decay': 0.00015674563930015588}. Best is trial 7 with value: 0.968968968968969.


Trial finished. Validation Recall: 0.9613
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.008975360627093466, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00011953356837977742, embedding_dim=128, hidden_dims=[256, 64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7025, LR: 3.61e-04
Epoch 1, Batch 13/63, Train Loss: 0.5371, LR: 8.27e-04
Epoch 1, Bat

[I 2025-06-22 11:14:16,169] Trial 12 finished with value: 0.9695740365111561 and parameters: {'max_learning_rate': 0.008975360627093466, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 6, 'dropout': 0.5, 'weight_decay': 0.00011953356837977742}. Best is trial 12 with value: 0.9695740365111561.


Trial finished. Validation Recall: 0.9696
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.002218128701578755, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0007959473978236647, embedding_dim=128, hidden_dims=[256, 128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6905, LR: 8.93e-05
Epoch 1, Batch 13/63, Train Loss: 0.6849, LR: 2.04e-04
Epoch 1, Ba

[I 2025-06-22 11:16:02,522] Trial 13 finished with value: 0.966144517433047 and parameters: {'max_learning_rate': 0.002218128701578755, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 5, 'dropout': 0.5, 'weight_decay': 0.0007959473978236647}. Best is trial 12 with value: 0.9695740365111561.


Trial finished. Validation Recall: 0.9661
Selected hidden_dims: [128, 64] (index 9)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.0025714977363574146, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=9.412029297691989e-05, embedding_dim=128, hidden_dims=[128, 64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7015, LR: 1.04e-04
Epoch 1, Batch 13/63, Train Loss: 0.6787, LR: 2.37e-04
Epoch 1, Bat

[I 2025-06-22 11:17:17,298] Trial 14 finished with value: 0.968937875751503 and parameters: {'max_learning_rate': 0.0025714977363574146, 'batch_size': 32, 'embedding_dim': 128, 'hidden_dims_idx': 9, 'dropout': 0.5, 'weight_decay': 9.412029297691989e-05}. Best is trial 12 with value: 0.9695740365111561.


Trial finished. Validation Recall: 0.9689
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004140090545887836, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00030671695223794833, embedding_dim=64, hidden_dims=[256], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7020, LR: 1.70e-04
Epoch 1, Batch 7/32, Train Loss: 0.6390, LR: 4.43e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 11:18:03,117] Trial 15 finished with value: 0.9714285714285714 and parameters: {'max_learning_rate': 0.004140090545887836, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.4, 'weight_decay': 0.00030671695223794833}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9714
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0010462263883440192, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0004167913462931006, embedding_dim=64, hidden_dims=[128], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6957, LR: 4.30e-05
Epoch 1, Batch 7/32, Train Loss: 0.6904, LR: 1.12e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 11:18:49,624] Trial 16 finished with value: 0.9683257918552036 and parameters: {'max_learning_rate': 0.0010462263883440192, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.4, 'weight_decay': 0.0004167913462931006}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9683
Selected hidden_dims: [128, 64] (index 9)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=1.2927548094328915e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=9.428790194913002e-05, embedding_dim=64, hidden_dims=[128, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7037, LR: 5.31e-07
Epoch 1, Batch 7/32, Train Loss: 0.6972, LR: 1.38e-

[I 2025-06-22 11:19:35,703] Trial 17 finished with value: 0.6664440734557596 and parameters: {'max_learning_rate': 1.2927548094328915e-05, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 9, 'dropout': 0.30000000000000004, 'weight_decay': 9.428790194913002e-05}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.6664
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004749449410493503, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.878047869259928e-05, embedding_dim=64, hidden_dims=[256], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7020, LR: 1.95e-04
Epoch 1, Batch 7/32, Train Loss: 0.6304, LR: 5.08e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 11:20:16,533] Trial 18 finished with value: 0.9678068410462777 and parameters: {'max_learning_rate': 0.004749449410493503, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.4, 'weight_decay': 4.878047869259928e-05}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9678
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.001166643397544032, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.847394899470475e-06, embedding_dim=64, hidden_dims=[256, 32], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6970, LR: 4.79e-05
Epoch 1, Batch 7/32, Train Loss: 0.6905, LR: 1.25e-04

[I 2025-06-22 11:21:02,259] Trial 19 finished with value: 0.9684526790185278 and parameters: {'max_learning_rate': 0.001166643397544032, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 7, 'dropout': 0.30000000000000004, 'weight_decay': 5.847394899470475e-06}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9685
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004855607694347289, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00037698615185622356, embedding_dim=64, hidden_dims=[16], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7276, LR: 1.99e-04
Epoch 1, Batch 7/32, Train Loss: 0.6578, LR: 5.20e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 11:21:47,962] Trial 20 finished with value: 0.96996996996997 and parameters: {'max_learning_rate': 0.004855607694347289, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 0, 'dropout': 0.4, 'weight_decay': 0.00037698615185622356}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9700
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0052173042925733664, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00028864678014902686, embedding_dim=64, hidden_dims=[32], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6855, LR: 2.14e-04
Epoch 1, Batch 7/32, Train Loss: 0.6788, LR: 5.58e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 11:22:33,649] Trial 21 finished with value: 0.9695121951219512 and parameters: {'max_learning_rate': 0.0052173042925733664, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.4, 'weight_decay': 0.00028864678014902686}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9695
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0013746221777638922, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0005069913565025937, embedding_dim=64, hidden_dims=[16], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7276, LR: 5.65e-05
Epoch 1, Batch 7/32, Train Loss: 0.6787, LR: 1.47e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 11:23:19,814] Trial 22 finished with value: 0.9606377678126558 and parameters: {'max_learning_rate': 0.0013746221777638922, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 0, 'dropout': 0.4, 'weight_decay': 0.0005069913565025937}. Best is trial 15 with value: 0.9714285714285714.


Trial finished. Validation Recall: 0.9606
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005856428094809981, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00016062404609637696, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 2.41e-04
Epoch 1, Batch 7/32, Train Loss: 0.6336, LR: 6.27e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 11:24:05,650] Trial 23 finished with value: 0.9728370221327968 and parameters: {'max_learning_rate': 0.005856428094809981, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 0.00016062404609637696}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9728
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0038913719133630937, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00019478758771012758, embedding_dim=64, hidden_dims=[128], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6935, LR: 1.60e-04
Epoch 1, Batch 7/32, Train Loss: 0.6483, LR: 4.16e-04
Epoch

[I 2025-06-22 11:24:51,481] Trial 24 finished with value: 0.9707661290322581 and parameters: {'max_learning_rate': 0.0038913719133630937, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.30000000000000004, 'weight_decay': 0.00019478758771012758}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9708
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0028066157056636713, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0002029163347206826, embedding_dim=64, hidden_dims=[128], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6935, LR: 1.15e-04
Epoch 1, Batch 7/32, Train Loss: 0.6610, LR: 3.00e-04
Epoch 

[I 2025-06-22 11:25:37,594] Trial 25 finished with value: 0.9698189134808853 and parameters: {'max_learning_rate': 0.0028066157056636713, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.30000000000000004, 'weight_decay': 0.0002029163347206826}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9698
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0016145541448908181, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.000978234595442972, embedding_dim=64, hidden_dims=[256], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7005, LR: 6.63e-05
Epoch 1, Batch 7/32, Train Loss: 0.6673, LR: 1.73e-04
Epoch 1

[I 2025-06-22 11:26:23,375] Trial 26 finished with value: 0.9717457114026236 and parameters: {'max_learning_rate': 0.0016145541448908181, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.30000000000000004, 'weight_decay': 0.000978234595442972}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9717
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.000717485636560264, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.000965204756403488, embedding_dim=64, hidden_dims=[256], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6985, LR: 2.95e-05
Epoch 1, Batch 7/32, Train Loss: 0.6785, LR: 7.68e-05
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 11:27:10,620] Trial 27 finished with value: 0.9663823381836427 and parameters: {'max_learning_rate': 0.000717485636560264, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.1, 'weight_decay': 0.000965204756403488}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9664
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.00163149087413567, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.622740174265438e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 6.70e-05
Epoch 1, Batch 7/32, Train Loss: 0.6763, LR: 1.75e-04
Epoch 1, Batch 14/32, Trai

[I 2025-06-22 11:27:56,252] Trial 28 finished with value: 0.9681657402728651 and parameters: {'max_learning_rate': 0.00163149087413567, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 5.622740174265438e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9682
Selected hidden_dims: [256, 16] (index 8)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.001863101980913933, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0005585386420247639, embedding_dim=256, hidden_dims=[256, 16], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6819, LR: 7.47e-05
Epoch 1, Batch 26/125, Train Loss: 0.6474, LR: 1.65e-04
Epoch 1, Ba

[I 2025-06-22 11:46:33,986] Trial 29 finished with value: 0.9712266532054518 and parameters: {'max_learning_rate': 0.001863101980913933, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 8, 'dropout': 0.4, 'weight_decay': 0.0005585386420247639}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9712
Selected hidden_dims: [128, 16] (index 11)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0008743174015609711, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0009120220055117417, embedding_dim=64, hidden_dims=[128, 16], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6897, LR: 3.50e-05
Epoch 1, Batch 26/125, Train Loss: 0.6958, LR: 7.7

[I 2025-06-22 11:47:54,901] Trial 30 finished with value: 0.9667003027245207 and parameters: {'max_learning_rate': 0.0008743174015609711, 'batch_size': 16, 'embedding_dim': 64, 'hidden_dims_idx': 11, 'dropout': 0.30000000000000004, 'weight_decay': 0.0009120220055117417}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9667
Selected hidden_dims: [256, 16] (index 8)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0018514500975375955, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0005340591213530987, embedding_dim=256, hidden_dims=[256, 16], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6819, LR: 7.42e-05
Epoch 1, Batch 26/125, Train Loss: 0.6480, LR: 1.64e-04
Epoch 1, B

[I 2025-06-22 12:20:07,632] Trial 31 finished with value: 0.9693004529441369 and parameters: {'max_learning_rate': 0.0018514500975375955, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 8, 'dropout': 0.4, 'weight_decay': 0.0005340591213530987}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9693
Selected hidden_dims: [128, 16] (index 11)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.003485002473217171, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.000531261972037709, embedding_dim=256, hidden_dims=[128, 16], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6927, LR: 1.40e-04
Epoch 1, Batch 26/125, Train Loss: 0.6129, LR: 3.09e-04
Epoch 1, Ba

[I 2025-06-22 12:50:32,068] Trial 32 finished with value: 0.9705284552845529 and parameters: {'max_learning_rate': 0.003485002473217171, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 11, 'dropout': 0.4, 'weight_decay': 0.000531261972037709}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9705
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.005867103470513595, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0006241065370661097, embedding_dim=256, hidden_dims=[256, 128], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6935, LR: 2.35e-04
Epoch 1, Batch 26/125, Train Loss: 0.5449, LR: 5.

[I 2025-06-22 13:12:34,972] Trial 33 finished with value: 0.9597644749754661 and parameters: {'max_learning_rate': 0.005867103470513595, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 5, 'dropout': 0.30000000000000004, 'weight_decay': 0.0006241065370661097}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9598
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0005252620660706986, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0003153548458680616, embedding_dim=256, hidden_dims=[256], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.7188, LR: 2.10e-05
Epoch 1, Batch 26/125, Train Loss: 0.6373, LR: 4.66e-05
Epoch 1, Batch 52/

[I 2025-06-22 13:40:16,782] Trial 34 finished with value: 0.9704556835252879 and parameters: {'max_learning_rate': 0.0005252620660706986, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 4, 'dropout': 0.4, 'weight_decay': 0.0003153548458680616}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9705
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0067740946004487875, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0009923518634051248, embedding_dim=256, hidden_dims=[256, 64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6953, LR: 2.78e-04
Epoch 1, Batch 7/32, Train Loss: 0.6072, LR: 7.25e-04
Epoch 1, Batc

[I 2025-06-22 13:47:02,895] Trial 35 finished with value: 0.9620512820512821 and parameters: {'max_learning_rate': 0.0067740946004487875, 'batch_size': 64, 'embedding_dim': 256, 'hidden_dims_idx': 6, 'dropout': 0.5, 'weight_decay': 0.0009923518634051248}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9621
Selected hidden_dims: [256, 16] (index 8)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0003129349723344516, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0003687749224284969, embedding_dim=64, hidden_dims=[256, 16], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.6852, LR: 1.25e-05
Epoch 1, Batch 26/125, Train Loss: 0.6869, LR: 2.78e-05
Epoch 1, Ba

[I 2025-06-22 13:48:18,854] Trial 36 finished with value: 0.9647532729103726 and parameters: {'max_learning_rate': 0.0003129349723344516, 'batch_size': 16, 'embedding_dim': 64, 'hidden_dims_idx': 8, 'dropout': 0.2, 'weight_decay': 0.0003687749224284969}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9648
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.00013682155455055448, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=7.288760580314762e-05, embedding_dim=256, hidden_dims=[64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6819, LR: 5.62e-06
Epoch 1, Batch 7/32, Train Loss: 0.6850, LR: 1.46e-05
Epoch 

[I 2025-06-22 13:54:39,273] Trial 37 finished with value: 0.9478303266699171 and parameters: {'max_learning_rate': 0.00013682155455055448, 'batch_size': 64, 'embedding_dim': 256, 'hidden_dims_idx': 2, 'dropout': 0.30000000000000004, 'weight_decay': 7.288760580314762e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9478
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002771202386714621, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.096098115678274e-05, embedding_dim=64, hidden_dims=[32], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6817, LR: 1.14e-04
Epoch 1, Batch 7/32, Train Loss: 0.6845, LR: 2.97e-04
Epoch 1, Batch 14/32, Tra

[I 2025-06-22 13:55:25,230] Trial 38 finished with value: 0.9711099847947289 and parameters: {'max_learning_rate': 0.002771202386714621, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.1, 'weight_decay': 3.096098115678274e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9711
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0017962176894353544, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00026043267834772787, embedding_dim=64, hidden_dims=[256], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.7057, LR: 7.20e-05
Epoch 1, Batch 26/125, Train Loss: 0.6545, LR: 1.59e-04
Epoch 1, Batch 52/

[I 2025-06-22 13:56:39,653] Trial 39 finished with value: 0.9703368526897939 and parameters: {'max_learning_rate': 0.0017962176894353544, 'batch_size': 16, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.4, 'weight_decay': 0.00026043267834772787}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9703
Selected hidden_dims: [256, 16] (index 8)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0008264216554763365, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0006483336727608591, embedding_dim=256, hidden_dims=[256, 16], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6899, LR: 3.39e-05
Epoch 1, Batch 7/32, Train Loss: 0.6761, LR: 8.84e-05
Epoch 1, Batc

[I 2025-06-22 14:03:21,676] Trial 40 finished with value: 0.9698189134808853 and parameters: {'max_learning_rate': 0.0008264216554763365, 'batch_size': 64, 'embedding_dim': 256, 'hidden_dims_idx': 8, 'dropout': 0.5, 'weight_decay': 0.0006483336727608591}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9698
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002800823033426711, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.5232938155273707e-05, embedding_dim=64, hidden_dims=[32], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6817, LR: 1.15e-04
Epoch 1, Batch 7/32, Train Loss: 0.6843, LR: 3.00e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:04:07,392] Trial 41 finished with value: 0.9706477732793523 and parameters: {'max_learning_rate': 0.002800823033426711, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.1, 'weight_decay': 2.5232938155273707e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9706
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.003323361277491239, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.9175574886776315e-05, embedding_dim=64, hidden_dims=[32], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6817, LR: 1.36e-04
Epoch 1, Batch 7/32, Train Loss: 0.6797, LR: 3.56e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:04:52,164] Trial 42 finished with value: 0.9718875502008032 and parameters: {'max_learning_rate': 0.003323361277491239, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.1, 'weight_decay': 1.9175574886776315e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9719
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0034715859703401287, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.822125767359051e-06, embedding_dim=64, hidden_dims=[64], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6876, LR: 1.43e-04
Epoch 1, Batch 7/32, Train Loss: 0.6575, LR: 3.71e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:05:36,254] Trial 43 finished with value: 0.970912738214644 and parameters: {'max_learning_rate': 0.0034715859703401287, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.2, 'weight_decay': 8.822125767359051e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9709
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.007057379850224284, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.681290285793012e-06, embedding_dim=64, hidden_dims=[128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6937, LR: 2.90e-04
Epoch 1, Batch 7/32, Train Loss: 0.6089, LR: 7.55e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:06:20,871] Trial 44 finished with value: 0.9717741935483871 and parameters: {'max_learning_rate': 0.007057379850224284, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.2, 'weight_decay': 3.681290285793012e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9718
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.007499416744701274, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.179465984234056e-06, embedding_dim=64, hidden_dims=[128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6937, LR: 3.08e-04
Epoch 1, Batch 7/32, Train Loss: 0.6034, LR: 8.02e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:07:05,335] Trial 45 finished with value: 0.9708542713567839 and parameters: {'max_learning_rate': 0.007499416744701274, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.2, 'weight_decay': 3.179465984234056e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9709
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.006739027934069543, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.5441575789240153e-05, embedding_dim=64, hidden_dims=[32], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6817, LR: 2.77e-04
Epoch 1, Batch 7/32, Train Loss: 0.6496, LR: 7.21e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:07:49,863] Trial 46 finished with value: 0.9704383282364933 and parameters: {'max_learning_rate': 0.006739027934069543, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.1, 'weight_decay': 1.5441575789240153e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9704
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.003829252053986063, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.3887739393895064e-06, embedding_dim=64, hidden_dims=[256, 128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6950, LR: 1.57e-04
Epoch 1, Batch 7/32, Train Loss: 0.6611, LR: 4.10e-04
Epoch 1, Bat

[I 2025-06-22 14:08:34,433] Trial 47 finished with value: 0.9701568032372281 and parameters: {'max_learning_rate': 0.003829252053986063, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 5, 'dropout': 0.2, 'weight_decay': 1.3887739393895064e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9702
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=7.659257759498833e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.0955839445386013e-05, embedding_dim=64, hidden_dims=[64], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6866, LR: 3.15e-06
Epoch 1, Batch 7/32, Train Loss: 0.6913, LR: 8.20e-06
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:09:19,163] Trial 48 finished with value: 0.8674223755544603 and parameters: {'max_learning_rate': 7.659257759498833e-05, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.1, 'weight_decay': 1.0955839445386013e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.8674
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.009653989451099665, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.000122699844908e-06, embedding_dim=64, hidden_dims=[128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6937, LR: 3.97e-04
Epoch 1, Batch 7/32, Train Loss: 0.5755, LR: 1.03e-03
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:09:59,195] Trial 49 finished with value: 0.968068930562595 and parameters: {'max_learning_rate': 0.009653989451099665, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.2, 'weight_decay': 5.000122699844908e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9681
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.007549771670678614, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.1697883347776256e-05, embedding_dim=64, hidden_dims=[256], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6985, LR: 3.10e-04
Epoch 1, Batch 7/32, Train Loss: 0.5738, LR: 8.08e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 14:10:38,939] Trial 50 finished with value: 0.9686552072800809 and parameters: {'max_learning_rate': 0.007549771670678614, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.1, 'weight_decay': 2.1697883347776256e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9687
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=16, max_learning_rate=0.0013281427489252125, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.266944820555747e-06, embedding_dim=256, hidden_dims=[256, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/125, Train Loss: 0.7286, LR: 5.32e-05
Epoch 1, Batch 26/125, Train Loss: 0.6848, LR: 1.1

[I 2025-06-22 14:34:38,623] Trial 51 finished with value: 0.9714285714285714 and parameters: {'max_learning_rate': 0.0013281427489252125, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 6, 'dropout': 0.30000000000000004, 'weight_decay': 2.266944820555747e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9714
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002324635205605679, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.8681822334832825e-06, embedding_dim=64, hidden_dims=[256, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6925, LR: 9.55e-05
Epoch 1, Batch 7/32, Train Loss: 0.6956, LR: 2.49e-0

[I 2025-06-22 14:35:24,862] Trial 52 finished with value: 0.9717741935483871 and parameters: {'max_learning_rate': 0.002324635205605679, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 6, 'dropout': 0.30000000000000004, 'weight_decay': 1.8681822334832825e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9718
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005021819508804257, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.0758615799743215e-06, embedding_dim=64, hidden_dims=[256, 128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6950, LR: 2.06e-04
Epoch 1, Batch 7/32, Train Loss: 0.6524, LR: 5.37e-04
Epoch 1, Bat

[I 2025-06-22 14:36:08,961] Trial 53 finished with value: 0.9673202614379085 and parameters: {'max_learning_rate': 0.005021819508804257, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 5, 'dropout': 0.2, 'weight_decay': 1.0758615799743215e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9673
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.00237264866231627, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.866092646435174e-06, embedding_dim=64, hidden_dims=[128], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6935, LR: 9.74e-05
Epoch 1, Batch 7/32, Train Loss: 0.6661, LR: 2.54e-04
Epoch 1,

[I 2025-06-22 14:36:52,877] Trial 54 finished with value: 0.9677744209466264 and parameters: {'max_learning_rate': 0.00237264866231627, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.30000000000000004, 'weight_decay': 1.866092646435174e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9678
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004350249077303315, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.8407113101477e-06, embedding_dim=64, hidden_dims=[256, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6925, LR: 1.79e-04
Epoch 1, Batch 7/32, Train Loss: 0.6845, LR: 4.66e-04
E

[I 2025-06-22 14:37:36,586] Trial 55 finished with value: 0.971342383107089 and parameters: {'max_learning_rate': 0.004350249077303315, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 6, 'dropout': 0.30000000000000004, 'weight_decay': 3.8407113101477e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9713
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.003383042351743511, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00013189627394376267, embedding_dim=64, hidden_dims=[256], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7028, LR: 1.36e-04
Epoch 1, Batch 13/63, Train Loss: 0.6156, LR: 3.12e-04
Epoch 1, Batch 26/63,

[I 2025-06-22 14:38:23,858] Trial 56 finished with value: 0.9645885286783042 and parameters: {'max_learning_rate': 0.003383042351743511, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.2, 'weight_decay': 0.00013189627394376267}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9646
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005674547319844819, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.043860907800924e-05, embedding_dim=128, hidden_dims=[16], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6954, LR: 2.33e-04
Epoch 1, Batch 7/32, Train Loss: 0.6365, LR: 6.07e-04
Epoch 1,

[I 2025-06-22 14:39:42,224] Trial 57 finished with value: 0.9705882352941176 and parameters: {'max_learning_rate': 0.005674547319844819, 'batch_size': 64, 'embedding_dim': 128, 'hidden_dims_idx': 0, 'dropout': 0.30000000000000004, 'weight_decay': 4.043860907800924e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9706
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0020158600955377044, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.7300513877587005e-06, embedding_dim=64, hidden_dims=[256, 128], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6950, LR: 8.28e-05
Epoch 1, Batch 7/32, Train Loss: 0.6735, LR: 2.16e-04
Epoch 1, Ba

[I 2025-06-22 14:40:26,759] Trial 58 finished with value: 0.9668008048289738 and parameters: {'max_learning_rate': 0.0020158600955377044, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 5, 'dropout': 0.2, 'weight_decay': 5.7300513877587005e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9668
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0014379201746909933, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.6400262607564408e-06, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 5.91e-05
Epoch 1, Batch 7/32, Train Loss: 0.6823, LR: 1.54e-04
Epoch 1, Batch 14/32,

[I 2025-06-22 14:41:10,563] Trial 59 finished with value: 0.9703069954705587 and parameters: {'max_learning_rate': 0.0014379201746909933, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 1.6400262607564408e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9703
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0029766576498514823, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00018061845437668714, embedding_dim=64, hidden_dims=[256, 32], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6970, LR: 1.22e-04
Epoch 1, Batch 7/32, Train Loss: 0.6834, LR: 3.19e-

[I 2025-06-22 14:41:54,214] Trial 60 finished with value: 0.9718875502008032 and parameters: {'max_learning_rate': 0.0029766576498514823, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 7, 'dropout': 0.30000000000000004, 'weight_decay': 0.00018061845437668714}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9719
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0031249881086333493, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=9.797624711192913e-05, embedding_dim=64, hidden_dims=[256, 32], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6970, LR: 1.28e-04
Epoch 1, Batch 7/32, Train Loss: 0.6829, LR: 3.34e-0

[I 2025-06-22 14:42:37,918] Trial 61 finished with value: 0.9713135379969804 and parameters: {'max_learning_rate': 0.0031249881086333493, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 7, 'dropout': 0.30000000000000004, 'weight_decay': 9.797624711192913e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9713
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0024628396305713613, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00020065710101465958, embedding_dim=64, hidden_dims=[256, 32], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6970, LR: 1.01e-04
Epoch 1, Batch 7/32, Train Loss: 0.6854, LR: 2.64e-

[I 2025-06-22 14:43:21,157] Trial 62 finished with value: 0.9673530889000502 and parameters: {'max_learning_rate': 0.0024628396305713613, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 7, 'dropout': 0.30000000000000004, 'weight_decay': 0.00020065710101465958}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9674
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0041869033102684, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.0223907628695964e-06, embedding_dim=64, hidden_dims=[256, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6925, LR: 1.72e-04
Epoch 1, Batch 7/32, Train Loss: 0.6854, LR: 4.48e-04


[I 2025-06-22 14:44:00,656] Trial 63 finished with value: 0.9713135379969804 and parameters: {'max_learning_rate': 0.0041869033102684, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 6, 'dropout': 0.30000000000000004, 'weight_decay': 3.0223907628695964e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9713
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0010720989362068222, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00017046401180989928, embedding_dim=64, hidden_dims=[256, 128], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6880, LR: 4.40e-05
Epoch 1, Batch 7/32, Train Loss: 0.6836, LR: 1.15e-04
Epoch 1, Ba

[I 2025-06-22 14:44:44,526] Trial 64 finished with value: 0.9653092006033183 and parameters: {'max_learning_rate': 0.0010720989362068222, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 5, 'dropout': 0.4, 'weight_decay': 0.00017046401180989928}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9653
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=1.1632998764279112e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=6.769225483674067e-05, embedding_dim=64, hidden_dims=[256], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7006, LR: 4.68e-07
Epoch 1, Batch 13/63, Train Loss: 0.7088, LR: 1.07e-06
Epoc

[I 2025-06-22 14:45:37,711] Trial 65 finished with value: 0.7901639344262295 and parameters: {'max_learning_rate': 1.1632998764279112e-05, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.30000000000000004, 'weight_decay': 6.769225483674067e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.7902
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005845987426525882, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=7.73613136733225e-06, embedding_dim=64, hidden_dims=[64], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6859, LR: 2.40e-04
Epoch 1, Batch 7/32, Train Loss: 0.6345, LR: 6.26e-04
Epoch 1, Batch 14/32, Trai

[I 2025-06-22 14:46:16,440] Trial 66 finished with value: 0.965925925925926 and parameters: {'max_learning_rate': 0.005845987426525882, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.4, 'weight_decay': 7.73613136733225e-06}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9659
Selected hidden_dims: [128, 64] (index 9)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.007869747342425501, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00010779531381111566, embedding_dim=128, hidden_dims=[128, 64], dropout=0.30000000000000004, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6972, LR: 3.23e-04
Epoch 1, Batch 7/32, Train Loss: 0.6613, LR: 8.42e-

[I 2025-06-22 14:47:26,740] Trial 67 finished with value: 0.9593830334190231 and parameters: {'max_learning_rate': 0.007869747342425501, 'batch_size': 64, 'embedding_dim': 128, 'hidden_dims_idx': 9, 'dropout': 0.30000000000000004, 'weight_decay': 0.00010779531381111566}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9594
Selected hidden_dims: [256, 32] (index 7)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004690617671725093, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00040944037963854555, embedding_dim=64, hidden_dims=[256, 32], dropout=0.2, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6987, LR: 1.93e-04
Epoch 1, Batch 7/32, Train Loss: 0.6748, LR: 5.02e-04
Epoch 1, Batch

[I 2025-06-22 14:48:07,192] Trial 68 finished with value: 0.9656887120835406 and parameters: {'max_learning_rate': 0.004690617671725093, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 7, 'dropout': 0.2, 'weight_decay': 0.00040944037963854555}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9657
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0004345252238696999, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0002579497262152991, embedding_dim=64, hidden_dims=[32], dropout=0.1, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6817, LR: 1.78e-05
Epoch 1, Batch 7/32, Train Loss: 0.7057, LR: 4.65e-05
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:48:50,870] Trial 69 finished with value: 0.9530864197530864 and parameters: {'max_learning_rate': 0.0004345252238696999, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.1, 'weight_decay': 0.0002579497262152991}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9531
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002381087409215055, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.1252075198582384e-05, embedding_dim=64, hidden_dims=[256], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7020, LR: 9.78e-05
Epoch 1, Batch 7/32, Train Loss: 0.6637, LR: 2.55e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 14:49:34,676] Trial 70 finished with value: 0.9728370221327968 and parameters: {'max_learning_rate': 0.002381087409215055, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.4, 'weight_decay': 4.1252075198582384e-05}. Best is trial 23 with value: 0.9728370221327968.


Trial finished. Validation Recall: 0.9728
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0021826284394301206, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.3224939159096505e-05, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 8.96e-05
Epoch 1, Batch 7/32, Train Loss: 0.6684, LR: 2.34e-04
Epoch 1, Batch 14/32,

[I 2025-06-22 14:50:18,841] Trial 71 finished with value: 0.9733802109492717 and parameters: {'max_learning_rate': 0.0021826284394301206, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 4.3224939159096505e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9734
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002224671363017776, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.958859596707406e-05, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 9.14e-05
Epoch 1, Batch 7/32, Train Loss: 0.6679, LR: 2.38e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:51:02,862] Trial 72 finished with value: 0.9718875502008032 and parameters: {'max_learning_rate': 0.002224671363017776, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 3.958859596707406e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9719
Selected hidden_dims: [256, 64] (index 6)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0021359966394775567, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.3536115793976603e-05, embedding_dim=64, hidden_dims=[256, 64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7003, LR: 8.77e-05
Epoch 1, Batch 7/32, Train Loss: 0.7045, LR: 2.29e-04
Epoch 1, Batc

[I 2025-06-22 14:51:42,912] Trial 73 finished with value: 0.9569520039584364 and parameters: {'max_learning_rate': 0.0021359966394775567, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 6, 'dropout': 0.5, 'weight_decay': 2.3536115793976603e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9570
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0030351319771814867, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.979288879054223e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 1.25e-04
Epoch 1, Batch 7/32, Train Loss: 0.6629, LR: 3.25e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 14:52:26,917] Trial 74 finished with value: 0.9702770780856423 and parameters: {'max_learning_rate': 0.0030351319771814867, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 4.979288879054223e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9703
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0023623514241845928, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.939736612736251e-05, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 9.70e-05
Epoch 1, Batch 7/32, Train Loss: 0.6724, LR: 2.53e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 14:53:10,819] Trial 75 finished with value: 0.9712556732223904 and parameters: {'max_learning_rate': 0.0023623514241845928, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 3.939736612736251e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9713
Selected hidden_dims: [64, 16] (index 13)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.0015003779118791976, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.1934910208205549e-05, embedding_dim=64, hidden_dims=[64, 16], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.7010, LR: 6.04e-05
Epoch 1, Batch 13/63, Train Loss: 0.6935, LR: 1.38e-04
Epoch 1, Batc

[I 2025-06-22 14:54:04,776] Trial 76 finished with value: 0.967741935483871 and parameters: {'max_learning_rate': 0.0015003779118791976, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 13, 'dropout': 0.5, 'weight_decay': 1.1934910208205549e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9677
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=2.2199402095439714e-05, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=6.704692944992521e-05, embedding_dim=128, hidden_dims=[256, 128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6909, LR: 9.12e-07
Epoch 1, Batch 7/32, Train Loss: 0.6895, LR: 2.38e-06
Epoch 1, B

[I 2025-06-22 14:55:23,069] Trial 77 finished with value: 0.8863636363636364 and parameters: {'max_learning_rate': 2.2199402095439714e-05, 'batch_size': 64, 'embedding_dim': 128, 'hidden_dims_idx': 5, 'dropout': 0.5, 'weight_decay': 6.704692944992521e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.8864
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0012111910174994387, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.592750383629086e-05, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 4.97e-05
Epoch 1, Batch 7/32, Train Loss: 0.6818, LR: 1.30e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 14:56:07,202] Trial 78 finished with value: 0.970912738214644 and parameters: {'max_learning_rate': 0.0012111910174994387, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 3.592750383629086e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9709
Selected hidden_dims: [256, 128] (index 5)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0008367180243799377, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=1.8562334653201786e-05, embedding_dim=64, hidden_dims=[256, 128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6830, LR: 3.44e-05
Epoch 1, Batch 7/32, Train Loss: 0.6869, LR: 8.95e-05
Epoch 1, Ba

[I 2025-06-22 14:56:51,007] Trial 79 finished with value: 0.9633350075339026 and parameters: {'max_learning_rate': 0.0008367180243799377, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 5, 'dropout': 0.5, 'weight_decay': 1.8562334653201786e-05}. Best is trial 71 with value: 0.9733802109492717.


Trial finished. Validation Recall: 0.9633
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.006311264200084833, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.413267202596088e-05, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 2.59e-04
Epoch 1, Batch 7/32, Train Loss: 0.6285, LR: 6.75e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 14:57:34,772] Trial 80 finished with value: 0.9734335839598998 and parameters: {'max_learning_rate': 0.006311264200084833, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 8.413267202596088e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9734
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0062338028367451195, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=2.8138484113782066e-05, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 2.56e-04
Epoch 1, Batch 7/32, Train Loss: 0.6294, LR: 6.67e-04
Epoch 1, Batch 14/32,

[I 2025-06-22 14:58:18,901] Trial 81 finished with value: 0.9712266532054518 and parameters: {'max_learning_rate': 0.0062338028367451195, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 2.8138484113782066e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9712
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.008371098494573922, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.282023956047794e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 3.44e-04
Epoch 1, Batch 7/32, Train Loss: 0.6109, LR: 8.96e-04
Epoch 1, Batch 14/32, Tra

[I 2025-06-22 14:58:57,942] Trial 82 finished with value: 0.9701789264413518 and parameters: {'max_learning_rate': 0.008371098494573922, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 5.282023956047794e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9702
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0036641223825818665, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.568563130131614e-05, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 1.50e-04
Epoch 1, Batch 7/32, Train Loss: 0.6582, LR: 3.92e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 14:59:41,690] Trial 83 finished with value: 0.9693621295831241 and parameters: {'max_learning_rate': 0.0036641223825818665, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 8.568563130131614e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9694
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0049924552122237766, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=7.976990608893044e-05, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 2.05e-04
Epoch 1, Batch 7/32, Train Loss: 0.6301, LR: 5.34e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 15:00:25,459] Trial 84 finished with value: 0.9728915662650602 and parameters: {'max_learning_rate': 0.0049924552122237766, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 7.976990608893044e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9729
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005149598527004532, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00012174638608686812, embedding_dim=64, hidden_dims=[32], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6895, LR: 2.12e-04
Epoch 1, Batch 7/32, Train Loss: 0.6728, LR: 5.51e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 15:01:09,214] Trial 85 finished with value: 0.9722081859525012 and parameters: {'max_learning_rate': 0.005149598527004532, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.5, 'weight_decay': 0.00012174638608686812}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9722
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005169115428450399, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.02622298471298e-05, embedding_dim=64, hidden_dims=[32], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6895, LR: 2.12e-04
Epoch 1, Batch 7/32, Train Loss: 0.6726, LR: 5.53e-04
Epoch 1, Batch 14/32, Trai

[I 2025-06-22 15:01:54,297] Trial 86 finished with value: 0.9706477732793523 and parameters: {'max_learning_rate': 0.005169115428450399, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.5, 'weight_decay': 8.02622298471298e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9706
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.004239308070358591, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00011963457479574425, embedding_dim=64, hidden_dims=[16], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7248, LR: 1.74e-04
Epoch 1, Batch 7/32, Train Loss: 0.6596, LR: 4.54e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 15:02:39,161] Trial 87 finished with value: 0.9649298597194389 and parameters: {'max_learning_rate': 0.004239308070358591, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 0, 'dropout': 0.5, 'weight_decay': 0.00011963457479574425}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9649
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0030205050841267886, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0001700339922872021, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 1.24e-04
Epoch 1, Batch 7/32, Train Loss: 0.6570, LR: 3.23e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 15:03:24,124] Trial 88 finished with value: 0.9708542713567839 and parameters: {'max_learning_rate': 0.0030205050841267886, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 0.0001700339922872021}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9709
Selected hidden_dims: [16] (index 0)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0038535216342476474, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=5.9010470975694065e-05, embedding_dim=64, hidden_dims=[16], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.7248, LR: 1.58e-04
Epoch 1, Batch 7/32, Train Loss: 0.6624, LR: 4.12e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 15:04:09,177] Trial 89 finished with value: 0.9682939104177152 and parameters: {'max_learning_rate': 0.0038535216342476474, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 0, 'dropout': 0.5, 'weight_decay': 5.9010470975694065e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9683
Selected hidden_dims: [128, 32] (index 10)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=32, max_learning_rate=0.001758238179434871, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=4.508309257595244e-05, embedding_dim=64, hidden_dims=[128, 32], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/63, Train Loss: 0.6807, LR: 7.08e-05
Epoch 1, Batch 13/63, Train Loss: 0.6796, LR: 1.62e-04
Epoch 1, Batc

[I 2025-06-22 15:05:05,108] Trial 90 finished with value: 0.9658291457286432 and parameters: {'max_learning_rate': 0.001758238179434871, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 10, 'dropout': 0.4, 'weight_decay': 4.508309257595244e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9658
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.006721419632990078, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=3.452450003802786e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 2.76e-04
Epoch 1, Batch 7/32, Train Loss: 0.6275, LR: 7.19e-04
Epoch 1, Batch 14/32, Tra

[I 2025-06-22 15:05:45,562] Trial 91 finished with value: 0.9691542288557214 and parameters: {'max_learning_rate': 0.006721419632990078, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 3.452450003802786e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9692
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.008400803752633726, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00012099894437758282, embedding_dim=64, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6983, LR: 3.45e-04
Epoch 1, Batch 7/32, Train Loss: 0.5821, LR: 8.99e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 15:06:25,840] Trial 92 finished with value: 0.9698795180722891 and parameters: {'max_learning_rate': 0.008400803752633726, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 0.00012099894437758282}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9699
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005467859065651327, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0001396283886226041, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 2.25e-04
Epoch 1, Batch 7/32, Train Loss: 0.6381, LR: 5.85e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 15:07:10,901] Trial 93 finished with value: 0.9728915662650602 and parameters: {'max_learning_rate': 0.005467859065651327, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 0.0001396283886226041}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9729
Selected hidden_dims: [32] (index 1)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.00972732297110542, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00015342733643080027, embedding_dim=64, hidden_dims=[32], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6895, LR: 4.00e-04
Epoch 1, Batch 7/32, Train Loss: 0.6345, LR: 1.04e-03
Epoch 1, Batch 14/32, Tra

[I 2025-06-22 15:07:51,721] Trial 94 finished with value: 0.9703069954705587 and parameters: {'max_learning_rate': 0.00972732297110542, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 1, 'dropout': 0.5, 'weight_decay': 0.00015342733643080027}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9703
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.005663552268980759, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.0002258971929384003, embedding_dim=64, hidden_dims=[128], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6918, LR: 2.33e-04
Epoch 1, Batch 7/32, Train Loss: 0.6359, LR: 6.06e-04
Epoch 1, Batch 14/32, T

[I 2025-06-22 15:08:32,245] Trial 95 finished with value: 0.9675785207700102 and parameters: {'max_learning_rate': 0.005663552268980759, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.5, 'weight_decay': 0.0002258971929384003}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9676
Selected hidden_dims: [256] (index 4)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0048056907860473405, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00010212491689205413, embedding_dim=128, hidden_dims=[256], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6980, LR: 1.97e-04
Epoch 1, Batch 7/32, Train Loss: 0.5720, LR: 5.14e-04
Epoch 1, Batch 14/32

[I 2025-06-22 15:09:47,502] Trial 96 finished with value: 0.9615384615384616 and parameters: {'max_learning_rate': 0.0048056907860473405, 'batch_size': 64, 'embedding_dim': 128, 'hidden_dims_idx': 4, 'dropout': 0.5, 'weight_decay': 0.00010212491689205413}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9615
Selected hidden_dims: [128] (index 3)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.002662825394026344, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=0.00014267692261990333, embedding_dim=64, hidden_dims=[128], dropout=0.4, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6957, LR: 1.09e-04
Epoch 1, Batch 7/32, Train Loss: 0.6721, LR: 2.85e-04
Epoch 1, Batch 14/32, 

[I 2025-06-22 15:10:33,160] Trial 97 finished with value: 0.969758064516129 and parameters: {'max_learning_rate': 0.002662825394026344, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 3, 'dropout': 0.4, 'weight_decay': 0.00014267692261990333}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9698
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.0033999359061936403, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.272428599497174e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 1.40e-04
Epoch 1, Batch 7/32, Train Loss: 0.6595, LR: 3.64e-04
Epoch 1, Batch 14/32, Tr

[I 2025-06-22 15:11:18,487] Trial 98 finished with value: 0.9723756906077348 and parameters: {'max_learning_rate': 0.0033999359061936403, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 8.272428599497174e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9724
Selected hidden_dims: [64] (index 2)

--- Optuna Trial ---
Suggested Hyperparams: NNHyperparams(batch_size=64, max_learning_rate=0.003279620690106178, epochs=5, early_stopping=True, patience=2, optimizer='adamw', weight_decay=8.308178047031056e-05, embedding_dim=64, hidden_dims=[64], dropout=0.5, seq_pooling='mean', n_classes=2, label_col='is_malware', dataloader_num_workers=2, dataloader_pin_memory=True, dataloader_persistent_workers=True, grad_scaler_max_norm=1.0)
Using device: cuda
Using f1 as the primary scoring metric for validation.
Performing internal train/validation split with ratio: 0.5
Training set size: 1997, Validation set size: 1998
Training set class distribution: {0: 1000, 1: 997}
Validation set class distribution: {0: 1000, 1: 998}
Using class weights: [0.9985     1.00150451]
Starting training...
Epoch 1, Batch 0/32, Train Loss: 0.6865, LR: 1.35e-04
Epoch 1, Batch 7/32, Train Loss: 0.6606, LR: 3.51e-04
Epoch 1, Batch 14/32, Tra

[I 2025-06-22 15:12:04,295] Trial 99 finished with value: 0.9723756906077348 and parameters: {'max_learning_rate': 0.003279620690106178, 'batch_size': 64, 'embedding_dim': 64, 'hidden_dims_idx': 2, 'dropout': 0.5, 'weight_decay': 8.308178047031056e-05}. Best is trial 80 with value: 0.9734335839598998.


Trial finished. Validation Recall: 0.9724

--- Optuna Study Complete ---
Number of finished trials: 100
Best trial:
  Value (F1 Score): 0.9734
  Params: 
    max_learning_rate: 0.006311264200084833
    batch_size: 64
    embedding_dim: 64
    hidden_dims_idx: 3
    dropout: 0.5
    weight_decay: 8.413267202596088e-05

All trials and their hyperparameters:
Trial 0: Value = 0.9674465920651069, Params = {'max_learning_rate': 0.0023856730313390243, 'batch_size': 16, 'embedding_dim': 128, 'hidden_dims_idx': 10, 'dropout': 0.4, 'weight_decay': 1.6634066787303807e-05}
Trial 1: Value = 0.8289085545722714, Params = {'max_learning_rate': 3.0173055455242765e-05, 'batch_size': 32, 'embedding_dim': 64, 'hidden_dims_idx': 14, 'dropout': 0.30000000000000004, 'weight_decay': 0.0003004062167267393}
Trial 2: Value = 0.9685785536159601, Params = {'max_learning_rate': 0.00047686427118995696, 'batch_size': 16, 'embedding_dim': 256, 'hidden_dims_idx': 14, 'dropout': 0.1, 'weight_decay': 0.000709815508396805

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_dropout,params_embedding_dim,params_hidden_dims_idx,params_max_learning_rate,params_weight_decay,state
0,0,0.967447,2025-06-22 10:21:52.275166,2025-06-22 10:23:35.920028,0 days 00:01:43.644862,16,0.4,128,10,0.002386,0.000017,COMPLETE
1,1,0.828909,2025-06-22 10:23:35.920028,2025-06-22 10:24:30.653731,0 days 00:00:54.733703,32,0.3,64,14,0.000030,0.000300,COMPLETE
2,2,0.968579,2025-06-22 10:24:30.653731,2025-06-22 10:40:31.917986,0 days 00:16:01.264255,16,0.1,256,14,0.000477,0.000710,COMPLETE
3,3,0.927717,2025-06-22 10:40:31.918986,2025-06-22 10:41:50.316013,0 days 00:01:18.397027,32,0.4,128,2,0.000086,0.000049,COMPLETE
4,4,0.962038,2025-06-22 10:41:50.317013,2025-06-22 10:48:57.538700,0 days 00:07:07.221687,64,0.2,256,14,0.000549,0.000031,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,0.967579,2025-06-22 15:07:51.722563,2025-06-22 15:08:32.244461,0 days 00:00:40.521898,64,0.5,64,3,0.005664,0.000226,COMPLETE
96,96,0.961538,2025-06-22 15:08:32.245964,2025-06-22 15:09:47.502461,0 days 00:01:15.256497,64,0.5,128,4,0.004806,0.000102,COMPLETE
97,97,0.969758,2025-06-22 15:09:47.503461,2025-06-22 15:10:33.159978,0 days 00:00:45.656517,64,0.4,64,3,0.002663,0.000143,COMPLETE
98,98,0.972376,2025-06-22 15:10:33.160982,2025-06-22 15:11:18.486789,0 days 00:00:45.325807,64,0.5,64,2,0.003400,0.000083,COMPLETE



Best NNHyperparams based on Optuna search:
NNHyperparams(batch_size=64,
              max_learning_rate=0.006311264200084833,
              epochs=20,
              early_stopping=True,
              patience=5,
              optimizer='adamw',
              weight_decay=8.413267202596088e-05,
              embedding_dim=64,
              hidden_dims=(128,),
              dropout=0.5,
              seq_pooling='mean',
              n_classes=2,
              label_col='is_malware',
              dataloader_num_workers=0,
              dataloader_pin_memory=True,
              dataloader_persistent_workers=False,
              grad_scaler_max_norm=1.0)


In [7]:
all_trails_df.sort_values(
    by=["value", "number"], ascending=[False, True], inplace=True
)

all_trails_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_dropout,params_embedding_dim,params_hidden_dims_idx,params_max_learning_rate,params_weight_decay,state
80,80,0.973434,2025-06-22 14:56:51.007751,2025-06-22 14:57:34.772150,0 days 00:00:43.764399,64,0.5,64,3,0.006311,0.000084,COMPLETE
71,71,0.973380,2025-06-22 14:49:34.676567,2025-06-22 14:50:18.841917,0 days 00:00:44.165350,64,0.5,64,4,0.002183,0.000043,COMPLETE
84,84,0.972892,2025-06-22 14:59:41.690610,2025-06-22 15:00:25.458343,0 days 00:00:43.767733,64,0.5,64,4,0.004992,0.000080,COMPLETE
93,93,0.972892,2025-06-22 15:06:25.841823,2025-06-22 15:07:10.901604,0 days 00:00:45.059781,64,0.5,64,3,0.005468,0.000140,COMPLETE
23,23,0.972837,2025-06-22 11:23:19.814748,2025-06-22 11:24:05.650193,0 days 00:00:45.835445,64,0.5,64,3,0.005856,0.000161,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...
1,1,0.828909,2025-06-22 10:23:35.920028,2025-06-22 10:24:30.653731,0 days 00:00:54.733703,32,0.3,64,14,0.000030,0.000300,COMPLETE
5,5,0.801853,2025-06-22 10:48:57.538700,2025-06-22 10:49:52.828404,0 days 00:00:55.289704,32,0.2,64,0,0.000037,0.000010,COMPLETE
65,65,0.790164,2025-06-22 14:44:44.527855,2025-06-22 14:45:37.711430,0 days 00:00:53.183575,32,0.3,64,4,0.000012,0.000068,COMPLETE
8,8,0.770241,2025-06-22 11:06:11.765708,2025-06-22 11:08:17.447119,0 days 00:02:05.681411,16,0.2,128,14,0.000036,0.000015,COMPLETE
