In [1]:
%load_ext autoreload
%autoreload 2

import os
import importlib
import pandas as pd 
import json
from qsprpred.data import QSPRDataset, RandomSplit
from qsprpred.models import QSPRModel
from qsprpred.data.descriptors.fingerprints import MorganFP
from qsprpred.data.descriptors.sets import SmilesDesc
from qsprpred.models import OptunaOptimization, TestSetAssessor, CrossValAssessor, SklearnModel
from qsprpred.data.sampling.splits import DataSplit
from qsprpred.data.processing.data_filters import RepeatsFilter, CategoryFilter
from qsprpred.models import EarlyStoppingMode
from sklearn.ensemble import GradientBoostingClassifier
from qsprpred.extra.gpu.models.chemprop import ChempropModel




import numpy as np
from typing import Iterable, List, Tuple

from typing import Literal
import sys
#sys.path.insert(0, '/home/ubuntu/implementation/QSPRpred')

#import qsprpred.extra.gpu.models.gdnn as gdnn_module
#from importlib import reload
#
#reload(gdnn_module)

modname = 'qsprpred.extra.gpu.models.gdnn'
if modname in sys.modules:
    del sys.modules[modname]

import qsprpred.extra.gpu.models.gdnn as gdnn_module
from qsprpred.extra.gpu.models.gdnn import GGNN
importlib.reload(gdnn_module)

#from qsprpred.extra.gpu.models.gdnn import DNNModel, GGNN
#print(DNNModel.__init__.__code__.co_varnames)

Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


<module 'qsprpred.extra.gpu.models.gdnn' from '/home/brokesm/anaconda3/lib/python3.12/site-packages/qsprpred/extra/gpu/models/gdnn.py'>

In [60]:
#create a folder structure

os.makedirs("./output/models", exist_ok=True)
os.makedirs("./output/benchmarking/data", exist_ok=True)
os.makedirs("./output/optimization/data", exist_ok=True)

In [61]:
# define a customsplit class
# inherits from datasplit
# input - QSPRDataset ids
# output - (train,test) splits

class CustomSplit(DataSplit):

    def __init__(self, test_ids: list[list[str]]):
        super().__init__()
        self.test_ids = test_ids

    def split(
        self,
        X: np.ndarray | pd.DataFrame, 
        y: np.ndarray | pd.DataFrame | pd.Series
    ) -> Iterable[tuple[list[int], list[int]]]:
        """Uses only the specified IDs from the data set as test set
        Returns an iterator of training and test split indices, 
        just like a scikit learn splitter would.
        """
        splits = []
        for test_ids in self.test_ids:
            test = np.where(X.index.isin(test_ids))[0]
            train = np.where(~X.index.isin(test_ids))[0]
            splits.append((train, test))
        return splits

In [62]:
def select_ids(dataset_name, keep_ids):
    return [f"{dataset_name}_{"0" * (4 - len(str(id)))}{id}" for id in keep_ids]

In [63]:
def data_loading(
        target:Literal["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"], 
        purpose:Literal["ForOptimization","ForBenchmarking"],
        model:QSPRModel | None = None,
        save = True
        ) -> Tuple[QSPRDataset, List, List, List]:

    dataset_name = f"{purpose}_{target}"
    store_dir = f"./output/{purpose[3:].lower()}/data"

    dataset = QSPRDataset.fromTableFile(
        filename=f"./papyrus_datasets/{target}.csv",
        sep=",",
        store_dir=store_dir,
        name=dataset_name,
        target_props=[{"name": "Y", "task": "SINGLECLASS", "th":"precomputed"}],
        random_state=42
    )

    if model is not None:
        if model.supportsEarlyStopping:
            # In case of GNNs (both support early stopping) add SmilesDesc as descriptors
            dataset.addDescriptors([SmilesDesc()])
        else:
            # In case of XGB (doesn't support early stopping) add MorganFP with default parameters as descriptors
            dataset.addDescriptors([MorganFP()])

    if save:
        dataset.save()
    

    return dataset


In [None]:
def hyperparameter_optimization(
        model:QSPRModel, 
        dataset:QSPRDataset, 
        search_space:dict, 
        scoring:str, 
        val_ids:List
        ):

    gridsearcher = OptunaOptimization(
        n_trials=1000,
        param_grid=search_space,
        model_assessor=CrossValAssessor(scoring=scoring, split=CustomSplit([val_ids])),
    )

    gridsearcher.optimize(model, dataset)


In [65]:
model_ggnn = gdnn_module.DNNModel(
    base_dir='./output/models/GGNN',
    name='GGNNModel',
    parameters={'n_epochs': 100,
                'n_dim': 74,          # 74-256 for example
                'in_feats': 74,
                'n_steps': 3,
                'n_etypes': 1,
                'n_hidden_layers': 2,
                'dropout_rate': 0.2,
                "lr":1e-4,
                "hidden_dim":100
               },
    tol=0.01,
    random_state=42,
    patience=5
)

search_space_ggnn = {
    "n_hidden_layers": ["int", 1, 5],
    "dropout_rate": ["float", 0.05, 0.5],
    "n_steps": ["int", 2, 5],
    "batch_size": ["int", 32,64,128,256],
    'n_dim': ["int",74,74],
    'in_feats': ["int",74,74],
    'n_steps': ["int",3,3],
    'n_etypes': ["int",1,1]
    }



GGNN updated


In [66]:
model_chemprop = ChempropModel(
    base_dir='./output/models/Chemprop',
    name='ChempropModel',
    parameters={
        "epochs": 5,
        "loss_function":'binary_cross_entropy'
        },
    quiet_logger=False
)

search_space_chemprop = {
    "epochs": ["int", 1, 2],
    }

In [67]:
model_xgb = SklearnModel(
            name="XGBModel",
            alg=GradientBoostingClassifier,
            base_dir="./output/models/XGB",
            parameters={
                "max_depth":2,
                "n_estimators":10
            }
        )

search_space_xgb = {
    "max_depth": ["int", 2, 10],
    "n_estimators": ["int", 5,500]
    }

In [68]:
def set_loader(
        target:Literal["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"], 
        split_type:Literal["random", "cluster", "aggregate_cluster"],
        seed,
        purpose:Literal["ForBenchmarking","ForOptimization"]
        ):
    seed = str(seed)

    with open(f"./papyrus_datasets/{split_type}_split.json") as file:
        json_file = file.read()

    split = json.loads(json_file)
    
    train_ids = split[split_type][target][seed]["train"]
    val_ids = split[split_type][target][seed]["valid"]
    test_ids = split[split_type][target][seed]["test"]

    train_ids = select_ids(f"{purpose}_{target}",list(train_ids))
    val_ids = select_ids(f"{purpose}_{target}",list(val_ids))
    test_ids = select_ids(f"{purpose}_{target}",list(test_ids))

    return train_ids, val_ids, test_ids

In [69]:
def optimize(
        target:Literal["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"],
        split:Literal["random","cluster","aggregate_cluster"],
        model:QSPRModel,
        search_space:dict,
        seed = 0
        ):

        dataset = data_loading(target,model=model, purpose="ForOptimization")
        train_ids, val_ids, test_ids = set_loader(target,split,seed,purpose="ForOptimization")
        selected_ids = train_ids + val_ids
        dataset.prepareDataset(data_filters=[CategoryFilter(name="QSPRID", values=selected_ids, keep=True)])
        
        hyperparameter_optimization(model=model, dataset=dataset, search_space=search_space, scoring="matthews_corrcoef", val_ids=val_ids)
        


In [None]:
targets = ["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"]
splits = ["random","cluster", "aggregate_cluster"]
models = [model_xgb, model_ggnn, model_chemprop]
search_spaces = [search_space_xgb, search_space_ggnn, search_space_chemprop]

for target in targets:
    for split in splits:
        for model, search_space in zip(models, search_spaces):
            model.name += f"_{target}_{split}"
            optimize(target = target, split = split, model = model, search_space = search_space)
            model.name = model.name.split("_")[0]

[I 2025-11-29 10:43:15,341] A new study created in memory with name: no-name-a4240b2c-26a9-4923-a41b-e3d4031d55ad
[I 2025-11-29 10:44:54,735] Trial 0 finished with value: 0.5677757655483825 and parameters: {'max_depth': 10, 'n_estimators': 455}. Best is trial 0 with value: 0.5677757655483825.
[I 2025-11-29 10:45:22,972] Trial 1 finished with value: 0.6303318950582116 and parameters: {'max_depth': 3, 'n_estimators': 475}. Best is trial 1 with value: 0.6303318950582116.
[I 2025-11-29 10:45:23,028] A new study created in memory with name: no-name-c7679c35-4ec5-46df-89de-a995bb04660b
[I 2025-11-29 10:45:50,725] Trial 0 finished with value: 0.6199091257360924 and parameters: {'max_depth': 2, 'n_estimators': 698}. Best is trial 0 with value: 0.6199091257360924.
[I 2025-11-29 10:46:42,322] Trial 1 finished with value: 0.5723182533800396 and parameters: {'max_depth': 5, 'n_estimators': 523}. Best is trial 0 with value: 0.6199091257360924.
[I 2025-11-29 10:46:42,360] A new study created in memo

In [17]:
def get_model_params(
        target:Literal["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"], 
        split_type:Literal["random","cluster","aggregate_cluster"],
        model:Literal["XGB","GGNN","Chemprop"]
    ):

    with open(f"./output/models/{model}/{model}Model_{target}_{split_type}/{model}Model_{target}_{split_type}_meta.json") as f:
        params = f.read()

    params = json.loads(params)
    return params["py/state"]["parameters"]

In [None]:
def prepare_for_benchmarking(dataset:QSPRDataset,descriptors,keep_ids, split=None, chemprop=False):

    dataset.addDescriptors([descriptors])

    if chemprop:
        # binary cross entropy loss cannot deal with target variable being of type int
        dataset.transformProperties(["Y","Y_original"],transformer=np.float32)
    
    dataset.prepareDataset(
        data_filters=[CategoryFilter(name="QSPRID", values=keep_ids, keep=True)],
        split=split
    )

In [None]:
def benchmark(
    target:Literal["P00918","P03372","P04637","P08684","P14416","P22303","P42336","Q12809","Q16637","Q9Y468"],
    split_type:Literal["random","cluster","aggregate_cluster"]
):
    os.makedirs(f"./output/benchmarking/{target}/{split_type}", exist_ok=True)

    # save the dataset corresponding to a given target
    data_loading(target,purpose="ForBenchmarking")
    
    results = {
        "model":[],
        "metric":[],
        "score":[]
    }
    for metric in ["matthews_corrcoef","f1","recall","precision","roc_auc"]:
        for seed in range(1,21):
            # get the ids for training, validation and test sets for a given combination of target + split + seed
            train_ids, val_ids, test_ids = set_loader(target,split_type,seed=seed, purpose="ForBenchmarking")
            dataset_path = f"./output/benchmarking/data/ForBenchmarking_{target}/ForBenchmarking_{target}_meta.json"
            
            dataset_xgb_val = QSPRDataset.fromFile(dataset_path)
            dataset_ggnn_val = QSPRDataset.fromFile(dataset_path)
            dataset_chemprop_val = QSPRDataset.fromFile(dataset_path)
            dataset_xgb_test = QSPRDataset.fromFile(dataset_path)
            dataset_ggnn_test = QSPRDataset.fromFile(dataset_path)
            dataset_chemprop_test = QSPRDataset.fromFile(dataset_path)

            prepare_for_benchmarking(dataset_xgb_val,MorganFP(),keep_ids = train_ids + val_ids)
            prepare_for_benchmarking(dataset_xgb_test,MorganFP(),keep_ids = train_ids + test_ids,split = CustomSplit([test_ids]))

            prepare_for_benchmarking(dataset_ggnn_val,SmilesDesc(),keep_ids = train_ids + val_ids)
            prepare_for_benchmarking(dataset_ggnn_test,SmilesDesc(),keep_ids = train_ids + test_ids,split = CustomSplit([test_ids]))

            prepare_for_benchmarking(dataset_chemprop_val,SmilesDesc(),keep_ids = train_ids + val_ids, chemprop=True)
            prepare_for_benchmarking(dataset_chemprop_test,SmilesDesc(),keep_ids = train_ids + test_ids,split = CustomSplit([test_ids]), chemprop=True)

            model_xgb.parameters = get_model_params(target,split_type,"XGB")
            model_ggnn.parameters = get_model_params(target,split_type,"GGNN")
            model_chemprop.parameters["epochs"] = get_model_params(target,split_type,"Chemprop")["epochs"]

            proba = True
            if metric == "matthews_corrcoef":
                proba = False

            xgb_score = TestSetAssessor(scoring=metric, use_proba=proba)(model_xgb, dataset_xgb_test)
            results["model"].append("XGB")
            results["metric"].append(metric)
            results["score"].append(xgb_score.item())
            
            CrossValAssessor(
                scoring=metric,
                use_proba=proba,
                mode=EarlyStoppingMode.RECORDING,
                split=CustomSplit([val_ids]))(model_ggnn, dataset_ggnn_val)
            print(f"Best epoch found for GGNN: {model_ggnn.earlyStopping.optimalEpochs}")
            ggnn_score = TestSetAssessor(scoring=metric, use_proba=proba, mode=EarlyStoppingMode.OPTIMAL)(model_ggnn, dataset_ggnn_test)
            results["model"].append("GGNN")
            results["metric"].append(metric)
            results["score"].append(ggnn_score.item())

            CrossValAssessor(
                scoring=metric,
                use_proba=proba,
                mode=EarlyStoppingMode.RECORDING,
                split=CustomSplit([val_ids]))(model_chemprop, dataset_chemprop_val)
            print(f"Best epoch found for Chemprop: {model_chemprop.earlyStopping.optimalEpochs}")
            chemprop_score = TestSetAssessor(scoring=metric, use_proba=proba ,mode=EarlyStoppingMode.OPTIMAL)(model_chemprop, dataset_chemprop_test)
            results["model"].append("Chemprop")
            results["metric"].append(metric)
            results["score"].append(chemprop_score.item())
            
    pd.DataFrame(results).to_csv(f"./output/benchmarking/{target}/{split_type}/results.csv")

In [None]:
# import os

# from qsprpred.data import QSPRDataset, RandomSplit
# from qsprpred.data.descriptors.fingerprints import MorganFP
# from qsprpred.data.descriptors.sets import SmilesDesc

# # Create dataset
# dataset = QSPRDataset.fromTableFile(
#     filename="./papyrus_datasets/P00918.csv",
#     sep=",",
#     store_dir="./tutorial_output/data",
#     name="ChempropTutorialDataset",
#     target_props=[{"name": "Y", "task": "SINGLECLASS", "th":"precomputed"}],
#     random_state=42
# )

# # calculate compound features and split dataset into train and test
# feature_calculators = [SmilesDesc()]
# dataset.prepareDataset(
#     split=RandomSplit(test_fraction=0.2, dataset=dataset),
#     feature_calculators=feature_calculators)

# dataset.getDF().head()

Unnamed: 0_level_0,SMILES,Y,QSPRID,Y_original
QSPRID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ChempropTutorialDataset_0000,CC(=O)OCC1OC(NS(=O)(=O)NO)C=CC1OC(C)=O,1,ChempropTutorialDataset_0000,1
ChempropTutorialDataset_0001,CC(=O)OCC1OC(CC(=O)C=Cc2cccc(O)c2)C(OC(C)=O)C(...,0,ChempropTutorialDataset_0001,0
ChempropTutorialDataset_0002,COc1ccc(CNc2ccc(S(N)(=O)=O)cc2)cc1,1,ChempropTutorialDataset_0002,1
ChempropTutorialDataset_0003,CN(C)C=Nc1ncnc2c1ncn2CC(=O)Nc1ccc(S(N)(=O)=O)cc1,1,ChempropTutorialDataset_0003,1
ChempropTutorialDataset_0004,Cc1sc(-c2noc(N)c2S(N)(=O)=O)cc1S(N)(=O)=O,1,ChempropTutorialDataset_0004,1


In [None]:
for target in targets:
    for split in splits:
        benchmark(target,split)



GGNN updated
Fitting...
Epoch 1/10 Train loss: 0.6277 Val loss: 0.5874 Val accuracy: 73.06% MCC: 0
Epoch 5/10 Train loss: 0.5382 Val loss: 0.4760 Val accuracy: 81.67% MCC: 0.4912225591995657
Early stopping triggered at epoch 10
Best epoch found for GGNN: 10
GGNN updated
Fitting...


Class sizes
Y 0: 29.12%, 1: 70.88%
train size = 3,238 | val size = 360
Total size = 3,598
Number of parameters = 355,201
  self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps))
  0%|          | 0/2 [00:00<?, ?it/s]Epoch 0
Loss = 6.1273e-01, PNorm = 33.9643, GNorm = 1.2816, lr_0 = 1.7734e-04
Loss = 5.8429e-01, PNorm = 33.9644, GNorm = 0.8302, lr_0 = 2.4766e-04
Loss = 6.2623e-01, PNorm = 33.9663, GNorm = 0.8440, lr_0 = 3.1797e-04
Loss = 5.8786e-01, PNorm = 33.9733, GNorm = 0.4684, lr_0 = 3.8828e-04
Loss = 6.0948e-01, PNorm = 33.9809, GNorm = 1.1587, lr_0 = 4.5859e-04
Loss = 6.2300e-01, PNorm = 33.9960, GNorm = 0.7858, lr_0 = 5.2891e-04
Validation auc = 0.673553
Model best validation auc = 0.673553 on epoch                     0
 50%|█████     | 1/2 [00:09<00:09,  9.88s/it]Epoch 1
Loss = 5.8928e-01, PNorm = 34.0197, GNorm = 1.5068, lr_0 = 6.0625e-04
Loss = 5.4187e-01, PNorm = 34.0457, GNorm = 0.1440, lr_0 = 6.7656e-04
Loss = 5.8971e-01, PNo

Best epoch found for Chemprop: 2


Number of parameters = 355,201
  self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps))
  0%|          | 0/2 [00:00<?, ?it/s]Epoch 0
Loss = 5.9732e-01, PNorm = 33.9645, GNorm = 0.4683, lr_0 = 1.6972e-04
Loss = 5.9573e-01, PNorm = 33.9650, GNorm = 0.9197, lr_0 = 2.3310e-04
Loss = 5.9431e-01, PNorm = 33.9690, GNorm = 1.1844, lr_0 = 2.9648e-04
Loss = 5.8436e-01, PNorm = 33.9776, GNorm = 0.7600, lr_0 = 3.5986e-04
Loss = 6.3003e-01, PNorm = 33.9869, GNorm = 0.4278, lr_0 = 4.2324e-04
Loss = 5.8825e-01, PNorm = 33.9991, GNorm = 1.0894, lr_0 = 4.8662e-04
Loss = 5.6111e-01, PNorm = 34.0211, GNorm = 1.1246, lr_0 = 5.5000e-04
 50%|█████     | 1/2 [00:13<00:13, 13.22s/it]Epoch 1
Loss = 5.3913e-01, PNorm = 34.0530, GNorm = 0.7539, lr_0 = 6.1972e-04
Loss = 5.7939e-01, PNorm = 34.0866, GNorm = 0.8602, lr_0 = 6.8310e-04
Loss = 5.4159e-01, PNorm = 34.1256, GNorm = 0.2820, lr_0 = 7.4648e-04
Loss = 5.3249e-01, PNorm = 34.1803, GNorm = 0.5851, lr_0 = 8.0986e

GGNN updated
Fitting...
Epoch 1/10 Train loss: 0.6156 Val loss: 0.6076 Val accuracy: 70.22% MCC: 0
Epoch 5/10 Train loss: 0.5767 Val loss: 0.5835 Val accuracy: 74.32% MCC: 0.30544384977136074
Epoch 10/10 Train loss: 0.5041 Val loss: 0.5028 Val accuracy: 78.69% MCC: 0.44532157846340825
Best epoch found for GGNN: 10
GGNN updated
Fitting...


Class sizes
Y 0: 29.04%, 1: 70.96%
train size = 3,292 | val size = 366
Total size = 3,658
Number of parameters = 355,201
  self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps))
  0%|          | 0/2 [00:00<?, ?it/s]Epoch 0
Loss = 6.2813e-01, PNorm = 33.9641, GNorm = 0.3818, lr_0 = 1.7615e-04
Loss = 6.0641e-01, PNorm = 33.9644, GNorm = 1.7211, lr_0 = 2.4538e-04
Loss = 6.0463e-01, PNorm = 33.9680, GNorm = 0.1988, lr_0 = 3.1462e-04
Loss = 5.9299e-01, PNorm = 33.9745, GNorm = 0.3945, lr_0 = 3.8385e-04
Loss = 6.0138e-01, PNorm = 33.9809, GNorm = 1.0192, lr_0 = 4.5308e-04
Loss = 6.0321e-01, PNorm = 33.9946, GNorm = 0.1473, lr_0 = 5.2231e-04
Validation auc = 0.650484
Model best validation auc = 0.650484 on epoch                     0
 50%|█████     | 1/2 [00:09<00:09,  9.95s/it]Epoch 1
Loss = 5.7469e-01, PNorm = 34.0198, GNorm = 0.1788, lr_0 = 5.9846e-04
Loss = 5.7242e-01, PNorm = 34.0517, GNorm = 2.3409, lr_0 = 6.6769e-04
Loss = 5.6928e-01, PNo

Best epoch found for Chemprop: 2


  0%|          | 0/2 [00:00<?, ?it/s]Epoch 0
Loss = 6.4253e-01, PNorm = 33.9635, GNorm = 0.2139, lr_0 = 1.6781e-04
Loss = 5.7699e-01, PNorm = 33.9653, GNorm = 0.4149, lr_0 = 2.2945e-04
Loss = 5.8682e-01, PNorm = 33.9685, GNorm = 0.3321, lr_0 = 2.9110e-04
Loss = 5.9421e-01, PNorm = 33.9767, GNorm = 1.2703, lr_0 = 3.5274e-04
Loss = 6.4664e-01, PNorm = 33.9802, GNorm = 1.0554, lr_0 = 4.1438e-04
Loss = 6.2460e-01, PNorm = 33.9884, GNorm = 0.5944, lr_0 = 4.7603e-04
Loss = 5.9493e-01, PNorm = 34.0010, GNorm = 0.2046, lr_0 = 5.3767e-04
 50%|█████     | 1/2 [00:12<00:12, 12.55s/it]Epoch 1
Loss = 6.0078e-01, PNorm = 34.0224, GNorm = 0.6440, lr_0 = 6.0548e-04
Loss = 5.9928e-01, PNorm = 34.0559, GNorm = 0.7725, lr_0 = 6.6712e-04
Loss = 5.7354e-01, PNorm = 34.0878, GNorm = 0.1453, lr_0 = 7.2877e-04
Loss = 5.5060e-01, PNorm = 34.1318, GNorm = 0.2393, lr_0 = 7.9041e-04
Loss = 5.5535e-01, PNorm = 34.1825, GNorm = 0.6593, lr_0 = 8.5205e-04
Loss = 5.6046e-01, PNorm = 34.2480, GNorm = 0.3197, lr_0 = 9.1

GGNN updated
Fitting...
Epoch 1/10 Train loss: 0.6252 Val loss: 0.5911 Val accuracy: 71.66% MCC: 0
Epoch 5/10 Train loss: 0.5354 Val loss: 0.5314 Val accuracy: 77.27% MCC: 0.3807748612260348
Epoch 10/10 Train loss: 0.4960 Val loss: 0.4986 Val accuracy: 79.14% MCC: 0.4419066127174418
Best epoch found for GGNN: 10
GGNN updated
Fitting...


Class sizes
Y 0: 29.41%, 1: 70.59%
train size = 3,363 | val size = 374
Total size = 3,737
Number of parameters = 355,201
  0%|          | 0/1 [00:00<?, ?it/s]Epoch 0
Loss = 6.1593e-01, PNorm = 33.9641, GNorm = 0.2512, lr_0 = 1.7388e-04
Loss = 6.2355e-01, PNorm = 33.9638, GNorm = 0.5484, lr_0 = 2.4104e-04
Loss = 6.4036e-01, PNorm = 33.9671, GNorm = 0.6698, lr_0 = 3.0821e-04
Loss = 5.7167e-01, PNorm = 33.9738, GNorm = 0.9594, lr_0 = 3.7537e-04
Loss = 6.0334e-01, PNorm = 33.9813, GNorm = 0.7544, lr_0 = 4.4254e-04
Loss = 5.9697e-01, PNorm = 33.9928, GNorm = 0.4433, lr_0 = 5.0970e-04
Validation auc = 0.643974
Model best validation auc = 0.643974 on epoch                     0
100%|██████████| 1/1 [00:10<00:00, 10.63s/it]
Class sizes
Y 0: 29.30%, 1: 70.70%
Total size = 3,737
Number of parameters = 355,201


Best epoch found for Chemprop: 2


  0%|          | 0/2 [00:00<?, ?it/s]Epoch 0
Loss = 6.2852e-01, PNorm = 33.9636, GNorm = 0.4016, lr_0 = 1.6689e-04
Loss = 6.1004e-01, PNorm = 33.9652, GNorm = 0.2256, lr_0 = 2.2770e-04
Loss = 6.0313e-01, PNorm = 33.9699, GNorm = 0.2083, lr_0 = 2.8851e-04
Loss = 5.6892e-01, PNorm = 33.9764, GNorm = 1.0889, lr_0 = 3.4932e-04
Loss = 6.0033e-01, PNorm = 33.9856, GNorm = 1.3300, lr_0 = 4.1014e-04
Loss = 5.9776e-01, PNorm = 34.0019, GNorm = 2.5208, lr_0 = 4.7095e-04
Loss = 5.8490e-01, PNorm = 34.0204, GNorm = 1.3447, lr_0 = 5.3176e-04
 50%|█████     | 1/2 [00:10<00:10, 10.89s/it]Epoch 1
Loss = 5.2659e-01, PNorm = 34.0513, GNorm = 1.8178, lr_0 = 5.9865e-04
Loss = 5.6827e-01, PNorm = 34.0809, GNorm = 0.4465, lr_0 = 6.5946e-04
Loss = 5.8258e-01, PNorm = 34.1254, GNorm = 1.4900, lr_0 = 7.2027e-04
Loss = 5.9850e-01, PNorm = 34.1672, GNorm = 1.0959, lr_0 = 7.8108e-04
Loss = 5.7372e-01, PNorm = 34.2172, GNorm = 0.4803, lr_0 = 8.4189e-04
Loss = 5.3135e-01, PNorm = 34.2781, GNorm = 0.8050, lr_0 = 9.0

In [None]:
# 'C:\\Users\\marti\\AppData\\Roaming\\Python\\Python312\\site-packages'