In [1]:

import os
import torch
import logging
import torch.nn as nn
import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from enum import Enum
from functools import cached_property
from mubench.base.model import DNN
from mubench.base.dataset import Dataset
from mubench.base.args import Config
from mubench.utils.scaler import StandardScaler
import random
# Seed every random number generator this notebook has imported so that a
# fresh "Restart Kernel -> Run All" reproduces the same numbers. torch is
# imported above alongside NumPy and `random`, so its generator is seeded too.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Module-level logger for any diagnostics emitted from later cells.
logger = logging.getLogger(__name__)



In [2]:
# Configuration for this run: dataset name and feature type are set first,
# then the data directory is derived from them.
config = Config()
config.dataset_name = 'ESOL'
config.feature_type = "rdkit"
# Directory layout: <data root>/<dataset name>/split-<splitting seed>.
# The splitting seed is read from the Config object as constructed above.
config.data_dir = os.path.join(
    "../data/files",
    config.dataset_name,
    f"split-{config.dataset_splitting_random_seed}",
)


In [3]:
# Build the dataset for the 'train' partition using the configuration above.
# NOTE(review): presumably `prepare` reads from `config.data_dir` — confirm in Dataset.
dataset = Dataset().prepare(config, partition='train')

In [4]:
# Unfitted label scaler; it is fitted on the training labels in the next cell.
scalar = StandardScaler()

In [5]:
# Fit the scaler on the training labels. The call is left as the last
# expression so its return value is displayed as the cell result.
train_lbs = dataset.lbs
scalar.fit(train_lbs)

<mubench.utils.scaler.StandardScaler at 0x135b0e160>

In [8]:
# Replace the dataset's labels with their standardised values.
# NOTE: this updates `dataset` itself, so running the cell a second time would
# standardise the already-standardised labels again; re-create `dataset`
# before re-running.
scaled_lbs = scalar.transform(dataset.lbs)
dataset.update_lbs(scaled_lbs)

<mubench.base.dataset.dataset.Dataset at 0x135a86550>

In [13]:
# Gather the labels stored on each individual data instance, in dataset order,
# so they can be checked against the standardised labels set above.
lbs = [inst.lbs for inst in dataset.data_instances]

In [18]:
# Stack the per-instance labels along a new leading axis and cast to float64
# so the summary statistics below are computed numerically.
lb_insts = np.stack(lbs, axis=0).astype(np.float64)

In [19]:
# Sanity check: the mean of the standardised labels should be close to 0.
np.mean(lb_insts)

-3.9387069609761654e-17

In [20]:
# Sanity check: the standard deviation of the standardised labels should be close to 1.
np.std(lb_insts)

0.9999999999999998

In [22]:
# Element-wise negation of the dataset masks after casting them to bool.
# NOTE(review): this displays the full array; consider summarising it
# (e.g. with `.sum()` or a slice) to keep the notebook output short.
np.logical_not(dataset.masks.astype(bool))

array([[False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [

In [23]:
# Throwaway 5x6 array of uniform samples in [0, 1), used below to exercise
# the scaler's inverse transform.
qwe = np.random.random(size=(5, 6))

In [25]:
# Apply the fitted scaler's inverse transform to the random sample and
# display the result.
# NOTE(review): the recorded output has dtype=object, which suggests the
# scaler's stored statistics are object arrays — confirm in StandardScaler.
unscaled_sample = scalar.inverse_transform(qwe)
unscaled_sample

array([[-1.8978385661671329, -1.548349074405276, -1.7845113495911256,
        -1.906094606702397, -2.1607472081620878, -1.6939108314019224],
       [-2.131480753937886, -1.1774171150673653, -1.0264053323615112,
        -2.2452193110384755, -1.3875780790543544, -1.939679586597793],
       [-1.8574417664365295, -1.106367109920066, -2.901458888426099,
        -2.867653390974394, -3.008206575077746, -1.3016743592076492],
       [-1.4160796519654553, -1.2231280083489116, -0.9949896054549008,
        -1.3719631981911637, -2.081292850572806, -1.4110961302397202],
       [-2.8022298724715644, -1.7064579206580048, -2.7495491623925252,
        -1.0663038404557403, -1.9544816854026774, -2.179637627703497]],
      dtype=object)