In [3]:
from utils.trainer import ClassifierTrainer

In [1]:
from utils.nn import create_mlp_layers
import torch


clf = torch.nn.Sequential(*create_mlp_layers(784, [300, 100], 10))

In [5]:
from sklearn.datasets import fetch_openml


mnist = fetch_openml('mnist_784', version=1, parser='auto')
X, y = mnist['data'], mnist['target']

In [2]:
class NamedDataset(torch.utils.data.Dataset):
    def __init__(self, names: list[str], data: list[torch.Tensor]):
        self.names = names
        self.data = data
        
    def __len__(self) -> int:
        return len(self.data[0])
    
    def __getitem__(self, idx) -> dict[str, torch.Tensor]:
        return {name: data[idx] for name, data in zip(self.names, self.data)}


def create_generator(dataset: torch.utils.data.Dataset, batch_size: int = 128, shuffle: bool = True, drop_last: bool = True, **kwargs):
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, **kwargs)
    while True:
        yield from loader

In [7]:
train_generator = create_generator(NamedDataset(['x', 'y'], [torch.tensor(X.values) / 255.0, y.values.astype(int)]))

In [6]:
trainer = ClassifierTrainer(clf)

trainer.train(train_generator)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mantonii-belyshev[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 5000/5000 [01:22<00:00, 60.35it/s]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
loss/train,█▆▄▅▃▂▃▂▂▁▂▂▂▁▁▂▂▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
loss/train,0.00999


In [4]:
from utils.bmds import BMDS, BMDSTrainer
from utils.bmds_data import DefaultBMDSDataset
import torch


def create_generator(dataset: torch.utils.data.Dataset, batch_size: int = 128, shuffle: bool = True, drop_last: bool = True, **kwargs):
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, **kwargs)
    while True:
        yield from loader


x = torch.randn(100, 2)

dist = (x[:, None] - x).pow(2).sum(2).pow(0.5)

bmds = BMDS(100, 100, hidden_dim=100)
bmds_trainer = BMDSTrainer(bmds, lr=1e-4)
bmds_dataset = DefaultBMDSDataset(dist)

bmds_trainer.train(create_generator(bmds_dataset, batch_size=1000), project_name='bmds', experiment_name='trial', total_iters=10000)

  return torch.tensor(t, dtype=dtype, **kwargs)


100%|██████████| 10000/10000 [06:24<00:00, 26.02it/s]


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
log_prob/train,▁▁▁▂▂▁▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████
loglam #0/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loglam #1/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loglam #2/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loglam #3/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loglam #4/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss/train,███▇▇▇▇▆▆▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
reg/train,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
scale #0/train,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
scale #1/train,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
log_prob/train,-0.24634
loglam #0/train,0.01
loglam #1/train,0.01
loglam #2/train,0.01
loglam #3/train,0.01
loglam #4/train,0.01
loss/train,0.24647
reg/train,0.00013
scale #0/train,0.00645
scale #1/train,0.00642


In [2]:
sorted(bmds.log_lambda)

[tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindBackward0>),
 tensor(0.0100, grad_fn=<UnbindB

In [4]:
mu = bmds(dist)

In [6]:
sorted(mu.pow(2).mean(0))

[tensor(7.2029e-06, grad_fn=<UnbindBackward0>),
 tensor(9.6591e-06, grad_fn=<UnbindBackward0>),
 tensor(1.1087e-05, grad_fn=<UnbindBackward0>),
 tensor(1.2270e-05, grad_fn=<UnbindBackward0>),
 tensor(1.3462e-05, grad_fn=<UnbindBackward0>),
 tensor(1.6875e-05, grad_fn=<UnbindBackward0>),
 tensor(1.8600e-05, grad_fn=<UnbindBackward0>),
 tensor(1.9149e-05, grad_fn=<UnbindBackward0>),
 tensor(2.3443e-05, grad_fn=<UnbindBackward0>),
 tensor(2.4978e-05, grad_fn=<UnbindBackward0>),
 tensor(2.5925e-05, grad_fn=<UnbindBackward0>),
 tensor(2.7593e-05, grad_fn=<UnbindBackward0>),
 tensor(3.0630e-05, grad_fn=<UnbindBackward0>),
 tensor(3.1121e-05, grad_fn=<UnbindBackward0>),
 tensor(3.1462e-05, grad_fn=<UnbindBackward0>),
 tensor(3.3057e-05, grad_fn=<UnbindBackward0>),
 tensor(4.2357e-05, grad_fn=<UnbindBackward0>),
 tensor(5.2929e-05, grad_fn=<UnbindBackward0>),
 tensor(5.6328e-05, grad_fn=<UnbindBackward0>),
 tensor(5.6675e-05, grad_fn=<UnbindBackward0>),
 tensor(5.7745e-05, grad_fn=<UnbindBackw