In [25]:
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
import math
import numpy as np
from utils import process_image
from tqdm.notebook import tqdm
import os
import pickle as pkl


def _compute_pds(dataset, filtration_func, limit, filtration_kwargs):
    """Apply ``filtration_func`` to the first ``limit`` images of ``dataset`` and collect the diagrams."""
    # Iterate over an explicit index range so tqdm knows the total and can
    # show real progress (wrapping ``enumerate`` hides the length).
    n_items = len(dataset) if limit is None else min(limit, len(dataset))
    pds = []
    for idx in tqdm(range(n_items)):
        img, _label = dataset[idx]
        # filtration_func returns a pair; only its first element is stored.
        diags, _ = filtration_func(img, **filtration_kwargs)
        pds.append(diags)
    return pds


def get_pds_from_data(dataset_type, data_path, filtration_func, filtration_path_name, limit=None, **kwargs):
    """Compute diagrams for the train and test splits of a dataset and pickle them.

    Writes two files into ``filtration_path_name``:
    ``{dataset_type}_pds.pkl`` (train split) and
    ``{dataset_type}_pds_test.pkl`` (test split). Each file holds a list with
    one entry per processed image, namely the first element of the pair
    returned by ``filtration_func(img, **kwargs)``.

    Args:
        dataset_type: Name of the dataset to load. Only ``"MNIST"`` is supported.
        data_path: Directory where torchvision stores / downloads the raw data.
        filtration_func: Callable ``(img, **kwargs) -> (diags, <ignored>)``.
        filtration_path_name: Output directory for the pickle files.
        limit: Maximum number of images to process per split; ``None`` means
            the whole split. Values ``<= 0`` now produce an empty list (the
            previous loop always processed at least one image).
        **kwargs: Additional keyword arguments forwarded to ``filtration_func``.

    Raises:
        ValueError: If ``dataset_type`` is not supported.
    """
    # Validate before touching the filesystem: previously an unknown dataset
    # created the directories and then died on an unbound local variable.
    if dataset_type != "MNIST":
        raise ValueError(
            f"Unsupported dataset_type {dataset_type!r}; only 'MNIST' is implemented"
        )

    os.makedirs(filtration_path_name, exist_ok=True)
    os.makedirs(data_path, exist_ok=True)

    # Load both splits up front so a failed download aborts before any
    # expensive filtration work starts.
    to_tensor = transforms.ToTensor()
    dataset_train = torchvision.datasets.MNIST(root=data_path, train=True, download=True, transform=to_tensor)
    dataset_test = torchvision.datasets.MNIST(root=data_path, train=False, download=True, transform=to_tensor)

    splits = (
        (f'{dataset_type}_pds.pkl', dataset_train),
        (f'{dataset_type}_pds_test.pkl', dataset_test),
    )
    for file_name, dataset in splits:
        pds = _compute_pds(dataset, filtration_func, limit, kwargs)
        # os.path.join tolerates a trailing separator in filtration_path_name.
        with open(os.path.join(filtration_path_name, file_name), 'wb') as f:
            pkl.dump(pds, f)

In [31]:
# Settings for one diagram-extraction run; read by the get_pds_from_data call.
config = dict(
    dataset_type="MNIST",
    data_path="RAW/MNIST",
    filtration_func=process_image,
    # Extra keyword arguments forwarded to filtration_func.
    args=dict(num_filtrations=10, filter_type='uniform'),
    # None means "process every image"; set an int for a quick partial run.
    limit=None,
    filtration_path_name='PD/direction/10/',
)

In [27]:
# Forward `limit` explicitly: config defines it, but the call used to drop it,
# so changing config['limit'] had no effect on the run.
get_pds_from_data(
    config['dataset_type'],
    config['data_path'],
    config['filtration_func'],
    config['filtration_path_name'],
    limit=config['limit'],
    **config['args'],
)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [28]:
dataset_type = config['dataset_type']

name = f'{dataset_type}_pds.pkl'
# os.path.join avoids the doubled separator that plain concatenation produced
# when filtration_path_name already ends with '/'.
# The pickle is the train-split file written by get_pds_from_data; only
# unpickle files this notebook produced itself.
with open(os.path.join(config['filtration_path_name'], name), 'rb') as f:
    x = pkl.load(f)

In [30]:
# Display the first entry of the list loaded from the pickle above.
x[0]

tensor([[0.6073, 0.6200, 0.0000, 0.0000],
        [0.3295, 1.0000, 0.0000, 0.0000],
        [0.9490, 0.9922, 1.0000, 0.0000],
        [0.7451, 0.9922, 1.0000, 0.0000],
        [0.8824, 0.9922, 1.0000, 0.0000],
        [0.9882, 0.9922, 1.0000, 0.0000],
        [0.6745, 0.9922, 1.0000, 0.0000],
        [0.6200, 1.0000, 1.0000, 0.0000],
        [0.4503, 0.4531, 0.0000, 0.6283],
        [0.4522, 0.4577, 0.0000, 0.6283],
        [0.4568, 0.4596, 0.0000, 0.6283],
        [0.4688, 0.4716, 0.0000, 0.6283],
        [0.4457, 0.4763, 0.0000, 0.6283],
        [0.5460, 0.5488, 0.0000, 0.6283],
        [0.5609, 0.5636, 0.0000, 0.6283],
        [0.6026, 0.6039, 0.0000, 0.6283],
        [0.2619, 1.0000, 0.0000, 0.6283],
        [0.9490, 0.9922, 1.0000, 0.6283],
        [0.7451, 0.9922, 1.0000, 0.6283],
        [0.8824, 0.9922, 1.0000, 0.6283],
        [0.9882, 0.9922, 1.0000, 0.6283],
        [0.6745, 0.9922, 1.0000, 0.6283],
        [0.6174, 1.0000, 1.0000, 0.6283],
        [0.4563, 0.5770, 0.0000, 1

In [1]:
import json
# Open with an explicit encoding: without it the file is decoded with the
# platform default, which is not UTF-8 on every system.
# The handle is intentionally left open here; the next cell parses it with json.load(f).
f = open('configs/train/img_transformer_config.json', encoding='utf-8')


In [2]:
# `f` is the handle opened in the previous cell. Close it once parsing is done
# (even if parsing fails) so the file descriptor is not leaked for the rest of
# the kernel session.
try:
    data = json.load(f)
finally:
    f.close()

In [3]:
# Display the parsed training config.
data

{'arch': {'type': 'ImageSet2Set',
  'args': {'n_out_max': 32,
   'd_in': 2,
   'd_out': 2,
   'd_hidden': 64,
   'd_mlp': 256}},
 'data': {'train': {'batch_size': 64,
   'num_workers': 2,
   'dataset': {'type': 'PDMnist',
    'args': {'data_dir': 'RAW/MNIST',
     'pd_dir': 'PD/direction/10/MNIST_pds.pkl',
     'train': True}}},
  'val': {'batch_size': 64,
   'num_workers': 2,
   'dataset': {'type': 'PDMnist',
    'args': {'data_dir': 'RAW/MNIST',
     'pd_dir': 'PD/direction/10/MNIST_pds_test.pkl',
     'train': False}}}},
 'optimizer': {'type': 'Adam', 'args': {'lr': 0.0003, 'weight_decay': 0.0001}},
 'lr_scheduler': {'type': 'LinearLR',
  'args': {'start_factor': 0.2, 'total_iters': 250}},
 'loss': {'type': 'PersistenceWeightedSlicedWassersteinLoss',
  'args': {'q': 1, 'reduce': 'sum', 'random_seed': 0}},
 'trainer': {'n_epochs': 40,
  'wandb_project': 'mnist_pds',
  'run_name': 'mnist_10_filt_fixed_full',
  'grad_norm_clip': 10}}

In [101]:
import losses
# Resolve the loss class by the name stored under data['loss']['type'].
# NOTE(review): the recorded output shows this cell failing with
# "module 'torch' has no attribute 'vmap'" — presumably the installed torch
# predates torch.vmap; confirm the required torch version before relying on it.
_loss_cfg = data['loss']
loss_fn = getattr(losses, _loss_cfg['type'])

AttributeError: module 'torch' has no attribute 'vmap'

In [4]:
import datasets
# Build the training dataset: the class is looked up in `datasets` by the
# configured type name and instantiated with the configured keyword arguments.
_train_dataset_cfg = data['data']['train']['dataset']
_train_dataset_cls = getattr(datasets, _train_dataset_cfg['type'])
train_dataset = _train_dataset_cls(**_train_dataset_cfg['args'])

In [8]:
import models

# Instantiate the architecture named in data['arch']['type'] with its configured arguments.
_arch_cfg = data['arch']
_arch_cls = getattr(models, _arch_cfg['type'])
model = _arch_cls(**_arch_cfg['args'])

In [10]:
import torch
# Look up the optimizer class in torch.optim by its configured name and bind
# it to the model's parameters with the configured keyword arguments.
_optimizer_cfg = data['optimizer']
_optimizer_cls = getattr(torch.optim, _optimizer_cfg['type'])
optimizer = _optimizer_cls(model.parameters(), **_optimizer_cfg['args'])

In [15]:
# Look up the LR scheduler class in torch.optim.lr_scheduler by its configured
# name and attach it to the optimizer with the configured keyword arguments.
_scheduler_cfg = data['lr_scheduler']
_scheduler_cls = getattr(torch.optim.lr_scheduler, _scheduler_cfg['type'])
scheduler = _scheduler_cls(optimizer, **_scheduler_cfg['args'])

In [56]:
import utils
# Resolve the filtration function in `utils` by its configured name.
# NOTE(review): the training config displayed earlier in this notebook has no
# 'filtration_func' key, so `data` must have been reloaded from a different
# config before this cell ran — hidden state; confirm which file it should be.
_filtration_cfg = data['filtration_func']
fn = getattr(utils, _filtration_cfg['type'])

In [57]:
# Display the resolved function object to check which callable was picked up.
fn

<function utils.process_image(img, num_filtrations, filter_type='uniform')>

In [76]:
# Run prepare_dataset.py in a subprocess with the given config file
# (presumably the script counterpart of get_pds_from_data — confirm).
! python prepare_dataset.py -c configs/data/mnist_dir10_config.json

0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [79]:
dataset_type = "MNIST"
name = f'{dataset_type}_pds.pkl'

# Re-open the pickle found under PD/direction/10/ and unpickle its contents.
with open('PD/direction/10/' + name, mode='rb') as f:
    x = pkl.load(f)

# Print the first stored entry as a quick sanity check.
first_entry = x[0]
print(first_entry)

tensor([[0.6073, 0.6200, 0.0000, 0.0000],
        [0.3295, 1.0000, 0.0000, 0.0000],
        [0.9490, 0.9922, 1.0000, 0.0000],
        [0.7451, 0.9922, 1.0000, 0.0000],
        [0.8824, 0.9922, 1.0000, 0.0000],
        [0.9882, 0.9922, 1.0000, 0.0000],
        [0.6745, 0.9922, 1.0000, 0.0000],
        [0.6200, 1.0000, 1.0000, 0.0000],
        [0.4503, 0.4531, 0.0000, 0.6283],
        [0.4522, 0.4577, 0.0000, 0.6283],
        [0.4568, 0.4596, 0.0000, 0.6283],
        [0.4688, 0.4716, 0.0000, 0.6283],
        [0.4457, 0.4763, 0.0000, 0.6283],
        [0.5460, 0.5488, 0.0000, 0.6283],
        [0.5609, 0.5636, 0.0000, 0.6283],
        [0.6026, 0.6039, 0.0000, 0.6283],
        [0.2619, 1.0000, 0.0000, 0.6283],
        [0.9490, 0.9922, 1.0000, 0.6283],
        [0.7451, 0.9922, 1.0000, 0.6283],
        [0.8824, 0.9922, 1.0000, 0.6283],
        [0.9882, 0.9922, 1.0000, 0.6283],
        [0.6745, 0.9922, 1.0000, 0.6283],
        [0.6174, 1.0000, 1.0000, 0.6283],
        [0.4563, 0.5770, 0.0000, 1

In [16]:
! python train_pd_model.py -c configs/train/img_transformer_config.json -w bbe60953ed99662c4459f461386ecd58a2f2ee3a 

2024-04-15 19:14:45.676386: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2024-04-15 19:14:45.676417: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
  File "train_pd_model.py", line 9, in <module>
    import losses
  File "C:\Users\yura-\pd_prediction\Predicting_Persistence_Diagrams\mnist_dir_filtration\losses\__init__.py", line 1, in <module>
    from losses.losses import SlicedWassersteinLoss, WeightedSlicedWassersteinLoss, PersistenceWeightedSlicedWassersteinLoss, ChamferLoss, HausdorffLoss
  File "C:\Users\yura-\pd_prediction\Predicting_Persistence_Diagrams\mnist_dir_filtration\losses\losses.py", line 7, in <module>
    sliced_wasserstein_distance_batched = torch.vmap(sliced_wasserstein_distance, randomness="same")
AttributeError: module 'torch' has no attribute 