In [1]:
# %%
# for gridsearch

!pip install pyyaml==5.4.1

# %%
from IPython import get_ipython  # type: ignore

# %% 
# Load the autoreload extension and switch it to mode 2.
# `run_line_magic(name, args)` replaces the deprecated `magic("name args")`.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# %%

from dotmap import DotMap
import json
import os

import numpy as np

# Import the PyTorch modules
import torch  # type: ignore
from torch import nn  # type: ignore
from torch.optim import SGD, Adam, RMSprop  # type: ignore

# Import Tensorflow writer
from torch.utils.tensorboard import SummaryWriter  # type: ignore

# Import modules from XTransformers
from x_transformers.x_transformers import AttentionLayers, Encoder, ContinuousTransformerWrapper


# Import the giotto-deep modules
from gdeep.data import OrbitsGenerator, DataLoaderKwargs
from gdeep.topology_layers import SetTransformer, PersFormer
#from gdeep.topology_layers import AttentionPooling
from gdeep.topology_layers import ISAB, PMA, SAB
from gdeep.pipeline import Pipeline
from gdeep.search import Gridsearch
import json
#from gdeep.search import Gridsearch

from optuna.pruners import MedianPruner, NopPruner

# %%

#Configs
# Settings consumed below when building the orbit generator and its
# dataloaders (and, for `dtype`, when deciding whether to cast the model).
config_data = DotMap(dict(
    batch_size_train=64,
    num_orbits_per_class=2_000,
    validation_percentage=0.0,
    test_percentage=0.0,
    num_jobs=2,
    dynamical_system='classical_convention',
    homology_dimensions=(0, 1),
    dtype='float32',
    arbitrary_precision=False,
))


# Model and optimisation settings read by the model-selection and training
# code further down.
config_model = DotMap(dict(
    # Selects the branch of the model-construction chain below. Options:
    # SetTransformer, PersFormer, PytorchTransformer, DeepSet,
    # X-Transformer, Old_SetTransformer
    implementation='Old_SetTransformer',
    dim_input=4,
    num_outputs=1,  # for classification tasks this should be 1
    num_classes=5,  # number of classes
    dim_hidden=128,
    num_heads=8,
    num_induced_points=32,
    layer_norm=False,  # use layer norm
    pre_layer_norm=False,
    num_layers_encoder=4,
    num_layers_decoder=3,
    attention_type="induced_attention",
    activation=nn.GELU,
    dropout=0.2,
    optimizer=torch.optim.Adam,  # optimiser class handed to pipe.train
    learning_rate=1e-3,
    num_epochs=1000,
    pooling_type="max",
    weight_decay=0.0,
    n_accumulated_grads=0,
))



# %%



# Define the data loader


# Per-split dataloader keyword arguments; only the training batch size is
# taken from the config, the other two are fixed here.
dataloaders_dicts = DataLoaderKwargs(
    train_kwargs={"batch_size": config_data.batch_size_train},
    val_kwargs={"batch_size": 4},
    test_kwargs={"batch_size": 3},
)

# Orbit dataset generator, fully parameterised by `config_data`.
og = OrbitsGenerator(
    num_orbits_per_class=config_data.num_orbits_per_class,
    homology_dimensions=config_data.homology_dimensions,
    validation_percentage=config_data.validation_percentage,
    test_percentage=config_data.test_percentage,
    n_jobs=config_data.num_jobs,
    dynamical_system=config_data.dynamical_system,
    dtype=config_data.dtype,
    arbitrary_precision=config_data.arbitrary_precision,
)

if config_data.arbitrary_precision:
    # Load orbits stored on disk and hand them to the generator.
    orbits = np.load(os.path.join('data', 'orbit5k_arbitrary_precision.npy'))
    og.orbits_from_array(orbits)

# `dim_input` is defined in `config_model`, not in `config_data`. Looking it
# up on `config_data` made DotMap create an empty child map on the fly, which
# never compares equal to 2, so the orbit branch could not be reached.
if config_model.dim_input == 2:
    dl_train, _, _ = og.get_dataloader_orbits(dataloaders_dicts)
else:
    dl_train, _, _ = og.get_dataloader_persistence_diagrams(dataloaders_dicts)



# Define the model
# Build `model` according to `config_model.implementation`. Each branch below
# handles one implementation name.
if config_model.implementation == 'SetTransformer':
    # Only this branch takes all of its hyperparameters (except num_outputs)
    # from `config_model`.
    model = SetTransformer(
            dim_input=config_model.dim_input,
            num_outputs=1,  # for classification tasks this should be 1
            dim_output=config_model.num_classes,  # number of classes
            dim_hidden=config_model.dim_hidden,
            num_heads=config_model.num_heads,
            num_inds=config_model.num_induced_points,
            ln=config_model.layer_norm,  # use layer norm
            n_layers_encoder=config_model.num_layers_encoder,
            n_layers_decoder=config_model.num_layers_decoder,
            attention_type=config_model.attention_type,
            dropout=config_model.dropout
    )

elif config_model.implementation == 'PersFormer':
    # NOTE(review): every hyperparameter in this branch is a literal; none of
    # the values in `config_model` are used.
    model = PersFormer(
            dim_input=2,
            dim_output=5,
            n_layers=5,
            hidden_size=32,
            n_heads=4,
            dropout=0.1,
            layer_norm=True,
            pre_layer_norm=False,
            activation=nn.GELU,
            attention_layer_type="self_attention")

elif config_model.implementation == 'PytorchTransformer':
    # NOTE(review): `PytorchTransformer` is not imported in the visible part
    # of this notebook, so this branch presumably raises NameError - confirm.
    # All hyperparameters are literals here as well.
    model = PytorchTransformer(
            dim_input=2,
            dim_output=5,
            hidden_size=64,
            nhead=8,
            activation='gelu',
            norm_first=True,
            num_layers=3,
            dropout=0.0,
    )
elif config_model.implementation == 'DeepSet':
    # NOTE(review): `DeepSet` is not imported in the visible part of this
    # notebook either - confirm. The model is cast with `.double()` here
    # unconditionally, independent of `config_data.dtype`.
    model = DeepSet(dim_input=2,
                    dim_output=config_model.num_classes,
                    dim_hidden=config_model.dim_hidden,
                    n_layers_encoder=config_model.num_layers_encoder,
                    n_layers_decoder=config_model.num_layers_decoder,
                    pool=config_model.pooling_type).double()

elif config_model.implementation == "X-Transformer":
    # Pipeline of: continuous transformer encoder -> attention pooling ->
    # `num_layers_decoder` Linear+ReLU blocks -> final linear classifier.
    # NOTE(review): the only visible import of `AttentionPooling` is commented
    # out, so this branch presumably raises NameError - confirm.
    model = \
    nn.Sequential(
        ContinuousTransformerWrapper(
            dim_in = 2,
            use_pos_emb = True,
            max_seq_len = None,
            attn_layers = Encoder(
                dim = config_model.dim_hidden,
                depth = config_model.num_layers_encoder,
                heads = config_model.num_heads,
            ),
        ),
        AttentionPooling(hidden_dim = config_model.dim_hidden, q_length=1),
        nn.Sequential(*[nn.Sequential(nn.Linear(config_model.dim_hidden,
                            config_model.dim_hidden),
                            nn.ReLU())
                for _ in range(config_model.num_layers_decoder)]),
        nn.Linear(config_model.dim_hidden, config_model.num_classes)
    )
    
elif config_model.implementation == "Old_SetTransformer":
    # initialize SetTransformer model
    class SetTransformer(nn.Module):
        """Vanilla SetTransformer classifier.

        Adapted from
        https://github.com/juho-lee/set_transformer/blob/master/main_pointcloud.py

        Note: inside this branch the class name shadows the `SetTransformer`
        imported at the top of the notebook.

        Args:
            dim_input: dimension of each element of the input set.
            num_outputs: number of output vectors requested from the pooling
                layer (passed to `PMA`).
            dim_output: size of the final linear layer, i.e. number of classes.
            num_inds: number of induced points, see the Set Transformer paper.
            dim_hidden: width of the hidden representation.
            num_heads: number of attention heads.
            ln: whether to use layer norm.
        """

        def __init__(
            self,
            dim_input=3,  # dimension of input data for each element in the set
            num_outputs=1,
            dim_output=40,  # number of classes
            num_inds=32,  # number of induced points, see  Set Transformer paper
            dim_hidden=128,
            num_heads=4,
            ln=False,  # use layer norm
        ):
            super().__init__()
            # Encoder: two stacked induced set-attention blocks.
            self.enc = nn.Sequential(
                ISAB(dim_input, dim_hidden, num_heads, num_inds, ln=ln),
                ISAB(dim_hidden, dim_hidden, num_heads, num_inds, ln=ln),
            )
            # Decoder: dropout, attention pooling, dropout, linear classifier.
            self.dec = nn.Sequential(
                nn.Dropout(),
                PMA(dim_hidden, num_heads, num_outputs, ln=ln),
                nn.Dropout(),
                nn.Linear(dim_hidden, dim_output),
            )

        def forward(self, input):
            """Encode, pool and classify a batch of sets.

            Only axis 1 is squeezed. A bare `squeeze()` would also drop the
            batch axis for a batch of one (or the class axis when
            `dim_output == 1`), yielding a tensor the loss cannot consume.
            Assumes the decoder output is laid out as
            (batch, num_outputs, dim_output) - TODO confirm against `PMA`.
            """
            return self.dec(self.enc(input)).squeeze(1)


    # Read the input width and the number of classes from `config_model`
    # instead of repeating the literals 4 and 5; all remaining constructor
    # arguments keep the class defaults, exactly as before.
    model = SetTransformer(dim_input=config_model.dim_input,
                           dim_output=config_model.num_classes)
else:
    raise Exception("Unknown Implementation")
# %%

# Cast the model to double precision only when the data is configured as
# float64; otherwise leave it untouched. Either way, report the choice.
if config_data.dtype != "float64":
    print("use float32 model")
else:
    print("Use float64 model")
    model = model.double()

# %%
# Do training and validation

# initialise loss
# Loss used for training: multi-class cross entropy.
loss_fn = nn.CrossEntropyLoss()

# Initialize the TensorBoard writer (`SummaryWriter` comes from
# torch.utils.tensorboard). The run comment is just the implementation name;
# the commented-out variant below serialised both config dicts into it.
#writer = SummaryWriter(comment=json.dumps(config_model.toDict())\
#                                + json.dumps(config_data.toDict()))
writer = SummaryWriter(comment=config_model.implementation)

# Initialise the pipeline with the model, the dataloaders, the loss and the
# writer. Only the training dataloader is supplied; the second slot is None
# (presumably the validation loader - confirm against `Pipeline`).
pipe = Pipeline(model, [dl_train, None], loss_fn, writer)
# %%


# train the model
# Launch training: the optimiser class, the epoch count, the optimiser keyword
# arguments and the gradient-accumulation setting all come from `config_model`.
pipe.train(
    config_model.optimizer,
    config_model.num_epochs,
    cross_validation=False,
    optimizers_param=dict(
        lr=config_model.learning_rate,
        weight_decay=config_model.weight_decay,
    ),
    n_accumulated_grads=config_model.n_accumulated_grads,
)

# %%
# keep training
#pipe.train(Adam, 300, False, keep_training=True)

# %%
# %%
# Gridsearch

# initialise gridsearch
# Search object wrapping the pipeline, using a no-op pruner.
pruner = NopPruner()
search = Gridsearch(
    pipe,
    search_metric="accuracy",
    n_trials=50,
    best_not_last=True,
    pruner=pruner,
)

# Hyperparameter search spaces, one dictionary per group.
# NOTE(review): num_heads, hidden_dim and the two layer-norm flags are given
# as strings while the other entries are numbers; presumably this makes the
# search treat them as categorical choices - confirm against Gridsearch.
optimizers_params = dict(
    lr=[1e-6, 1e-3],
    weight_decay=[0.0, 0.2],
)
dataloaders_params = dict(batch_size=[32, 64, 16])
models_hyperparams = dict(
    n_layer_enc=[2, 5],
    n_layer_dec=[1, 5],
    num_heads=["2", "4", "8"],
    hidden_dim=["16", "32", "64"],
    dropout=[0.0, 0.2],
    layer_norm=["True", "False"],
    pre_layer_norm=["True", "False"],
)

# starting the gridsearch
#search.start((Adam,), n_epochs=500, cross_validation=False,
#             optimizers_params=optimizers_params,
#             dataloaders_params=dataloaders_params,
#             models_hyperparams=models_hyperparams, lr_scheduler=None,
#             scheduler_params=None)


# %%
#print(search.best_val_acc_gs, search.best_val_loss_gs)
# %%
#df_res = search._results()
#df_res
# %%


Using GPU!
No TPUs...


  defaults = yaml.load(f)


use float32 model
Epoch 1
-------------------------------
No TPUs
Batch training loss:  1.6216005430221558  	Batch training accuracy:  20.2125  	[ 125 / 125 ]                     
Time taken for this epoch: 8s
No TPUs
Validation results: 
 Accuracy: 19.335938%,                 Avg loss: 1.613257 

Epoch 2
-------------------------------
No TPUs
Batch training loss:  1.6150968685150147  	Batch training accuracy:  20.175  	[ 125 / 125 ]                     
Time taken for this epoch: 8s
No TPUs
Validation results: 
 Accuracy: 20.068359%,                 Avg loss: 1.613575 

Epoch 3
-------------------------------
No TPUs
Batch training loss:  1.614751015663147  	Batch training accuracy:  19.675  	[ 125 / 125 ]                     
Time taken for this epoch: 8s
No TPUs
Validation results: 
 Accuracy: 20.068359%,                 Avg loss: 1.610720 

Epoch 4
-------------------------------
No TPUs
Batch training loss:  1.614335199356079  	Batch training accuracy:  19.35  	[ 125 / 125 ]     

KeyboardInterrupt: 

In [None]:
# Shape probe: push a random batch of 2 sets with 1000 four-dimensional points
# through the model. The input takes its dtype and device from the model's own
# parameters; forcing float64 fails against a float32 model, which is what the
# config (dtype 'float32') and the run log ("use float32 model") indicate.
reference_param = next(model.parameters())
x = torch.rand((2, 1000, 4),
               dtype=reference_param.dtype,
               device=reference_param.device)
model(x).shape

In [4]:
# Fetch the grid-search results table from the search object and display it.
# NOTE(review): `_results` is an underscore-prefixed (private) method, and the
# `search.start(...)` call in the first cell is commented out, so this cell
# presumably relies on state from an earlier run - confirm.
df_res = search._results()
df_res

Study statistics: 
Number of pruned trials:  0
Number of complete trials:  50
Best trial:
Metric Value for best trial:  99.10714285714286



invalid value encountered in subtract



Unnamed: 0,model,dataset,optimizer,lr,batch_size,dropout,n_layer_enc,n_layer_dec,num_heads,hidden_dim,layer_norm,pre_layer_norm,loss,accuracy
0,model,dataset,Adam,0.000425,32,0.158746,4,4,4,64,True,True,inf,94.921875
1,model,dataset,Adam,0.000946,48,0.130351,3,4,8,32,True,False,inf,98.710317
2,model,dataset,Adam,0.000823,32,0.044103,5,2,8,16,True,False,inf,97.070312
3,model,dataset,Adam,0.000339,32,0.174644,3,3,2,32,False,True,inf,63.769531
4,model,dataset,Adam,0.000835,64,0.016269,2,1,2,16,True,True,inf,96.191406
5,model,dataset,Adam,7.8e-05,32,0.036563,2,3,4,64,True,False,inf,89.160156
6,model,dataset,Adam,0.000218,32,0.126969,5,5,8,16,True,False,inf,94.628906
7,model,dataset,Adam,0.000901,64,0.154917,2,4,8,16,False,False,inf,96.582031
8,model,dataset,Adam,0.00055,32,0.125399,2,1,2,64,True,False,inf,96.875
9,model,dataset,Adam,0.000493,32,0.00441,3,1,2,64,False,False,inf,96.777344


In [2]:
!pip install sympy

Collecting sympy
  Downloading sympy-1.9-py3-none-any.whl (6.2 MB)
[K     |████████████████████████████████| 6.2 MB 12.0 MB/s eta 0:00:01
[?25hCollecting mpmath>=0.19
  Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)
[K     |████████████████████████████████| 532 kB 98.2 MB/s eta 0:00:01
[?25hInstalling collected packages: mpmath, sympy
Successfully installed mpmath-1.2.1 sympy-1.9
You should consider upgrading via the '/home/jovyan/conda-envs/giottodeep/bin/python -m pip install --upgrade pip' command.[0m


In [6]:
import torch_optimizer as optim

# Build a Shampoo optimiser over the parameters of the notebook's `model`.
# The previous version referenced `m`, which is never defined in the visible
# notebook and would fail on a fresh kernel.
optimizer = optim.Shampoo(
    model.parameters(),
    lr=1e-1,
    momentum=0.0,
    weight_decay=0.0,
    epsilon=1e-4,
    update_freq=1,
)

pandas.core.frame.DataFrame

In [7]:
# Write the grid-search results table to a CSV file (relative path).
df_res.to_csv('set_transformer_grid_search.csv')

In [9]:
# Re-fetch and display the grid-search results table (the same call as in the
# earlier `In [4]` cell; the recorded output is identical).
# NOTE(review): `_results` is an underscore-prefixed (private) method.
df_res = search._results()
df_res

Study statistics: 
Number of pruned trials:  0
Number of complete trials:  50
Best trial:
Metric Value for best trial:  99.10714285714286



invalid value encountered in subtract



Unnamed: 0,model,dataset,optimizer,lr,batch_size,dropout,n_layer_enc,n_layer_dec,num_heads,hidden_dim,layer_norm,pre_layer_norm,loss,accuracy
0,model,dataset,Adam,0.000425,32,0.158746,4,4,4,64,True,True,inf,94.921875
1,model,dataset,Adam,0.000946,48,0.130351,3,4,8,32,True,False,inf,98.710317
2,model,dataset,Adam,0.000823,32,0.044103,5,2,8,16,True,False,inf,97.070312
3,model,dataset,Adam,0.000339,32,0.174644,3,3,2,32,False,True,inf,63.769531
4,model,dataset,Adam,0.000835,64,0.016269,2,1,2,16,True,True,inf,96.191406
5,model,dataset,Adam,7.8e-05,32,0.036563,2,3,4,64,True,False,inf,89.160156
6,model,dataset,Adam,0.000218,32,0.126969,5,5,8,16,True,False,inf,94.628906
7,model,dataset,Adam,0.000901,64,0.154917,2,4,8,16,False,False,inf,96.582031
8,model,dataset,Adam,0.00055,32,0.125399,2,1,2,64,True,False,inf,96.875
9,model,dataset,Adam,0.000493,32,0.00441,3,1,2,64,False,False,inf,96.777344
