Template to train the Autoencoder / Encoder

In [1]:
import sys
import yaml
import torch
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger

sys.path.append("../")
from src.models.lit_encoder import LitAutoencoder
from src.data.make_dataset import train_dataloader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Data parameters
dataset = "sphere"
n_obs = 1000
n_dim = 5
batch_size = 200

# Model parameters
input_dim = n_dim
encoder_layer = [10, 10, 10]
activation = "ReLU"
lr = 0.001
kernel_type = "phate"  # "gaussian" or "phate".
loss_emb = False  # Not really working for now...
# Bandwidth for the gaussian kernel. The variable keeps the `bandwitdh`
# spelling because that is the keyword name passed to LitAutoencoder.
bandwitdh = 1
t = 1  # time for the gaussian kernel. For PHATE, we use their optimal time.
scale = 0.0005  # scale of the noise added to the data (before the encoder).

# Trainer parameters
max_epochs = 50
fast_dev_run = False
accelerator = "gpu"
devices = 1

exp_name = "phate_small"
# Draw a random seed in [0, 1000) and store it as a plain Python int.
# `torch.randint` returns a 1-element tensor; keeping the tensor would make
# the YAML dump of the parameters unreadable and forces every consumer to
# coerce it, so unwrap it once here.
seed = int(torch.randint(0, 1000, size=(1,)).item())

In [3]:
# Uncomment the lines below to save the run parameters to runs/param_<exp_name>.yaml (the runs/ directory must already exist).

# params = {"dataset":dataset, "n_obs":n_obs, "n_dim":n_dim, "batch_size":batch_size, "input_dim":input_dim, "encoder_layer":encoder_layer, "activation":activation, "lr":lr,
#            "kernel_type":kernel_type, "loss_emb":loss_emb, "max_epochs":max_epochs, "fast_dev_run":fast_dev_run, "accelerator":accelerator, "devices":devices, "exp_name":exp_name, "seed":seed, "bandwitdh":bandwitdh, "t":t, "scale":scale}

# with open("runs/param_"+exp_name+".yaml","w") as file:
#     yaml.dump(params,file)

In [None]:
# Weights & Biases switch: when `wandb` is True the run is logged to the
# "fim_phate" project under `exp_name`; otherwise `logger` is False.
wandb = True
if wandb:
    logger = WandbLogger(project="fim_phate", name=exp_name)
else:
    logger = False

In [5]:
# SEED
# Use the public `pl.seed_everything` entry point: the
# `pl.utilities.seed.seed_everything` alias is deprecated and no longer
# available in recent PyTorch Lightning releases. `int(...)` makes sure a
# plain integer is passed even if `seed` was created as a 1-element tensor.
pl.seed_everything(int(seed))

# Data loader for the training set.
train_loader = train_dataloader(dataset, n_obs, n_dim, batch_size)

# Defining the pytorch lightning model.
model = LitAutoencoder(
    input_dim=input_dim,
    encoder_layer=encoder_layer,
    activation=activation,
    lr=lr,
    kernel_type=kernel_type,
    loss_emb=loss_emb,
    bandwitdh=bandwitdh,
    t=t,
    scale=scale,
)

# Defining the trainer
# NOTE: we can add more things like gradient clipping, early stopping, callbacks, etc.
# NOTE: `logger` is whatever the Wandb cell defined: a `WandbLogger` when
#       `wandb` is True, otherwise `False`.
# NOTE: `enable_checkpointing` is set to `False`; set it to `True` to keep checkpoints of the model.
trainer = Trainer(
    max_epochs=max_epochs,
    fast_dev_run=fast_dev_run,
    accelerator=accelerator,
    devices=devices,
    logger=logger,
    enable_checkpointing=False,
)

# Train the model
trainer.fit(model, train_dataloaders=train_loader)

Global seed set to 258
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 280   
---------------------------------------
280       Trainable params
0         Non-trainable params
280       Total params
0.001     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 29.74it/s, loss=7.73, v_num=85cq]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 29.05it/s, loss=7.73, v_num=85cq]
