In [4]:
## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
# Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms
# PyTorch Lightning
import pytorch_lightning as pl
import numpy as np
from pprint import pprint
from functools import partial
# Fix the global seed so repeated runs of the notebook start from the same RNG state.
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Prefer the first CUDA device when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Autograd anomaly detection stays enabled, as in the original cell.
# The trailing semicolon stops IPython from echoing the returned
# `set_detect_anomaly` object as the cell's output.
torch.autograd.set_detect_anomaly(True);

Global seed set to 42


cuda:0


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f12679e2920>

In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Model

In [6]:
from pytorch_lightning import seed_everything, LightningModule, Trainer
from pytorch_lightning.callbacks import TQDMProgressBar
import pandas as pd
import torch
import os
from src.lightning_model.lit_sorting_model import LitSortingModel
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

# Re-seed for this experiment. `workers=True` asks Lightning to seed dataloader
# workers as well.
random_state = 12345
seed_everything(random_state, workers=True)

# Hyper-parameters of the sorting model; `hint_loss=False` is the
# "no hint loss" variant that the output directory below is named after.
model = LitSortingModel(
    gat_head=1,
    # max_step=100000,
    feature_encoded_dim=16,
    dropout=0,
    num_node=4,
    num_train=10000,
    num_val=50,
    num_test=50,
    learning_rate=0.001,
    hint_loss=False,
)

# Root directory handed to the Trainer as `default_root_dir`.
# `exist_ok=True` makes the cell safely re-runnable and avoids the
# check-then-create race of `os.path.exists` + `os.makedirs`.
saved_model_path = './saved_model/no_hint_loss_lit_sorting_model/'
os.makedirs(saved_model_path, exist_ok=True)

# Log the learning rate at every optimizer step. The monitor was previously
# created but never registered with the Trainer, so it had no effect.
lr_monitor = LearningRateMonitor(logging_interval='step')
# Keep the checkpoint with the lowest monitored `val_loss`; holding a reference
# lets later cells read `checkpoint_callback.best_model_path`.
checkpoint_callback = ModelCheckpoint(monitor='val_loss', mode='min')

trainer = Trainer(
    max_epochs=5,
    callbacks=[
        TQDMProgressBar(refresh_rate=100),
        checkpoint_callback,
        lr_monitor,
    ],
    # Run validation after every 20% of a training epoch (five times per epoch).
    val_check_interval=0.2,
    # accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    accelerator='cpu',
    check_val_every_n_epoch=1,
    default_root_dir=saved_model_path,
    # gradient_clip_val=1.0,
)
trainer.fit(model)

Global seed set to 12345
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name  | Type         | Params
---------------------------------------
0 | model | SortingModel | 4.3 K 
---------------------------------------
4.3 K     Trainable params
0         Non-trainable params
4.3 K     Total params
0.017     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

training loss 0 samples: nan
Validation Set 2 samples (@epoch:0): loss=0.8534471988677979, accuracy=0.375


  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.9721454211036747
Validation Set 50 samples (@epoch:0): loss=0.8783798813819885, accuracy=0.3100000023841858


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.7335507292197839
Validation Set 50 samples (@epoch:0): loss=0.5973827242851257, accuracy=0.4950000047683716


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.5630505023157173
Validation Set 50 samples (@epoch:0): loss=0.47576332092285156, accuracy=0.5400000214576721


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.500027459890362
Validation Set 50 samples (@epoch:0): loss=0.34832268953323364, accuracy=0.5649999976158142


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.4685390806236444
Validation Set 50 samples (@epoch:0): loss=0.23696546256542206, accuracy=0.6850000023841858


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.4376185012044542
Validation Set 50 samples (@epoch:1): loss=0.25751349329948425, accuracy=0.6200000047683716


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.4104360511421331
Validation Set 50 samples (@epoch:1): loss=0.1744728535413742, accuracy=0.7149999737739563


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.35928689478370224
Validation Set 50 samples (@epoch:1): loss=0.14477653801441193, accuracy=0.7099999785423279


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.3218013862482371
Validation Set 50 samples (@epoch:1): loss=0.1131429374217987, accuracy=0.7300000190734863


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.2769577285913371
Validation Set 50 samples (@epoch:1): loss=-0.003395494306460023, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.2329138586966843
Validation Set 50 samples (@epoch:2): loss=0.05484617128968239, accuracy=0.7749999761581421


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.19189051018002587
Validation Set 50 samples (@epoch:2): loss=0.15749436616897583, accuracy=0.7099999785423279


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.20145482182399013
Validation Set 50 samples (@epoch:2): loss=0.038772936910390854, accuracy=0.7950000166893005


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.12545289427578352
Validation Set 50 samples (@epoch:2): loss=-0.001030716928653419, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.09384797768944235
Validation Set 50 samples (@epoch:2): loss=-0.0025042318738996983, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.07690099036518191
Validation Set 50 samples (@epoch:3): loss=0.005933017935603857, accuracy=0.8149999976158142


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.08568001780931388
Validation Set 50 samples (@epoch:3): loss=0.09636828303337097, accuracy=0.7599999904632568


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.10631335874687564
Validation Set 50 samples (@epoch:3): loss=0.004386965185403824, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.06653536814259159
Validation Set 50 samples (@epoch:3): loss=-0.00621824013069272, accuracy=0.8199999928474426


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.06337863163044156
Validation Set 50 samples (@epoch:3): loss=0.07265126705169678, accuracy=0.7450000047683716


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.08754652238804579
Validation Set 50 samples (@epoch:4): loss=-0.03535933047533035, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.0019202937610158174
Validation Set 50 samples (@epoch:4): loss=-0.11107485741376877, accuracy=0.8899999856948853


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: -0.014669394747828987
Validation Set 50 samples (@epoch:4): loss=-0.1152350902557373, accuracy=0.9049999713897705


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: -0.01629112333107915
Validation Set 50 samples (@epoch:4): loss=-0.043029312044382095, accuracy=0.824999988079071


Validation: 0it [00:00, ?it/s]

training loss 2000 samples: 0.016435704131034356
Validation Set 50 samples (@epoch:4): loss=0.035645317286252975, accuracy=0.7799999713897705


`Trainer.fit` stopped: `max_epochs=5` reached.
