In [1]:
#!/usr/bin/env python
# coding: utf-8

import os
import sys
sys.path.append('./')
sys.path.append('../')

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping

import config
from libs.data_loader import BBDataModule
from libs.nn import BaselineModel

# import numpy as np
# import pandas as pd
# from torch.utils.data import Dataset, DataLoader, random_split, default_collate

In [2]:
# import freeze_support
# from multiprocessing import freeze_support
# freeze_support()

cfg = config.BASELINE_MODEL

ROOT_DIR = '.' if os.path.exists('config') else '..' 
csv_file = os.path.join(ROOT_DIR, 'dataset', cfg['train_csv_file'])
# csv_file = os.path.join(ROOT_DIR, 'dataset', 'train.csv')

In [3]:
model = BaselineModel(
    num_input=cfg['num_input'], 
    num_output=cfg['num_output'], 
    layers=cfg['layers'],
    dropout=cfg['dropout']
) 
# print(model)
# testset = BBDataset(csv_file=csv_file, transform=None)
# X, y = default_collate([testset[0]])
# y_pred = model(X)


In [4]:
data_module = BBDataModule(
    csv_file=csv_file, 
    batch_size=cfg['batch_size'], 
    num_workers=cfg['num_workers']
)

In [5]:
log_dir = os.path.join(ROOT_DIR, 'tb_logs')
logger = TensorBoardLogger(log_dir, name="baseline")

trainer = pl.Trainer(
    # limit_train_batches=0.1, # use only 10% of the training data
    min_epochs=1,
    max_epochs=cfg['num_epochs'],
    precision='bf16-mixed',
    callbacks=[EarlyStopping(monitor="val_loss")],
    logger=logger,
    # profiler=profiler,
    # profiler='simple'
)

Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [6]:
trainer.fit(model, data_module)
trainer.validate(model, data_module)


  | Name    | Type    | Params | Mode 
--------------------------------------------
0 | layer0  | Linear  | 384    | train
1 | act0    | ReLU    | 0      | train
2 | layer1  | Linear  | 1.1 K  | train
3 | act1    | ReLU    | 0      | train
4 | layer2  | Linear  | 264    | train
5 | act2    | ReLU    | 0      | train
6 | dropout | Dropout | 0      | train
7 | output  | Linear  | 9      | train
8 | loss_fn | MSELoss | 0      | train
--------------------------------------------
1.7 K     Trainable params
0         Non-trainable params
1.7 K     Total params
0.007     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


                                                                                                                                                                                               



Epoch 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1003/1003 [00:07<00:00, 138.69it/s, v_num=14]
Validation: |                                                                                                                                                            | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                                      | 0/251 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                                         | 0/251 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|▌                                                                                                                               | 1/251 [00:00<00:00, 265.82it/s][A
Validation DataLoader 0:   1



Validation DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 251/251 [00:00<00:00, 340.25it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            0.3550361692905426
        val_rmse            0.5858098864555359
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.3550361692905426, 'val_rmse': 0.5858098864555359}]

In [7]:
%load_ext tensorboard
%tensorboard --logdir=../tb_logs/

ModuleNotFoundError: No module named 'tensorboard'