In [1]:
# Turn on IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel. Mode 2 reloads all
# modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.profilers import PyTorchProfiler

import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
load_dotenv()
tqdm.pandas()

from src.feedback_prize_english_language_learning.lib.data.data_utils import (
    load_datasets, 
    preprocessing_datasets,
    select_features_split_datasets,
)
from src.feedback_prize_english_language_learning.lib.data.data_module import NLPDataModule
from src.feedback_prize_english_language_learning.lib.models.BertRegression import BertRegression
from src.feedback_prize_english_language_learning.lib.config import Config, DataModuleConfig, ModuleConfig
from src.feedback_prize_english_language_learning.lib.utils import create_dirs

In [3]:
# Notebook-wide configuration.
# Lift pandas' display truncation for both columns and rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Hand every working directory used by this run (cache, logs, checkpoints,
# profiler output, performance reports) to the project's `create_dirs` helper.
create_dirs(
    [
        Config.cache_dir,
        Config.log_dir,
        Config.ckpt_dir,
        Config.prof_dir,
        Config.perf_dir,
    ]
)

# Select the "medium" float32 matmul precision setting in PyTorch.
torch.set_float32_matmul_precision("medium")

In [5]:
# Sizes of the train / validation / test / predict splits, in that order.
tuple(len(split) for split in (train_df, val_df, test_df, predict_df))

(2502, 626, 783, 3)

In [6]:
# Show only the first training row as a quick look at the available columns.
train_df.head(n=1)

In [7]:
# Build the data module from the label column and the three data splits.
# Tokenizer/model name, batching, worker count and seed all come from the
# project configuration classes.
lit_datamodule = NLPDataModule(
    DataModuleConfig.label_column, train_df, val_df, test_df,
    seed=Config.seed,
    num_workers=DataModuleConfig.num_workers,
    batch_size=DataModuleConfig.batch_size,
    pretrained_model_name=ModuleConfig.model_name,
)

In [8]:
# Instantiate the regression model with the configured pretrained model name
# and learning rate.
lit_model = BertRegression(
    learning_rate=ModuleConfig.learning_rate,
    pretrained_model=ModuleConfig.model_name,
)

In [9]:
# Optional smoke test, intentionally left disabled: set up the data module,
# take a single batch from the training dataloader, and call the model on the
# batch inputs. Uncomment to check the data/model wiring before a full run.
# lit_datamodule.setup()
# batch = next(iter(lit_datamodule.train_dataloader()))
# inputs, label = batch
# lit_model(**inputs)

In [12]:
# Training callbacks.
callbacks = [
    # Stop once validation RMSE has not improved for 10 consecutive checks.
    EarlyStopping(monitor="val-RMSE", mode="min", verbose=True, patience=10),
    # Keep the checkpoint with the best (lowest) validation RMSE. Without a
    # `monitor`, Lightning only saves the last epoch, which with the early
    # stopping patience above would be a model from well past the best epoch.
    ModelCheckpoint(
        dirpath=Config.ckpt_dir,
        filename="model",
        monitor="val-RMSE",
        mode="min",
    ),
    # Record the learning rate at every optimizer step.
    LearningRateMonitor(logging_interval='step'),
]

# Experiment loggers: CSV files directly under the log directory and
# TensorBoard event files under its "tb_logs" sub-directory.
loggers = [
    CSVLogger(save_dir=Config.log_dir, name="csv-logs"),
    TensorBoardLogger(save_dir=Config.log_dir / "tb_logs", name="my_model"),
]

# Trainer configuration: mixed-precision training for at most 25 epochs with
# deterministic algorithms, using the loggers and callbacks defined above.
lit_trainer = pl.Trainer(
    precision="16-mixed",
    max_epochs=25,
    deterministic=True,
    logger=loggers,
    callbacks=callbacks,
    log_every_n_steps=10,
    # Lightning 2.x takes the profiler output location as `dirpath` + `filename`
    # (the ".txt" extension is added automatically). The old `output_filename`
    # argument no longer exists, so the report was never written to the
    # profiler directory.
    profiler=PyTorchProfiler(dirpath=Config.prof_dir, filename="profiler"),
    # Device selection is left to Lightning; the legacy `gpus=` argument is
    # not available in Lightning 2.x.
    accelerator="auto",
    # Notebook-compatible DDP launcher, required for multi-device runs
    # started from an interactive kernel.
    strategy="ddp_notebook",
)