In [1]:
# GPU selection is configured through environment variables here, ahead of the
# lightning / project imports that follow further down in this cell.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"  # only device index "1" is made visible to this kernel


# Load variables from a .env file into the process environment.
# NOTE(review): which variables the project relies on is not visible in this notebook — confirm.
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
import lightning as L

from tqdm.auto import tqdm
from lightning.pytorch.callbacks import ModelCheckpoint

from src.model.modeling_bind import LitBIND
from src.data.dataset import get_train_dataloader, get_dev_dataloader, get_test_dataloader

# ---- Reproducibility ----
SEED = 42

# ---- Dataset and backbone ----
DATASET_NAME = 'jwengr/PeOcrSanskritPreproc'
BASE_MODEL_NAME = 'Qwen/Qwen3-0.6B-Base'

# ---- Optimisation ----
MINI_BATCH_SIZE = 4   # batch_size handed to every dataloader below
N_BATCH = 8           # used as accumulate_grad_batches by the training Trainer
EPOCHS = 10
LEARNING_RATE = 1e-4
USE_BNTD = True       # forwarded to LitBIND(use_bntd=...)

# ---- Sequence-length limits for the train / dev dataloaders ----
TRAIN_MAX_LENGTH = 128
VALID_MAX_LENGTH = 128

# ---- Inference-time sentence settings, forwarded unchanged to LitBIND ----
INFERENCE_SENTENCE_MAX_LENGTH = 64
INFERENCE_SENTENCE_MIN_LENGTH = 32
INFERENCE_SENTENCE_N_OVERLAP = 3

# Seed first, then build the dataloaders.
L.seed_everything(SEED)

train_dl = get_train_dataloader(
    DATASET_NAME,
    batch_size=MINI_BATCH_SIZE,
    max_length=TRAIN_MAX_LENGTH,
)
dev_dl = get_dev_dataloader(
    DATASET_NAME,
    batch_size=MINI_BATCH_SIZE,
    max_length=VALID_MAX_LENGTH,
)
# The test dataloader is created without a max_length argument.
test_dl = get_test_dataloader(DATASET_NAME, batch_size=MINI_BATCH_SIZE)

  from .autonotebook import tqdm as notebook_tqdm
Seed set to 42


In [None]:
# Build the Lightning module from the shared configuration constants.
lit_bind = LitBIND(
    base_model_name=BASE_MODEL_NAME,
    lr=LEARNING_RATE,
    epochs=EPOCHS,
    use_bntd=USE_BNTD,
    inference_sentence_max_length=INFERENCE_SENTENCE_MAX_LENGTH,
    inference_sentence_min_length=INFERENCE_SENTENCE_MIN_LENGTH,
    inference_sentence_n_overlap=INFERENCE_SENTENCE_N_OVERLAP,
    n_tokens_per_char=12
)

# Keep the three checkpoints with the lowest monitored `valid_loss`, weights only.
# NOTE(review): assumes LitBIND logs a metric named 'valid_loss' — confirm in the module.
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints/bind',
    # The first part is an f-string resolved now; the second part is a plain
    # string so the {epoch}/{valid_loss} placeholders are left for Lightning.
    filename=f"{DATASET_NAME.split('/')[1]}-{BASE_MODEL_NAME.split('/')[1]}-addbce-focalloss"+"-{epoch:02d}-{valid_loss:.4f}",
    monitor='valid_loss',
    mode='min',
    save_weights_only=True,
    save_top_k=3,
)

trainer = L.Trainer(
    callbacks=[checkpoint_callback],
    # 'bf16' is a legacy alias that Lightning 2.x converts to 'bf16-mixed'
    # (with a warning); use the explicit spelling.
    precision='bf16-mixed',
    max_epochs=EPOCHS,
    # One optimizer step per N_BATCH mini-batches.
    accumulate_grad_batches=N_BATCH
)

trainer.fit(lit_bind, train_dl, dev_dl)

Seed set to 42


KeyboardInterrupt: 

In [2]:
# Restore the module from a saved checkpoint, passing the same constructor
# arguments that the training cell uses.
ckpt_path = 'checkpoints/bind/PeOcrSanskritPreproc-Qwen3-0.6B-Base-addbce-focalloss-epoch=00-valid_loss=0.0463.ckpt'
bind_init_kwargs = dict(
    base_model_name=BASE_MODEL_NAME,
    lr=LEARNING_RATE,
    epochs=EPOCHS,
    use_bntd=USE_BNTD,
    inference_sentence_max_length=INFERENCE_SENTENCE_MAX_LENGTH,
    inference_sentence_min_length=INFERENCE_SENTENCE_MIN_LENGTH,
    inference_sentence_n_overlap=INFERENCE_SENTENCE_N_OVERLAP,
    n_tokens_per_char=12,
)
lit_bind = LitBIND.load_from_checkpoint(ckpt_path, **bind_init_kwargs)

use full attn qwen3


In [3]:
# Fresh Trainer used for inference; this rebinds the name `trainer` that the
# training cell also assigns. No arguments are passed, so none of the
# training-time settings (callbacks, precision, max_epochs, gradient
# accumulation) are applied here.
trainer = L.Trainer()

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [4]:
# Run prediction with the restored module over the test dataloader and keep
# the returned per-batch outputs; the next cell flattens them into one list.
preds = trainer.predict(lit_bind, test_dl)

You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]
/home/jjw1214/.conda/envs/jjw1214_py312/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=127` in the `DataLoader` to improve performance.


Predicting DataLoader 0: 100%|██████████| 384/384 [01:05<00:00,  5.90it/s]


In [5]:
# Flatten the per-batch outputs of trainer.predict into one list of predictions.
prediction = [item for batch_preds in preds for item in batch_preds]

# Walk the test dataloader again to collect the noisy inputs and references.
# NOTE(review): this relies on test_dl yielding samples in the same order as
# during trainer.predict (i.e. no shuffling) — confirm in get_test_dataloader.
inputs = []
true = []
categories = []
for batch in test_dl:
    true.extend(batch['sentence'])
    inputs.extend(batch['sentence_noisy'])
    # The per-category metrics cell reads result_df['category']; carry the
    # field through when the batch provides it.
    # NOTE(review): presence of a 'category' key in the batch is assumed, not verified.
    if 'category' in batch:
        categories.extend(batch['category'])

# Fail early with a readable message instead of a pandas length error.
if not (len(inputs) == len(prediction) == len(true)):
    raise ValueError(
        f"Row count mismatch: inputs={len(inputs)}, "
        f"pred={len(prediction)}, true={len(true)}"
    )

# Build the frame in a single call (same column order as before).
result_columns = {'input': inputs, 'pred': prediction, 'true': true}
if categories and len(categories) == len(inputs):
    result_columns['category'] = categories
result_df = pd.DataFrame(result_columns)


100%|██████████| 384/384 [00:00<00:00, 908923.67it/s]


In [6]:
from src.metrics.metric import calculate_metric

In [None]:
# Share of test rows whose prediction is exactly equal to the noisy input.
result_df['input'].eq(result_df['pred']).mean()

np.float64(0.9817470664928292)

: 

In [8]:
# Overall metrics on the full test set; arguments are passed in the order
# (input, true, pred), each as a plain Python list.
calculate_metric(*(result_df[column].tolist() for column in ('input', 'true', 'pred')))

({'S_D_p': 35.714,
  'S_D_r': 2.283,
  'S_D_f1': 4.292,
  'S_C_p': 28.571,
  'S_C_r': 1.826,
  'S_C_f1': 3.433,
  'C_D_p': 100.0,
  'C_D_r': 1.247,
  'C_D_f1': 2.463,
  'C_C_p': 64.286,
  'C_C_r': 0.802,
  'C_C_f1': 1.584},
 [35.714,
  2.283,
  4.292,
  28.571,
  1.826,
  3.433,
  100.0,
  1.247,
  2.463,
  64.286,
  0.802,
  1.584])

In [None]:
# Per-category breakdown of the metrics computed above for the whole test set.
if 'category' not in result_df.columns:
    # No visible cell guarantees this column exists; report it instead of
    # failing with a bare KeyError.
    print("result_df has no 'category' column; skipping per-category metrics")
else:
    # groupby yields the categories in sorted order, so the printed report is
    # stable across kernel restarts (iterating a set of strings is not).
    for cat, cat_df in result_df.groupby('category'):
        result, _ = calculate_metric(
            cat_df['input'].tolist(),
            cat_df['true'].tolist(),
            cat_df['pred'].tolist(),
        )
        print(cat, result)