In [1]:
import lightning as L

from slrt.constants import *

2025-01-03 14:21:44.497934: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-03 14:21:44.589047: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## Config

In [2]:
# --- Registry keys: select which model / datamodule / tokenizer classes are looked up ---
model_name = "MSKA"
tokenizer_name = "SimpleTokenizer"
dataset_type = "keypoint"
dataset_name = "Phoenix2014"

# --- Input artefacts (paths are relative to this notebook's working directory) ---
# NOTE(review): dataset_dir is not referenced by the cells shown here — confirm whether it is still needed.
dataset_dir = "../data/phoenix2014"
vocab_file = "../data/preprocessed/phoenix2014/phoenix2014-gloss-vocab.txt"
keypoints_file = "../data/preprocessed/phoenix2014/phoenix2014-keypoints-mska.pkl"

# --- Checkpoint that is loaded for prediction / evaluation ---
ckpt_file = "../experiments/Phoenix2014_Experiment/MSKA3/0/checkpoints/best.ckpt"

In [3]:
# Restore the trained model from `ckpt_file`. The class is resolved by name from
# ModelClassDict (star-imported from slrt.constants) and the weights are mapped
# onto the CPU at load time.
model_cls = ModelClassDict[model_name]
model = model_cls.load_from_checkpoint(ckpt_file, map_location='cpu')

In [4]:
# Build the datamodule for the configured dataset and prepare its test split.
datamodule_cls = DataModuleClassDict[dataset_type][dataset_name]

# Tokenizer used for the recognition task, constructed from the gloss vocabulary file.
recognition_tokenizer = TokenizerDict["Recognition"][tokenizer_name](vocab_file=vocab_file)

datamodule = datamodule_cls(
    keypoints_file=keypoints_file,
    # One sample per batch: the printing cell further down reads element 0 of each batch.
    batch_size=1,
    num_workers=10,
    transform=TransformDict[dataset_type],
    tokenizer={"recognition": recognition_tokenizer},
)
datamodule.setup(stage="test")

# Dataloader consumed by trainer.predict in the "Predict" section.
dl = datamodule.test_dataloader()

## Predict

In [5]:
# Prediction trainer: first GPU only, 16-bit mixed precision, no logger attached.
trainer = L.Trainer(
    accelerator='gpu',
    devices=[0],
    precision="16-mixed",
    logger=False,
)

# `res` collects what the model's predict step returns for every batch of `dl`.
res = trainer.predict(model=model, dataloaders=dl)

  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 0it [00:00, ?it/s]

In [6]:
# Print one line per predicted batch: the sample name left-aligned in a
# 50-character column, followed by the predicted glosses joined with spaces.
# Only element 0 of each batch field is read (the dataloader was built with batch_size=1).
for item in res:
    sample_name = item[0][0]
    predicted_glosses = item[1][0]
    print(f"{sample_name:<50}: {' '.join(predicted_glosses)}")

01April_2010_Thursday_heute_default-5             : ABER MORGEN SONNE SELTEN REGEN
01April_2010_Thursday_tagesschau_default-7        : SAMSTAG WECHSELHAFT BESONDERS FREUNDLICH loc-NORDOST __EMOTION__
01April_2010_Thursday_tagesschau_default-8        : SONNTAG REGEN TEIL GEWITTER loc-SUEDOST DURCHGEHEND REGEN-PLUSPLUS
01April_2011_Friday_tagesschau_default-0          : __ON__ JETZT WETTER WIE AUSSEHEN MORGEN SAMSTAG ZWEITE APRIL __OFF__ __ON__ __OFF__
01April_2011_Friday_tagesschau_default-14         : __ON__ MONTAG WECHSELHAFT MEHR KUEHL AB IN-KOMMEND REGEN FREUNDLICH WARM __OFF__
01April_2011_Friday_tagesschau_default-6          : __ON__ FLUSS loc-NORD HEUTE NACHT NOCH WOLKE KOENNEN NIESELREGEN REGEN loc-REGION
01December_2011_Thursday_heute_default-6          : DURCHGEHEND loc-REGION MILD NEUN BIS UNGEFAEHR VIERZEHN GRAD TAG AUCH REGEN
01December_2011_Thursday_tagesschau_default-6     : WIND MAESSIG FRISCH STARK STURM MEER STARK SCHWER STURM SUEDOSTRAUM SCHWACH WIND
01February_2011_T

## Validate

In [5]:
# Re-run setup for the "fit" stage so the validation (dev) dataloader can be requested.
datamodule.setup(stage="fit")
# Dataloader passed to trainer.validate below.
dev_dataloader = datamodule.val_dataloader()

In [6]:
# Evaluation trainer: single GPU, 16-bit mixed precision, launched with the
# notebook-compatible DDP strategy, no logger attached.
# NOTE(review): "ddp_notebook" starts worker processes by forking; Lightning may
# refuse to fork once CUDA has been initialised in this kernel (e.g. after the
# "Predict" section has run on the GPU). Confirm this section survives
# Restart Kernel -> Run All — TODO verify.
trainer = L.Trainer(
    accelerator='gpu',
    devices=[0],
    precision="16-mixed",
    strategy="ddp_notebook",
    logger=False,
)

  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Evaluate on the dev split with the model's current recognition decoder.
# The call returns a list of metric dicts; it is kept under a name and then
# shown as the cell result.
dev_metrics = trainer.validate(model=model, dataloaders=dev_dataloader)
dev_metrics

The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation: 0it [00:00, ?it/s]

2025-01-03 14:22:35 Running SCLITE...
2025-01-03 14:22:35 SCLITE completed successfully. Outputs saved to /new_home/xzj23/workspace/SLRT/experiments/Phoenix2014_Experiment/MSKA3/0/hypothesis/dev/epoch_0/recognition/sclite_results
2025-01-03 14:22:35 Epoch 0, DEV_WER: 21.5%




[{'Val/Loss_epoch': 93.92906188964844, 'Val/Word-Error-Rate': 21.5}]

In [8]:
# Switch the datamodule back to the "test" stage before requesting the test dataloader.
datamodule.setup(stage="test")
# Dataloader passed to trainer.test below.
test_dataloader = datamodule.test_dataloader()

In [9]:
# Fresh trainer for the test run; same settings as the validation trainer
# (single GPU, 16-bit mixed precision, notebook DDP strategy, no logger).
# NOTE(review): "ddp_notebook" forks worker processes, which Lightning may reject
# if CUDA was already initialised in this kernel — TODO confirm on a clean Run All.
trainer = L.Trainer(
    accelerator='gpu',
    devices=[0],
    precision="16-mixed",
    strategy="ddp_notebook",
    logger=False,
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
# Evaluate on the test split with the model's current recognition decoder.
# The returned list of metric dicts is kept under a name and shown as the cell result.
test_metrics = trainer.test(model=model, dataloaders=test_dataloader)
test_metrics

The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

2025-01-03 14:23:32 Running SCLITE...
2025-01-03 14:23:33 SCLITE completed successfully. Outputs saved to /new_home/xzj23/workspace/SLRT/experiments/Phoenix2014_Experiment/MSKA3/0/hypothesis/test/test_best_model_after_epoch_0/recognition/sclite_results
2025-01-03 14:23:33 Test best model after epoch -1, TEST_WER: 22.299999237060547%




[{'Test/Loss_epoch': 92.25061798095703,
  'Test/Word-Error-Rate': 22.299999237060547}]

## Validate and Test TFCTCBeamSearchDecoder

In [11]:
from slrt.models import TFCTCBeamSearchDecoder

# Tokenizer for the TensorFlow CTC beam-search decoder. Unlike the datamodule's
# tokenizer, the special tokens are spelled out explicitly here.
# NOTE(review): confirm these special tokens yield the same vocabulary indices the
# checkpoint was trained with — TODO verify.
tf_decoder_tokenizer = TokenizerDict["Recognition"][tokenizer_name](
    vocab_file=vocab_file,
    pad_token="<pad>",
    unk_token="<unk>",
    sos_token="<si>",
    eos_token="</s>",
)

# Beam search keeping 10 beams and returning only the single best path.
tf_ctc_decoder = TFCTCBeamSearchDecoder(
    tokenizer=tf_decoder_tokenizer,
    beam_width=10,
    top_paths=1,
)

# Replaces the decoder on the already-loaded model IN PLACE: every evaluation cell
# executed after this point uses the TF decoder until `model` is reloaded.
model.recognition_decoder = tf_ctc_decoder

In [12]:
# Dev-split evaluation again, now with the TF CTC beam-search decoder attached to
# `model` (assigned in the preceding cell). Reuses the most recently created trainer.
dev_metrics_tf_decoder = trainer.validate(model=model, dataloaders=dev_dataloader)
dev_metrics_tf_decoder

The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation: 0it [00:00, ?it/s]

2025-01-03 14:23:39.970413: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


2025-01-03 14:25:32 Running SCLITE...
2025-01-03 14:25:33 SCLITE completed successfully. Outputs saved to /new_home/xzj23/workspace/SLRT/experiments/Phoenix2014_Experiment/MSKA3/0/hypothesis/dev/epoch_0/recognition/sclite_results
2025-01-03 14:25:33 Epoch 0, DEV_WER: 28.399999618530273%




[{'Val/Loss_epoch': 93.92906188964844,
  'Val/Word-Error-Rate': 28.399999618530273}]

In [13]:
# Test-split evaluation with the TF CTC beam-search decoder attached to `model`.
# The returned list of metric dicts is kept under a name and shown as the cell result.
test_metrics_tf_decoder = trainer.test(model=model, dataloaders=test_dataloader)
test_metrics_tf_decoder

The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 1 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

2025-01-03 14:25:39.743937: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


2025-01-03 14:27:50 Running SCLITE...
2025-01-03 14:27:51 SCLITE completed successfully. Outputs saved to /new_home/xzj23/workspace/SLRT/experiments/Phoenix2014_Experiment/MSKA3/0/hypothesis/test/test_best_model_after_epoch_0/recognition/sclite_results
2025-01-03 14:27:51 Test best model after epoch -1, TEST_WER: 28.299999237060547%




[{'Test/Loss_epoch': 92.25061798095703,
  'Test/Word-Error-Rate': 28.299999237060547}]